Quite recently, I was tasked with writing a service that would deal with just one, but very capacious task - collecting a large amount of data from the database, aggregating and filling it all in Excel according to a specific template. In the process of finding the best solution, several approaches were tried, and problems related to memory and performance were solved. In this article, I want to share with you the main points and stages of the implementation of this task.
1. Statement of the problem
Due to the fact that I cannot divulge the details of the technical specification, entities, data collection algorithms, etc. I had to come up with something similar:
So let's imagine that we have an online chat with high activity, and the customer wants to download all messages enriched with user data for a certain date in Excel. More than 1 million messages can be accumulated per day.
We have 3 tables:
User. Stores the username and some rating (no matter where it comes from and how it is calculated)
Message. Stores message data - Username, DateTime, Message text.
Task. The task of generating a report, which is created by the customer. Stores the ID, the Status of the task (completed or not), and two parameters: Start date of the message, End date of the message.
The columns will be as follows:
Excel 4 1) message_date. 2) name. 3) rating. 4) text. 1 . excel, .
2. ,
, , β Spring + Hibernate . Oracle, .
spring-boot-starter-data-jpa, Spring Data, Hibernate JPA, .
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
<version>2.4.5</version>
</dependency>
spring-boot-starter-test
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc10</artifactId>
<version>19.10.0.0</version>
</dependency>
. , TASK, βCREATEDβ , , . , , . Spring Data . :
package com.report.generator.demo.config;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
import org.springframework.scheduling.TaskScheduler;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
@Configuration
@EnableScheduling
@EnableAsync
@EnableJpaRepositories(basePackages = "com.report.generator.demo.repository")
@PropertySource({"classpath:application.properties"})
@ConditionalOnProperty(
value = "app.scheduling.enable", havingValue = "true", matchIfMissing = true
)
public class DemoConfig {
private static final int CORE_POOL_SIZE = 2;
@Bean(name = "taskScheduler")
public TaskScheduler getTaskScheduler() {
ThreadPoolTaskScheduler scheduler = new ThreadPoolTaskScheduler();
scheduler.setPoolSize(CORE_POOL_SIZE);
scheduler.initialize();
return scheduler;
}
}
@Scheduled , Task , , .
@Async("taskScheduler")
@Scheduled(fixedDelay = 60000)
public void scheduledTask() {
log.info("scheduledTask is started");
Task task = getTask();
if (Objects.isNull(task)) {
log.info("task not found");
return;
}
log.info("task found");
generate(task);
}
, GitHub.
3.
.. Hibernate . entity MessageData (id, name, rating, messageDate, test). β List<Message> :
List<Message> findAllByMessageDateBetween(Instant dateFrom, Instant dateTo);
MessageData . , List . , . 30 OutOfMemoryError .
List, β id , , , . . , . , , ScrollableResults. . , , , . .
β . .. , , , 200+ . java , . view. , , hibernate entity.
, 1 10 . . , , . β 1 , 10 , β 10 , 1 , . , view , . , , , . , , , 1 . .
, β . , entity. , , sys_refcursor, 300 , :
create function message_ref(
date_from timestamp,
date_to timestamp
) return sys_refcursor as
ret_cursor sys_refcursor;
begin
open ret_cursor for
select m.id,
u.name,
u.rating,
m.message_date,
m.text
from message m
left join users u on m.user_id = u.id
where m.message_date between date_from and date_to;
return ret_cursor;
end message_ref;
? @NamedNativeQuery. : "{ ? = call message_ref(?, ?) }", callable = true , , cacheMode = CacheModeType.IGNORE , . . , , resultClass = MessageData.class entity. MessageData :
package com.report.generator.demo.repository.entity;
import lombok.Data;
import org.hibernate.annotations.CacheModeType;
import org.hibernate.annotations.NamedNativeQuery;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import java.io.Serializable;
import java.time.Instant;
import static com.report.generator.demo.repository.entity.MessageData.MESSAGE_REF_QUERY_NAME;
@Data
@Entity
@NamedNativeQuery(
name = MESSAGE_REF_QUERY_NAME,
query = "{ ? = call message_ref(?, ?) }",
callable = true,
cacheMode = CacheModeType.IGNORE,
resultClass = MessageData.class
)
public class MessageData implements Serializable {
public static final String MESSAGE_REF_QUERY_NAME = "MessageData.callMessageRef";
private static final long serialVersionUID = -6780765638993961105L;
@Id
private long id;
@Column
private String name;
@Column
private int rating;
@Column(name = "MESSAGE_DATE")
private Instant messageDate;
@Column
private String text;
}
StatelessSession. : namedQuery hibernate CacheMode UnsupportedOperationException. :
query.setHint(JPA_SHARED_CACHE_STORE_MODE, null);
query.setHint(JPA_SHARED_CACHE_RETRIEVE_MODE, null);
:
@Transactional
void generate(Task task) {
log.info("generating report is started");
try (
StatelessSession statelessSession = sessionFactory.openStatelessSession()
) {
ReportExcelStreamWriter writer = new ReportExcelStreamWriter();
Query<MessageData> query = statelessSession.createNamedQuery(MESSAGE_REF_QUERY_NAME, MessageData.class);
query.setParameter(1, task.getDateFrom());
query.setParameter(2, task.getDateTo());
query.setHint(JPA_SHARED_CACHE_STORE_MODE, null);
query.setHint(JPA_SHARED_CACHE_RETRIEVE_MODE, null);
ScrollableResults results = query.scroll(ScrollMode.FORWARD_ONLY);
int index = 0;
while (results.next()) {
index++;
writer.createRow(index, (MessageData) results.get(0));
if (index % 100000 == 0) {
log.info("progress {} rows", index);
}
}
writer.writeWorkbook();
task.setStatus(DONE.toString());
log.info("task {} complete", task);
} catch (Exception e) {
task.setStatus(FAIL.toString());
e.printStackTrace();
log.error("an error occurred with message {}. While executing the task {}", e.getMessage(), task);
} finally {
taskRepository.save(task);
}
}
4. Excel
β excel , . β org.apache.poi. :
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.0.0</version>
</dependency>
XSSFWorkbook XSSFSheet, row . , :
package com.report.generator.demo.service;
import com.report.generator.demo.repository.entity.MessageData;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.Instant;
public class ReportExcelWriter {
private final XSSFWorkbook wb;
private final XSSFSheet sheet;
public ReportExcelWriter() {
this.wb = new XSSFWorkbook();
this.sheet = wb.createSheet();
createTitle();
}
public void createRow(int index, MessageData data) {
XSSFRow row = sheet.createRow(index);
setCellValue(row.createCell(0), data.getMessageDate());
setCellValue(row.createCell(1), data.getName());
setCellValue(row.createCell(2), data.getRating());
setCellValue(row.createCell(3), data.getText());
}
public void writeWorkbook() throws IOException {
FileOutputStream fileOut = new FileOutputStream(Instant.now().getEpochSecond() + ".xlsx");
wb.write(fileOut);
fileOut.close();
}
private void createTitle() {
XSSFRow rowTitle = sheet.createRow(0);
setCellValue(rowTitle.createCell(0), "Date");
setCellValue(rowTitle.createCell(1), "Name");
setCellValue(rowTitle.createCell(2), "Rating");
setCellValue(rowTitle.createCell(3), "Text");
}
private void setCellValue(XSSFCell cell, String value) {
cell.setCellValue(value);
}
private void setCellValue(XSSFCell cell, long value) {
cell.setCellValue(value);
}
private void setCellValue(XSSFCell cell, Instant value) {
cell.setCellValue(value.toString());
}
}
. 3 1 excel. OutOfMemoryError. :
2Gb, OutOfMemoryError 30% .
excel , List, , stream, - google . I/O Stream excel, , . org.apache.poi , streaming. excel. streaming :
package com.report.generator.demo.service;
import com.report.generator.demo.repository.entity.MessageData;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.Instant;
public class ReportExcelStreamWriter {
private final SXSSFWorkbook wb;
private final SXSSFSheet sheet;
public ReportExcelStreamWriter() {
this.wb = new SXSSFWorkbook();
this.sheet = wb.createSheet();
createTitle();
}
public void createRow(int index, MessageData data) {
SXSSFRow row = sheet.createRow(index);
setCellValue(row.createCell(0), data.getMessageDate());
setCellValue(row.createCell(1), data.getName());
setCellValue(row.createCell(2), data.getRating());
setCellValue(row.createCell(3), data.getText());
}
public void writeWorkbook() throws IOException {
FileOutputStream fileOut = new FileOutputStream(Instant.now().getEpochSecond() + ".xlsx");
wb.write(fileOut);
fileOut.close();
}
private void createTitle() {
SXSSFRow rowTitle = sheet.createRow(0);
setCellValue(rowTitle.createCell(0), "Date");
setCellValue(rowTitle.createCell(1), "Name");
setCellValue(rowTitle.createCell(2), "Rating");
setCellValue(rowTitle.createCell(3), "Text");
}
private void setCellValue(SXSSFCell cell, String value) {
cell.setCellValue(value);
}
private void setCellValue(SXSSFCell cell, long value) {
cell.setCellValue(value);
}
private void setCellValue(SXSSFCell cell, Instant value) {
cell.setCellValue(value.toString());
}
}
:
, , OutOfMemoryError.
5.
, StatelessSession, ScrollableResults org.apache.poi streaming. , jdbc, , , . . 1 . , . . . DemoApplicationTests. GitHub.