Skip to content

Commit

Permalink
Migrate Excel creation to XSSF backend (#254)
Browse files Browse the repository at this point in the history
To be able to operate with Excel templates
containing diagrams, the backend for Excel
generation is migrated to the XSSF one.
This required major refactorings in
`ExcelGenerator` and `ExcelWriter`.
The creation workflow is now similar to the
Word generation: the template is modified,
loops are inserted after the placeholder block, and
the block is removed after its generation
has finished.
  • Loading branch information
AntonOellerer authored Aug 26, 2024
1 parent 2b7d321 commit 385d516
Show file tree
Hide file tree
Showing 23 changed files with 407 additions and 267 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ plugins {
}

group 'com.docutools'
version = '4.1.3'
version = '4.2.0'

java {
toolchain {
Expand Down
2 changes: 1 addition & 1 deletion config/checkstyle/checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@
<property name="tokens"
value="CLASS_DEF, INTERFACE_DEF, ENUM_DEF, ANNOTATION_DEF, ANNOTATION_FIELD_DEF,
PARAMETER_DEF, VARIABLE_DEF, METHOD_DEF"/>
<property name="allowedAbbreviations" value="URL,SXSSF"/>
<property name="allowedAbbreviations" value="URL,XSSF"/>
</module>
<module name="OverloadMethodsDeclarationOrder"/>
<module name="VariableDeclarationUsageDistance"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
import com.docutools.jocument.impl.DocumentImpl;
import com.docutools.jocument.impl.excel.interfaces.ExcelWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.stream.StreamSupport;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFFormulaEvaluator;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;


Expand All @@ -36,7 +39,7 @@ public ExcelDocumentImpl(Template template, PlaceholderResolver resolver, Genera
}

/**
* Start generation of a excel report from the template supplied in the constructor, using the also supplied
* Start generation of an Excel report from the template supplied in the constructor, using the also supplied
* resolver for resolving placeholders.
*
* @return The path to the generated report
Expand All @@ -46,18 +49,40 @@ public ExcelDocumentImpl(Template template, PlaceholderResolver resolver, Genera
protected Path generate() throws IOException {
logger.info("Starting generation");
Path file = Files.createTempFile("jocument-", ".xlsx");
ExcelWriter excelWriter = new SXSSFWriter(file);
try (XSSFWorkbook workbook = new XSSFWorkbook(template.openStream())) {
ExcelWriter excelWriter = new XSSFWriter(workbook);
for (Iterator<Sheet> it = workbook.sheetIterator(); it.hasNext(); ) {
Sheet sheet = it.next();
logger.info("Starting generation of sheet {}", sheet.getSheetName());
sanitizeSheet(sheet);
excelWriter.newSheet(sheet);
ExcelGenerator.apply(resolver, sheet.rowIterator(), excelWriter, options);
logger.info("Starting generation of sheet {}", sheet.getSheetName());
ExcelGenerator.apply(resolver, StreamSupport.stream(sheet.spliterator(), false).toList(), excelWriter, options);
}
XSSFFormulaEvaluator.evaluateAllFormulaCells(workbook);
try (OutputStream os = Files.newOutputStream(file)) {
logger.info("Writing document to {}", os);
workbook.write(os);
}
} finally {
excelWriter.recalculateFormulas();
excelWriter.complete();
}
return file;
}

/**
 * Fills the gaps of a sparsely stored sheet with empty rows.
 * To save storage space, Excel files are usually stored in a sparse format, meaning that empty rows are not
 * represented as Java objects. Creating the missing rows allows loops which contain empty rows to be
 * processed properly.
 *
 * @param sheet The sheet to insert the empty rows into
 */
private void sanitizeSheet(Sheet sheet) {
  final int lastRowIndex = sheet.getLastRowNum();
  int rowIndex = 0;
  while (rowIndex <= lastRowIndex) {
    // only indices for which the sheet holds no row object get a new, empty row
    if (sheet.getRow(rowIndex) == null) {
      sheet.createRow(rowIndex);
    }
    rowIndex++;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.util.LocaleUtil;
import org.apache.xmlbeans.impl.values.XmlValueDisconnectedException;


/**
Expand All @@ -37,14 +38,13 @@ public class ExcelGenerator {

private final ExcelWriter excelWriter;
private final PlaceholderResolver resolver;
private final Iterator<Row> rowIterator;
private final List<Row> rows;
private final int nestedLoopDepth;
private final GenerationOptions options;
private int alreadyProcessedLoopsSize = 0;

private ExcelGenerator(Iterator<Row> rowIterator, ExcelWriter excelWriter, PlaceholderResolver resolver, int nestedLoopDepth,
private ExcelGenerator(List<Row> rows, ExcelWriter excelWriter, PlaceholderResolver resolver, int nestedLoopDepth,
GenerationOptions options) {
this.rowIterator = rowIterator;
this.rows = rows;
this.excelWriter = excelWriter;
this.resolver = resolver;
this.nestedLoopDepth = nestedLoopDepth;
Expand All @@ -55,46 +55,57 @@ private ExcelGenerator(Iterator<Row> rowIterator, ExcelWriter excelWriter, Place
* This function starts the generating process for the supplied rows.
*
* @param resolver The resolver to use for looking up placeholders
* @param rowIterator An iterator over the template row which should be processed
* @param rows The rows which should be processed
* @param excelWriter The writer to write the report out to.
* @param options {@link GenerationOptions}
*/
static void apply(PlaceholderResolver resolver, Iterator<Row> rowIterator, ExcelWriter excelWriter, GenerationOptions options) {
apply(resolver, rowIterator, excelWriter, 0, options);
static void apply(PlaceholderResolver resolver, List<Row> rows, ExcelWriter excelWriter, GenerationOptions options) {
apply(resolver, rows, excelWriter, 0, options);
}

private static void apply(PlaceholderResolver resolver, Iterator<Row> rowIterator, ExcelWriter excelWriter, int nestedLoopDepth,
private static void apply(PlaceholderResolver resolver, List<Row> rows, ExcelWriter excelWriter, int nestedLoopDepth,
GenerationOptions options) {
new ExcelGenerator(rowIterator, excelWriter, resolver, nestedLoopDepth, options).generate();
new ExcelGenerator(rows, excelWriter, resolver, nestedLoopDepth, options).generate();
}

private void generate() {
logger.debug("Starting generation by applying resolver {}", resolver);
for (Iterator<Row> iterator = rowIterator; iterator.hasNext(); ) {
Row row = iterator.next();
if (isLoopStart(row)) {
handleLoop(row, iterator);
} else {
handleRow(row);
List<Row> toProcess = new LinkedList<>(rows);
while (!toProcess.isEmpty()) {
Row row = toProcess.get(0);
toProcess = toProcess.subList(1, toProcess.size());
try {
if (isLoopStart(row)) {
toProcess = handleLoop(row, toProcess);
} else {
handleRow(row);
}
} catch (XmlValueDisconnectedException e) {
logger.warn(e);
}
}
if (nestedLoopDepth != 0) { //here for clarity, could be removed since generation finishes if nestedLoopDepth == 0
logger.debug("Adding offset of {}", alreadyProcessedLoopsSize);
excelWriter.addRowOffset(alreadyProcessedLoopsSize); //we are in nested loop, readd the offset to prevent subtracting it multiple times
}
logger.debug("Finished generation of elements by resolver {}", resolver);
}

private void handleRow(Row row) {
excelWriter.newRow(row);
if (notInNestedLoop()) {
// We can operate on original row
excelWriter.setRow(row);
} else {
// We need to insert a new row
excelWriter.shiftRows(row.getRowNum(), 1); // shift rows below insertion point one down, so we do not overwrite an existing one
excelWriter.newRow(row);
}
excelWriter.addRowToIgnore(row.getRowNum());
excelWriter.updateRowsWritten(1);
ModificationInformation modificationInformation = new ModificationInformation(Optional.empty(), 0);
for (Cell cell : row) {
Optional<Integer> skipUntil = modificationInformation.skipUntil();
if (skipUntil.isEmpty() || cell.getColumnIndex() > skipUntil.get()) {
if (ExcelUtils.containsPlaceholder(cell)) {
var newModificationInformation = replacePlaceholder(cell, modificationInformation.offset());
modificationInformation = modificationInformation.merge(newModificationInformation);
} else if (ExcelUtils.isSimpleCell(cell)) {
} else if (nestedLoopDepth != 0) {
excelWriter.addCell(cell);
}
}
Expand Down Expand Up @@ -122,38 +133,57 @@ private ModificationInformation replacePlaceholder(Cell cell, int offset) {
return ModificationInformation.empty();
}

private void handleLoop(Row row, Iterator<Row> iterator) {
private List<Row> handleLoop(Row row, List<Row> rows) {
logger.debug("Handling loop at row {}", row.getRowNum());
var loopBody = getLoopBody(row, iterator);
var loopBody = getLoopBody(row, rows);
var loopBodySize = getLoopBodySize(loopBody);
logger.debug("Loop body size: {}", loopBodySize);
var finalLoopBody = loopBody.subList(1, loopBody.size() - 1);
var placeholderData = getPlaceholderData(row);
placeholderData.stream()
.forEach(placeholderResolver -> {
excelWriter.addRowOffset(-1); //So we also fill the cell of the loop start placeholder
ExcelGenerator.apply(placeholderResolver, finalLoopBody.iterator(), excelWriter, nestedLoopDepth + 1, options);
excelWriter.addRowOffset(1); //To avoid subtracting the placeholder size multiple times
excelWriter.addRowOffset(loopBodySize);
});
var loopPlaceholderSize = getLoopSize(loopBody);
excelWriter.addRowOffset(-1 * loopPlaceholderSize);
logger.debug("Subtracting row offset of {}", loopPlaceholderSize);
alreadyProcessedLoopsSize += loopPlaceholderSize;
int loopSize = getLoopSize(loopBody);
excelWriter.addRowToIgnore(row.getRowNum()); // ignore opening tag
excelWriter.addRowToIgnore(loopBody.get(loopBody.size() - 1).getRowNum()); // ignore closing tag
if (notInNestedLoop()) {
// Insert all data after the template rows
excelWriter.setSectionOffset(loopSize);
}
var loopBodyWithoutTags = loopBody.subList(1, loopBody.size() - 1); // remove loop opening and closing tag
PlaceholderData placeholderData = getPlaceholderData(row);
placeholderData.stream().forEach(placeholderResolver ->
ExcelGenerator.apply(placeholderResolver, loopBodyWithoutTags, excelWriter, nestedLoopDepth + 1, options));
if (notInNestedLoop()) {
// Processing of the outermost loop has finished, we can delete the template
int rowNum = row.getRowNum();
excelWriter.finishLoopProcessing(rowNum, loopSize);
rows = rows.stream().filter(row1 -> {
try {
row1.toString();
return true;
} catch (XmlValueDisconnectedException ignored) {
return false;
}
}).toList();
} else {
// We finished a nested loop, remove the template from the working set to continue processing of the current iteration
rows = rows.subList(loopBodyWithoutTags.size() + 1, rows.size());
}
return rows;
}

/**
 * Reports whether this generator runs with a nested loop depth of zero.
 * The depth is zero for the generator started through the package-level {@code apply} and is increased by one
 * for every generator spawned while handling a loop.
 *
 * @return {@code true} if {@code nestedLoopDepth} is 0, {@code false} otherwise
 */
private boolean notInNestedLoop() {
  final boolean atOutermostLevel = nestedLoopDepth == 0;
  return atOutermostLevel;
}

private List<Row> getLoopBody(Row row, Iterator<Row> iterator) {
private List<Row> getLoopBody(Row row, List<Row> rows) {
var placeholder = ExcelUtils.getPlaceholder(row);
logger.debug("Unrolling loop of {}", placeholder);
logger.debug("Getting loop body of {}", placeholder);
LinkedList<Row> rowBuffer = new LinkedList<>();
rowBuffer.add(row);
var rowInFocus = iterator.next();
Iterator<Row> rowIterator = rows.iterator();
var rowInFocus = rowIterator.next();
while (!ExcelUtils.isMatchingLoopEnd(rowInFocus, placeholder)) {
rowBuffer.addLast(rowInFocus);
rowInFocus = iterator.next();
rowInFocus = rowIterator.next();
}
rowBuffer.addLast(rowInFocus);
logger.debug("Unrolled loop of {}", placeholder);
return rowBuffer;
}

Expand Down
Loading

0 comments on commit 385d516

Please sign in to comment.