Skip to content

Commit

Permalink
Use LocaleUtil for getting and setting locale
Browse files Browse the repository at this point in the history
As documents should be generated with a single
language, jocument now relies on the locale
defined in `LocaleUtil` for localization of
the full document.
  • Loading branch information
AntonOellerer committed Jun 25, 2024
1 parent 726a752 commit efcb5fd
Show file tree
Hide file tree
Showing 9 changed files with 5 additions and 205 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ plugins {
}

group 'com.docutools'
version = '3.1.1'
version = '4.0.0'

java {
toolchain {
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/com/docutools/jocument/impl/DocumentImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.util.LocaleUtil;

public abstract class DocumentImpl extends Thread implements Document {
public static final String GERMAN_SPECIAL_CHARACTERS = "ÄäÖöÜüß";
Expand Down Expand Up @@ -40,6 +41,7 @@ protected DocumentImpl(Template template, PlaceholderResolver resolver, Generati
@Override
public void run() {
try {
LocaleUtil.setUserLocale(template.getLocale()); // LU is thread-local
logger.info("Starting generating document from path {} with template {} and resolver {}", path, template, resolver);
this.path = generate();
logger.info("Finished generating document from path {} with template {} and resolver {}", path, template, resolver);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,13 @@
import com.docutools.jocument.Template;
import com.docutools.jocument.impl.DocumentImpl;
import com.docutools.jocument.impl.excel.interfaces.ExcelWriter;
import com.docutools.jocument.impl.excel.util.ExcelUtils;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.Locale;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;


Expand Down Expand Up @@ -51,10 +48,6 @@ protected Path generate() throws IOException {
Path file = Files.createTempFile("jocument-", ".xlsx");
ExcelWriter excelWriter = new SXSSFWriter(file);
try (XSSFWorkbook workbook = new XSSFWorkbook(template.openStream())) {
var locale = ExcelUtils.getWorkbookLanguage(workbook).orElse(Locale.getDefault());
LocaleUtil.setUserLocale(locale);
logger.info("Set user locale to {}", locale);

for (Iterator<Sheet> it = workbook.sheetIterator(); it.hasNext(); ) {
Sheet sheet = it.next();
logger.info("Starting generation of sheet {}", sheet.getSheetName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

Expand All @@ -29,10 +27,6 @@ protected Path generate() throws IOException {
logger.info("Starting generation");
Path file = Files.createTempFile("jocument-", ".docx");
try (XWPFDocument document = new XWPFDocument(template.openStream())) {
var locale = WordUtilities.getDocumentLanguage(document).orElse(Locale.getDefault());
LocaleUtil.setUserLocale(locale);
logger.info("Set user locale to {}", locale);

List<IBodyElement> bodyElements = new ArrayList<>(document.getBodyElements().size() + document.getHeaderList().size());
bodyElements.addAll(document.getBodyElements());
bodyElements.addAll(document.getHeaderList().stream().flatMap(xwpfHeader -> xwpfHeader.getBodyElements().stream()).toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,9 @@ private void generate() {

private void transform(IBodyElement element, List<IBodyElement> remaining) {
logger.debug("Trying to transform element {}", element);
Locale locale = WordUtilities.detectMostCommonLocale(element.getBody().getXWPFDocument())
.orElse(LocaleUtil.getUserLocale());
if (isCustomPlaceholder(element)) {
resolver.resolve(WordUtilities.extractPlaceholderName((XWPFParagraph) element))
.ifPresent(placeholderData -> placeholderData.transform(element, locale, options));
.ifPresent(placeholderData -> placeholderData.transform(element, LocaleUtil.getUserLocale(), options));
} else if (isLoopStart(element, remaining)) {
unrollLoop((XWPFParagraph) element, remaining);
} else if (element instanceof XWPFParagraph xwpfParagraph) {
Expand All @@ -80,11 +78,9 @@ private void transform(XWPFTable table) {
}

private void transform(XWPFParagraph paragraph) {
Locale locale = WordUtilities.detectMostCommonLocale(paragraph)
.orElse(LocaleUtil.getUserLocale());
Matcher matcher = TAG_PATTERN.matcher(WordUtilities.toString(paragraph));
if (matcher.find()) {
WordUtilities.replaceText(paragraph, matcher.replaceAll(matchResult -> fillPlaceholder(matchResult, locale)));
WordUtilities.replaceText(paragraph, matcher.replaceAll(matchResult -> fillPlaceholder(matchResult, LocaleUtil.getUserLocale())));
}
logger.debug("Transformed paragraph {}", paragraph);
}
Expand Down
116 changes: 0 additions & 116 deletions src/main/java/com/docutools/jocument/impl/word/WordUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,15 @@

import com.docutools.jocument.impl.ParsingUtils;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.xwpf.usermodel.IBody;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFFooter;
import org.apache.poi.xwpf.usermodel.XWPFHeader;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
Expand Down Expand Up @@ -294,109 +286,6 @@ public static Optional<XmlCursor> openCursor(IBodyElement element) {
}
}

/**
 * Determines which locale occurs most often among the runs of the given
 * {@link org.apache.poi.xwpf.usermodel.XWPFDocument}.
 *
 * <p>Runs are gathered from the paragraphs returned by {@code document.getParagraphs()} and from
 * the paragraphs embedded in the document's tables (see
 * {@link #getTableEmbeddedParagraphs(XWPFTable)}). Runs without a language, and language tags
 * rejected by the validity check, are ignored.
 *
 * @param document the document to inspect
 * @return the most frequently used {@link java.util.Locale}, or an empty
 *     {@link java.util.Optional} if no run carries a usable language
 */
public static Optional<Locale> detectMostCommonLocale(XWPFDocument document) {
  Stream<XWPFParagraph> embeddedInTables = document.getTables()
      .stream()
      .flatMap(table -> getTableEmbeddedParagraphs(table).stream());
  Stream<XWPFParagraph> directParagraphs = document.getParagraphs().stream();

  // Count how many runs use each valid locale.
  Map<Locale, Long> usageCounts = Stream.concat(embeddedInTables, directParagraphs)
      .flatMap(paragraph -> paragraph.getRuns().stream())
      .map(XWPFRun::getLang)
      .filter(Objects::nonNull)
      .map(Locale::forLanguageTag)
      .filter(WordUtilities::isValid)
      .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

  // Pick the locale with the highest count, if any was found.
  return usageCounts.entrySet()
      .stream()
      .max(Map.Entry.comparingByValue())
      .map(Map.Entry::getKey);
}

/**
 * Determines which locale occurs most often among the runs of a single paragraph.
 *
 * <p>Each run's language tag is converted to a {@link java.util.Locale}; runs without a language
 * and tags rejected by the validity check are skipped. The remaining locales are counted and the
 * one with the highest count wins.
 *
 * @param paragraph the paragraph whose runs should be inspected
 * @return the most frequently used locale, or an empty {@link java.util.Optional} if no run
 *     carries a usable language
 */
public static Optional<Locale> detectMostCommonLocale(XWPFParagraph paragraph) {
  // Count how many runs use each valid locale.
  Map<Locale, Long> usageCounts = paragraph.getRuns()
      .stream()
      .map(XWPFRun::getLang)
      .filter(Objects::nonNull)
      .map(Locale::forLanguageTag)
      .filter(WordUtilities::isValid)
      .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

  return usageCounts.entrySet()
      .stream()
      .max(Map.Entry.comparingByValue())
      .map(Map.Entry::getKey);
}

/**
 * Returns all languages used in {@link org.apache.poi.xwpf.usermodel.XWPFRun}s throughout the
 * given {@link org.apache.poi.xwpf.usermodel.XWPFDocument}.
 *
 * <p>Runs are gathered from the paragraphs returned by {@code document.getParagraphs()} and from
 * the paragraphs embedded in the document's tables. Runs without a language, and language tags
 * rejected by the validity check, are ignored.
 *
 * @param document the document to parse
 * @return distinct languages as {@link java.util.Locale} instances, in order of first occurrence
 */
public static Collection<Locale> detectLanguages(XWPFDocument document) {
  var tableParagraphs = document.getTables()
      .stream()
      .flatMap(table -> getTableEmbeddedParagraphs(table).stream());

  var documentParagraphs = document.getParagraphs().stream();

  return Stream.concat(tableParagraphs, documentParagraphs)
      .flatMap(paragraph -> paragraph.getRuns().stream())
      .map(XWPFRun::getLang)
      .filter(Objects::nonNull)
      .map(Locale::forLanguageTag)
      // De-duplicate AFTER parsing: different tag strings (e.g. differing only in case) can
      // normalise to the same Locale, which a string-level distinct() would let through twice.
      .distinct()
      .filter(WordUtilities::isValid)
      .toList();
}

/**
 * Collects every paragraph contained in the cells of the given table.
 *
 * <p>Tables nested inside a cell are visited recursively, so their paragraphs are included as
 * well. For each cell, its own paragraphs are added before those of its nested tables.
 *
 * @param table the table to search for embedded paragraphs
 * @return all paragraphs found in the table and its nested tables
 */
public static Collection<XWPFParagraph> getTableEmbeddedParagraphs(XWPFTable table) {
  var collected = new LinkedList<XWPFParagraph>();
  table.getRows()
      .stream()
      .flatMap(row -> row.getTableCells().stream())
      .forEach(cell -> {
        collected.addAll(cell.getParagraphs());
        // Descend into tables nested in this cell.
        cell.getTables()
            .forEach(nested -> collected.addAll(getTableEmbeddedParagraphs(nested)));
      });
  return collected;
}

/**
 * Checks whether the ISO3 language and country of the given locale can be looked up.
 *
 * <p>Approach taken from https://stackoverflow.com/a/3684832
 *
 * @param locale the locale to verify
 * @return {@code true} if both lookups return a non-null value; {@code false} if one of them
 *     returns {@code null} or throws a {@link java.util.MissingResourceException} (which is
 *     logged as a warning)
 */
private static boolean isValid(Locale locale) {
  try {
    if (locale.getISO3Language() == null) {
      return false;
    }
    return locale.getISO3Country() != null;
  } catch (MissingResourceException e) {
    logger.warn("Encountered missing resource exception when trying to verify locale %s".formatted(locale), e);
    return false;
  }
}

private static XWPFTable copyTableTo(XWPFTable sourceTable, XmlCursor cursor) {
logger.debug("Copying table {} before {}", sourceTable, cursor);
var document = sourceTable.getBody().getXWPFDocument();
Expand Down Expand Up @@ -490,11 +379,6 @@ private static void cloneRun(XWPFRun original, XWPFRun clone) {
clone.setText(text != null ? text : "");
}

/**
 * Resolves the language of the given document.
 *
 * <p>The language property stored in the document's core properties is used when present;
 * otherwise the result of {@link #detectMostCommonLocale(XWPFDocument)} is returned.
 *
 * @param document the document to inspect
 * @return the locale from the language property, or else the most common locale of the
 *     document's runs, which may be empty
 */
public static Optional<Locale> getDocumentLanguage(XWPFDocument document) {
  Optional<Locale> declaredLanguage = document.getProperties()
      .getCoreProperties()
      .getUnderlyingProperties()
      .getLanguageProperty()
      .map(Locale::forLanguageTag);
  if (declaredLanguage.isPresent()) {
    return declaredLanguage;
  }
  // No language property set: fall back to what the runs themselves declare.
  return detectMostCommonLocale(document);
}

/**
 * Extracts the placeholder name from a paragraph.
 *
 * <p>The paragraph is converted to text via {@link WordUtilities#toString(XWPFParagraph)} and the
 * result is passed through {@code ParsingUtils.stripBrackets}.
 *
 * @param paragraph the paragraph holding the placeholder
 * @return the paragraph text as returned by {@code ParsingUtils.stripBrackets}
 */
public static String extractPlaceholderName(XWPFParagraph paragraph) {
  String paragraphText = WordUtilities.toString(paragraph);
  return ParsingUtils.stripBrackets(paragraphText);
}
Expand Down

This file was deleted.

Binary file not shown.
Binary file not shown.

0 comments on commit efcb5fd

Please sign in to comment.