Skip to content

Commit

Permalink
Data table improvements: isolation of RTL strings, merge of tables wi…
Browse files Browse the repository at this point in the history
…th similar headers (with blanks)
  • Loading branch information
ediweissmann committed May 12, 2020
1 parent dd0ac8b commit ce93526
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.sejda.core.support.util;

import static java.lang.Character.*;

public final class StringUtils {
private StringUtils() {
// hide
Expand All @@ -39,4 +41,37 @@ public static String asUnicodes(String in) {
/**
 * Converts Windows-style line endings to Unix-style ones.
 *
 * <p>Only the two-character CR LF sequence is rewritten to a single LF; a carriage return that is
 * not followed by a line feed is left untouched.
 *
 * @param in the text to convert; must not be {@code null}
 * @return {@code in} with every CR LF pair replaced by LF
 */
public static String normalizeLineEndings(String in) {
    final String windowsEol = "\r\n";
    final String unixEol = "\n";
    // Plain literal replacement: the sequence has no regex meaning worth compiling a pattern for
    return in.replace(windowsEol, unixEol);
}

/**
 * Wraps right-to-left text in Unicode directional isolate marks.
 *
 * <p>When {@link #isRtl(String)} reports the text as right-to-left, it is returned enclosed between
 * U+2068 (FIRST STRONG ISOLATE) and U+2069 (POP DIRECTIONAL ISOLATE). Any other input, including
 * {@code null}, is returned as is.
 *
 * @param s the text to inspect; may be {@code null}
 * @return the isolated text when {@code s} is right-to-left, otherwise {@code s} itself
 */
public static String isolateRTLIfRequired(String s) {
    if (!isRtl(s)) {
        return s;
    }
    return '\u2068' + s + '\u2069';
}

/**
 * Tells whether the given text reads right-to-left, judged by its first strongly directional
 * character.
 *
 * <p>Characters are examined in order. The first one whose directionality is right-to-left
 * (including Arabic and the explicit RTL embedding/override controls) makes the result
 * {@code true}; the first one that is left-to-right (including the explicit LTR
 * embedding/override controls) makes it {@code false}. Characters of any other directionality
 * (digits, whitespace, punctuation, ...) are skipped.
 *
 * @param string the text to inspect; may be {@code null}
 * @return {@code true} if the first strongly directional character is right-to-left;
 *         {@code false} for {@code null}, empty text, or text without any such character
 */
public static boolean isRtl(String string) {
    if (string == null) {
        return false;
    }

    // Walk code points, not chars: a supplementary-plane character is stored as two surrogate
    // chars, and looking those up one by one never sees the directionality of the real character.
    for (int i = 0, n = string.length(); i < n; ) {
        int codePoint = string.codePointAt(i);
        i += Character.charCount(codePoint);

        switch (Character.getDirectionality(codePoint)) {
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
                return true;

            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
                return false;

            default:
                // Not strongly directional: keep scanning
                break;
        }
    }

    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@
package org.sejda.impl.sambox.component.excel;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.TreeSet;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.lang.Character.*;
import static org.apache.commons.lang3.StringUtils.rightPad;
import static org.sejda.core.support.util.StringUtils.isolateRTLIfRequired;

public class DataTable {

Expand All @@ -46,8 +46,14 @@ public DataTable(Collection<Integer> pageNumbers) {
this.pageNumbers.addAll(pageNumbers);
}

public void addRow(String... dataRow) {
addRow(Arrays.asList(dataRow));
/**
 * Appends a row made of the given cell values and returns this table, so calls can be chained.
 *
 * @param dataRow the cell values of the new row, in column order
 * @return this table
 */
public DataTable addRow(String... dataRow) {
    // A growable copy rather than a fixed-size array view; addBlankColumn inserts cells into rows
    // (assumes addRow(List) keeps the list it is given - TODO confirm)
    List<String> cells = new ArrayList<>();
    for (int i = 0; i < dataRow.length; i++) {
        cells.add(dataRow[i]);
    }

    addRow(cells);
    return this;
}

public void addRow(List<String> dataRow) {
Expand All @@ -61,16 +67,24 @@ public void addRows(List<List<String>> dataRows) {
/**
 * Returns the header row, which is the first row stored in this table.
 *
 * @return the first row of {@code data} (the stored list itself, not a copy)
 */
public List<String> headerRow() {
    return this.data.get(0);
}

/**
 * Returns the header cells that contain something other than whitespace.
 *
 * <p>Blank cells (empty once trimmed) are dropped; the remaining cells are returned in their
 * original order and with their original, untrimmed text.
 *
 * @return a new list holding the non-blank cells of the first row
 */
public List<String> headerRowIgnoreBlanks() {
    List<String> nonBlankCells = new ArrayList<>();
    for (String cell : data.get(0)) {
        boolean blank = cell.trim().isEmpty();
        if (!blank) {
            nonBlankCells.add(cell);
        }
    }
    return nonBlankCells;
}

public boolean hasSameHeaderAs(DataTable other) {
String thisHeader = String.join("", this.headerRow()).trim();
String otherHeader = String.join("", other.headerRow()).trim();
String thisHeader = String.join("", this.headerRowIgnoreBlanks()).trim();
String otherHeader = String.join("", other.headerRowIgnoreBlanks()).trim();
LOG.debug("Comparing header columns: '{}' and '{}'", thisHeader, otherHeader);

return thisHeader.equalsIgnoreCase(otherHeader);
}

/**
 * Tells whether this table and the given one have the same header once blank header cells are
 * left out.
 *
 * <p>The comparison is an exact, ordered, case-sensitive match of the non-blank header cells.
 *
 * @param other the table to compare with
 * @return {@code true} if both tables have equal non-blank header cells
 */
public boolean hasSameHeaderBlanksIgnoredAs(DataTable other) {
    List<String> ownHeader = this.headerRowIgnoreBlanks();
    List<String> otherHeader = other.headerRowIgnoreBlanks();
    return ownHeader.equals(otherHeader);
}

public boolean hasSameColumnsAs(DataTable other) {
/**
 * Tells whether this table and the given one have header rows of the same length.
 *
 * @param other the table to compare with
 * @return {@code true} if both header rows hold the same number of cells
 */
public boolean hasSameColumnCountAs(DataTable other) {
    int ownCount = this.headerRow().size();
    int otherCount = other.headerRow().size();
    LOG.debug("Comparing header columns size: {} and {}", ownCount, otherCount);
    return otherCount == ownCount;
}
Expand All @@ -82,7 +96,7 @@ public List<List<String>> getData() {
/**
 * Returns the page numbers associated with this table.
 *
 * @return the sorted set held by this table (the same instance, not a copy)
 */
public TreeSet<Integer> getPageNumbers() {
    return this.pageNumbers;
}

public DataTable mergeWith(DataTable other) {
TreeSet<Integer> resultPageNumbers = new TreeSet<>();
resultPageNumbers.addAll(this.pageNumbers);
Expand Down Expand Up @@ -190,6 +204,12 @@ public DataTable mergeColumns(int c1, int c2) {
}
return result;
}

/**
 * Inserts an empty cell at the given column position in every row of this table, shifting the
 * cells at and after that position one column to the right.
 *
 * <p>Rows that are shorter than {@code index} are first padded with empty cells, so that the
 * insertion also works on tables whose rows have uneven lengths.
 *
 * @param index zero-based position of the new column
 * @throws IndexOutOfBoundsException if {@code index} is negative
 */
public void addBlankColumn(int index) {
    for (List<String> row : this.data) {
        // Rows can be shorter than the header (see getOrEmpty and the bounds check in toString);
        // inserting past the end of such a row would otherwise throw IndexOutOfBoundsException.
        while (row.size() < index) {
            row.add("");
        }
        row.add(index, "");
    }
}

private static String getOrEmpty(List<String> list, int index) {
if (list.size() <= index) {
Expand Down Expand Up @@ -228,7 +248,7 @@ public String toString() {
String cellPadded = rightPad("", colWidths.get(j));

if(j < row.size()) {
cellPadded = ensureLtr(rightPad(row.get(j), colWidths.get(j)));
cellPadded = isolateRTLIfRequired(rightPad(row.get(j), colWidths.get(j)));
}

sb.append("|").append(cellPadded);
Expand All @@ -242,38 +262,4 @@ public String toString() {

return sb.toString();
}

// TODO: ensure strings with mixed arabic and latin are displayed properly
/**
 * Prefixes right-to-left text with U+200E (LEFT-TO-RIGHT MARK).
 *
 * @param s the text to inspect; may be {@code null}
 * @return {@code s} preceded by U+200E when {@code isRtl(s)} holds, otherwise {@code s} itself
 */
private String ensureLtr(String s) {
    return isRtl(s) ? '\u200e' + s : s;
}

/**
 * Tells whether the given text reads right-to-left, judged by its first strongly directional
 * character.
 *
 * <p>Characters are examined in order. The first one whose directionality is right-to-left
 * (including Arabic and the explicit RTL embedding/override controls) makes the result
 * {@code true}; the first one that is left-to-right (including the explicit LTR
 * embedding/override controls) makes it {@code false}. Characters of any other directionality
 * (digits, whitespace, punctuation, ...) are skipped.
 *
 * @param string the text to inspect; may be {@code null}
 * @return {@code true} if the first strongly directional character is right-to-left;
 *         {@code false} for {@code null}, empty text, or text without any such character
 */
public static boolean isRtl(String string) {
    if (string == null) {
        return false;
    }

    // Walk code points, not chars: a supplementary-plane character is stored as two surrogate
    // chars, and looking those up one by one never sees the directionality of the real character.
    for (int i = 0, n = string.length(); i < n; ) {
        int codePoint = string.codePointAt(i);
        i += Character.charCount(codePoint);

        switch (Character.getDirectionality(codePoint)) {
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
                return true;

            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
                return false;

            default:
                // Not strongly directional: keep scanning
                break;
        }
    }

    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ public static List<DataTable> mergeTablesSpanningMultiplePages(List<DataTable> d

for (DataTable dt : dataTables) {
if (current != null) {
if (current.hasSameColumnsAs(dt)) {
if (current.hasSameHeaderBlanksIgnoredAs(dt)) {
addBlankColumnsToMatchHeaders(current, dt);
}

if (current.hasSameColumnCountAs(dt)) {
current = current.mergeWith(dt);
} else {
results.add(current);
Expand All @@ -62,6 +66,31 @@ public static List<DataTable> mergeComplementaryColumns(List<DataTable> dataTabl
}
return results;
}

/**
 * Aligns the columns of two tables whose headers differ only by blank columns.
 *
 * <p>Both headers are walked side by side. Wherever one header has a blank cell that the other
 * lacks, a blank column is inserted into the other table at that position. Blank columns left at
 * the end of one header are mirrored at the end of the other table too, so that both tables end
 * up with the same number of header columns. Both tables may be modified.
 *
 * @param a the first table
 * @param b the second table
 * @throws IllegalArgumentException if the headers differ by more than blank cells
 * @throws IllegalStateException if a mismatch between non-blank cells is met while aligning
 */
public static void addBlankColumnsToMatchHeaders(DataTable a, DataTable b) {
    if (!a.hasSameHeaderBlanksIgnoredAs(b)) {
        throw new IllegalArgumentException("Only works when tables have same headers (blanks ignored)");
    }

    // headerRow() returns the table's own first row, so both lists reflect the blank columns
    // inserted below: sizes grow and a freshly inserted cell is matched on the next pass.
    List<String> aHeaderRow = a.headerRow();
    List<String> bHeaderRow = b.headerRow();
    int aa = 0, bb = 0;
    while (aa < aHeaderRow.size() && bb < bHeaderRow.size()) {
        String aCol = aHeaderRow.get(aa).trim();
        String bCol = bHeaderRow.get(bb).trim();

        if (aCol.equals(bCol)) {
            aa++;
            bb++;
        } else if (aCol.isEmpty()) {
            b.addBlankColumn(bb);
        } else if (bCol.isEmpty()) {
            a.addBlankColumn(aa);
        } else {
            throw new IllegalStateException("Should not happen");
        }
    }

    // The loop stops as soon as one header is exhausted. Whatever is left in the other header
    // can only be blank cells; mirror them so that both tables get the same column count.
    for (; aa < aHeaderRow.size(); aa++, bb++) {
        if (!aHeaderRow.get(aa).trim().isEmpty()) {
            throw new IllegalStateException("Should not happen");
        }
        b.addBlankColumn(bb);
    }
    for (; bb < bHeaderRow.size(); aa++, bb++) {
        if (!bHeaderRow.get(bb).trim().isEmpty()) {
            throw new IllegalStateException("Should not happen");
        }
        a.addBlankColumn(aa);
    }
}

static DataTable mergeComplementaryColumns(DataTable dataTable) {
DataTable result = dataTable;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,24 @@ public void testToStringWithArabic() {
DataTable data = new DataTable(1);
data.addRow("Word one longer header", "Word two");
data.addRow("Hello", "Goodbye", "1");
// TODO: ensure strings with mixed arabic and latin are displayed properly
//data.addRow("مرحبا", "مرحبًا ABC 123", "وداعا");
data.addRow("مرحبا", "مرحبًا ABC 123", "وداعا");
data.addRow("مرحبا", "مرحبًا", "وداعا");
data.addRow("مرحبا", "", "وداعا");

System.out.println(data.toString());
// System.out.println(data.toString());

String expected = "+-------------------------------------+\n" +
"|Word one longer header|Word two| |\n" +
"+-------------------------------------+\n" +
"|Hello |Goodbye |1 |\n" +
"+-------------------------------------+\n" +
"|\u200Eمرحبا |\u200Eمرحبًا |\u200Eوداعا|\n" +
"+-------------------------------------+";
String expected =
"+-------------------------------------------+\n" +
"|Word one longer header|Word two | |\n" +
"+-------------------------------------------+\n" +
"|Hello |Goodbye |1 |\n" +
"+-------------------------------------------+\n" +
"|\u2068مرحبا \u2069|\u2068مرحبًا ABC 123\u2069|\u2068وداعا\u2069|\n" +
"+-------------------------------------------+\n" +
"|\u2068مرحبا \u2069|\u2068مرحبًا \u2069|\u2068وداعا\u2069|\n" +
"+-------------------------------------------+\n" +
"|\u2068مرحبا \u2069| |\u2068وداعا\u2069|\n" +
"+-------------------------------------------+";

assertThat(data.toString().trim(), is(expected.trim()));
}
Expand Down Expand Up @@ -133,6 +138,18 @@ public void mergeColumnsWhenUnevenRowLength() {
assertThat(merged.getColumn(1), is(Arrays.asList("HeadB HeadC", "B1 C1", "B2", "", "B4 C4")));
}

// Inserting a blank column must shift the existing cells of every row, header included.
@Test
public void addBlankColumn() {
    DataTable table = new DataTable(1);
    table.addRow("H1", "H2", "H3");
    table.addRow("A1", "A2", "A3");

    // New empty column between the first and the second one
    table.addBlankColumn(1);

    assertThat(table.getRow(0), is(Arrays.asList("H1", "", "H2", "H3")));
    assertThat(table.getRow(1), is(Arrays.asList("A1", "", "A2", "A3")));
}

@Test
public void testPagesAsString() {
assertThat(new DataTable(1).getPagesAsString(), is("Page 1"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.junit.Assert.assertThat;

import java.util.Arrays;
import java.util.List;

import org.junit.Test;

Expand Down Expand Up @@ -73,4 +74,53 @@ public void mergeComplementaryColumnsThatNeedsMultiplePasses() {
assertThat(merged.getColumn(2), is(Arrays.asList("H4", "D1", "E2", "")));

}
}

// Scenario 1: the second table has the same header as the first one plus two extra blank
// header columns (one holding a single space, one empty). Both tables are expected to be
// merged into a single table of five columns.
@Test
public void mergeWithAccountingBlankHeaders_scenario1() {
// Three columns, on page 1
DataTable dt = new DataTable(1)
.addRow("H1", "H2", "H3")
.addRow("A1", "A2", "A3");

// Five columns, on page 3: two blank header cells sit between H1 and H2
DataTable dt2 = new DataTable(3)
.addRow("H1", " ", "", "H2", "H3")
.addRow("C1", "CX", "CY", "C2", "C3");

List<DataTable> mergedList = DataTableUtils.mergeTablesSpanningMultiplePages(Arrays.asList(dt, dt2));
// A single result means the two tables were merged rather than kept apart
assertThat(mergedList.size(), is(1));
DataTable merged = mergedList.get(0);

// NOTE(review): the blank cells in the expected rows below are narrower than the border line;
// cell padding may have been lost in this copy - confirm against DataTable.toString()
assertThat(merged.toString(), is("\n" +
"+--------------+\n" +
"|H1| | |H2|H3|\n" +
"+--------------+\n" +
"|A1| | |A2|A3|\n" +
"+--------------+\n" +
"|C1|CX|CY|C2|C3|\n" +
"+--------------+\n"));

}

// Scenario 2: each table has one blank header column, but at a different position. Both tables
// are expected to be merged into a single table of five columns, each one receiving a blank
// column where the other table has its blank header.
@Test
public void mergeWithAccountingBlankHeaders_scenario2() {
// Blank header between H1 and H2, on page 1
DataTable dt = new DataTable(1)
.addRow("H1", "", "H2", "H3")
.addRow("A1", "AX", "A2", "A3");

// Blank header between H2 and H3, on page 3
DataTable dt2 = new DataTable(3)
.addRow("H1", "H2", "", "H3")
.addRow("C1", "C2", "CX", "C3");

List<DataTable> mergedList = DataTableUtils.mergeTablesSpanningMultiplePages(Arrays.asList(dt, dt2));
// A single result means the two tables were merged rather than kept apart
assertThat(mergedList.size(), is(1));
DataTable merged = mergedList.get(0);

// NOTE(review): the blank cells in the expected rows below are narrower than the border line;
// cell padding may have been lost in this copy - confirm against DataTable.toString()
assertThat(merged.toString(), is("\n" +
"+--------------+\n" +
"|H1| |H2| |H3|\n" +
"+--------------+\n" +
"|A1|AX|A2| |A3|\n" +
"+--------------+\n" +
"|C1| |C2|CX|C3|\n" +
"+--------------+\n"));
}
}

0 comments on commit ce93526

Please sign in to comment.