Skip to content

Commit

Permalink
fix: unit test
Browse files: browse the repository at this point in the history
  • Loading branch information
young authored and jazzido committed Jul 15, 2024
1 parent 20b1053 commit a017306
Showing 1 changed file with 57 additions and 52 deletions.
109 changes: 57 additions & 52 deletions src/test/java/technology/tabula/TestTextElement.java
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
package technology.tabula;

import java.util.ArrayList;
import java.util.List;

import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

public class TestTextElement {


Expand All @@ -24,7 +24,7 @@ public void createTextElement() {
Assert.assertEquals(5f, textElement.getTop(), 0);
Assert.assertEquals(10f, textElement.getWidth(), 0);
Assert.assertEquals(20f, textElement.getHeight(), 0);
Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont());
Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
Assert.assertEquals(0f, textElement.getDirection(), 0);

Expand All @@ -43,7 +43,7 @@ public void createTextElementWithDirection() {
Assert.assertEquals(5f, textElement.getTop(), 0);
Assert.assertEquals(10f, textElement.getWidth(), 0);
Assert.assertEquals(20f, textElement.getHeight(), 0);
Assert.assertEquals(new PDType1Font(Standard14Fonts.FontName.HELVETICA), textElement.getFont());
Assert.assertEquals(Standard14Fonts.FontName.HELVETICA.getName(), textElement.getFont().getName());
Assert.assertEquals(1f, textElement.getWidthOfSpace(), 0);
Assert.assertEquals(6f, textElement.getDirection(), 0);

Expand All @@ -54,18 +54,19 @@ public void createTextElementWithDirection() {
public void mergeFourElementsIntoFourWords() {

List<TextElement> elements = new ArrayList<>();
elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
elements.add(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
elements.add(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
elements.add(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
elements.add(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f));
elements.add(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f));
elements.add(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f));

List<TextChunk> words = TextElement.mergeWords(elements);

List<TextChunk> expectedWords = new ArrayList<>();
expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f)));
expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f)));
expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f)));
expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f)));
expectedWords.add(new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f)));
expectedWords.add(new TextChunk(new TextElement(20f, 15f, 10f, 20f, font, 1f, "B", 1f, 6f)));
expectedWords.add(new TextChunk(new TextElement(40f, 15f, 10f, 20f, font, 1f, "C", 1f, 6f)));
expectedWords.add(new TextChunk(new TextElement(60f, 15f, 10f, 20f, font, 1f, "D", 1f, 6f)));

Assert.assertEquals(expectedWords, words);

Expand All @@ -75,18 +76,19 @@ public void mergeFourElementsIntoFourWords() {
public void mergeFourElementsIntoOneWord() {

List<TextElement> elements = new ArrayList<>();
elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));

List<TextChunk> words = TextElement.mergeWords(elements);

List<TextChunk> expectedWords = new ArrayList<>();
TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
textChunk.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
textChunk.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));
expectedWords.add(textChunk);

Assert.assertEquals(expectedWords, words);
Expand All @@ -101,10 +103,11 @@ public void mergeElementsShouldBeIdempotent() {
*/

List<TextElement> elements = new ArrayList<>();
elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
elements.add(new TextElement(0f, 45f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
elements.add(new TextElement(0f, 45f, 10f, 20f, font, 1f, "D", 1f, 6f));

List<TextChunk> words = TextElement.mergeWords(elements);
List<TextChunk> words2 = TextElement.mergeWords(elements);
Expand All @@ -115,20 +118,21 @@ public void mergeElementsShouldBeIdempotent() {
public void mergeElementsWithSkippingRules() {

List<TextElement> elements = new ArrayList<>();
elements.add(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 17f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
elements.add(new TextElement(0.001f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f, 6f));
elements.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
elements.add(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 17f, 10f, 20f, font, 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
elements.add(new TextElement(0.001f, 25f, 10f, 20f, font, 1f, " ", 1f, 6f));
elements.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
PDFont TIMES_ROMAN = new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN);
elements.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));

List<TextChunk> words = TextElement.mergeWords(elements);

List<TextChunk> expectedWords = new ArrayList<>();
TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
textChunk.add(new TextElement(0f, 25f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "B", 1f, 6f));
textChunk.add(new TextElement(0f, 35f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "C", 1f, 6f));
TextChunk textChunk = new TextChunk(new TextElement(0f, 15f, 10f, 20f, font, 1f, "A", 1f, 6f));
textChunk.add(new TextElement(0f, 25f, 10f, 20f, font, 1f, "B", 1f, 6f));
textChunk.add(new TextElement(0f, 35f, 10f, 20f, font, 1f, "C", 1f, 6f));
textChunk.add(new TextElement(0f, 45f, 10f, 20f, TIMES_ROMAN, 10f, "D", 1f, 6f));
expectedWords.add(textChunk);

Expand All @@ -140,30 +144,31 @@ public void mergeElementsWithSkippingRules() {
public void mergeTenElementsIntoTwoWords() {

List<TextElement> elements = new ArrayList<>();
elements.add(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
elements.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
elements.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
elements.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
elements.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
elements.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
elements.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
elements.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
elements.add(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
elements.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
elements.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
elements.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
elements.add(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
elements.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
elements.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
elements.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
elements.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));

List<TextChunk> words = TextElement.mergeWords(elements);

List<TextChunk> expectedWords = new ArrayList<>();
TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "H", 1f, 6f));
textChunk.add(new TextElement(0f, 10f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
textChunk.add(new TextElement(0f, 20f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "L", 1f, 6f));
textChunk.add(new TextElement(0f, 30f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "A", 1f, 6f));
textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, " ", 1f)); //Check why width=10.5?
TextChunk textChunk = new TextChunk(new TextElement(0f, 0f, 10f, 20f, font, 1f, "H", 1f, 6f));
textChunk.add(new TextElement(0f, 10f, 10f, 20f, font, 1f, "O", 1f, 6f));
textChunk.add(new TextElement(0f, 20f, 10f, 20f, font, 1f, "L", 1f, 6f));
textChunk.add(new TextElement(0f, 30f, 10f, 20f, font, 1f, "A", 1f, 6f));
textChunk.add(new TextElement(0f, 30f, 10.5f, 20f, font, 1f, " ", 1f)); //Check why width=10.5?
expectedWords.add(textChunk);
TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "M", 1f, 6f));
textChunk2.add(new TextElement(0f, 70f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "U", 1f, 6f));
textChunk2.add(new TextElement(0f, 80f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "N", 1f, 6f));
textChunk2.add(new TextElement(0f, 90f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "D", 1f, 6f));
textChunk2.add(new TextElement(0f, 100f, 10f, 20f, new PDType1Font(Standard14Fonts.FontName.HELVETICA), 1f, "O", 1f, 6f));
TextChunk textChunk2 = new TextChunk(new TextElement(0f, 60f, 10f, 20f, font, 1f, "M", 1f, 6f));
textChunk2.add(new TextElement(0f, 70f, 10f, 20f, font, 1f, "U", 1f, 6f));
textChunk2.add(new TextElement(0f, 80f, 10f, 20f, font, 1f, "N", 1f, 6f));
textChunk2.add(new TextElement(0f, 90f, 10f, 20f, font, 1f, "D", 1f, 6f));
textChunk2.add(new TextElement(0f, 100f, 10f, 20f, font, 1f, "O", 1f, 6f));
expectedWords.add(textChunk2);

Assert.assertEquals(2, words.size());
Expand Down

0 comments on commit a017306

Please sign in to comment.