Skip to content

Commit

Permalink
Improved treatment of whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
rccarrasco committed Nov 23, 2013
1 parent 6a31c0e commit 94bebf7
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<groupId>eu.digitisation</groupId>
<artifactId>ocrevalUAtion</artifactId>
<name>ocrevalUAtion</name>
<version>0.9-SNAPSHOT</version>
<version>0.91</version>
<packaging>jar</packaging>
<description>OCR Evaluation Tool</description>
<organization>
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/replacements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
2028 0020
2029 0020
F1AC 003B
EFA1 00E6
EEC4 0063006B
Expand Down
74 changes: 74 additions & 0 deletions src/test/java/eu/digitisation/io/CharFilterTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright (C) 2013 IMPACT Centre of Competence
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

package eu.digitisation.io;

import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;

/**
 * JUnit tests for {@link CharFilter}.
 *
 * @author rafa
 */
public class CharFilterTest {

    /** Creates the test case; there is no per-instance state to initialise. */
    public CharFilterTest() {
    }

    /** Class-wide set-up hook; intentionally empty. */
    @BeforeClass
    public static void setUpClass() {
    }

    /** Class-wide tear-down hook; intentionally empty. */
    @AfterClass
    public static void tearDownClass() {
    }

    /** Per-test set-up hook; intentionally empty. */
    @Before
    public void setUp() {
    }

    /** Per-test tear-down hook; intentionally empty. */
    @After
    public void tearDown() {
    }

    /**
     * Checks that {@code translate} turns the input "a" followed by U+2028
     * into "a" followed by an ordinary space, using a filter built from the
     * {@code /replacements.txt} classpath resource.
     *
     * @throws java.net.URISyntaxException if the resource URL cannot be
     *         converted to a URI
     */
    @Test
    public void testTranslate_String() throws URISyntaxException {
        System.out.println("translate");

        // Resolve the replacements table from the classpath as a plain file.
        final URL replacementsUrl = getClass().getResource("/replacements.txt");
        final File replacementsFile = Paths.get(replacementsUrl.toURI()).toFile();
        final CharFilter charFilter = new CharFilter(replacementsFile);

        final String expected = "a ";
        final String actual = charFilter.translate("a\u2028");

        // Length is compared first, then the content itself.
        assertEquals(expected.length(), actual.length());
        assertEquals(expected, actual);
    }

}
41 changes: 21 additions & 20 deletions src/test/java/eu/digitisation/io/TextContentTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
package eu.digitisation.io;

import java.io.File;
import java.io.PrintWriter;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;
import org.junit.After;
import org.junit.AfterClass;
import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
Expand All @@ -32,41 +33,41 @@
* @author carrasco@ua.es
*/
public class TextContentTest {

/** Creates the test case; there is no per-instance state to initialise. */
public TextContentTest() {
}

/** Class-wide set-up hook (JUnit {@code @BeforeClass}); intentionally empty. */
@BeforeClass
public static void setUpClass() {
}

/** Class-wide tear-down hook (JUnit {@code @AfterClass}); intentionally empty. */
@AfterClass
public static void tearDownClass() {
}

/** Per-test set-up hook (JUnit {@code @Before}); intentionally empty. */
@Before
public void setUp() {
}

/** Per-test tear-down hook (JUnit {@code @After}); intentionally empty. */
@After
public void tearDown() {
}

/**
* Test of getText method, of class Text.
* @throws java.lang.Exception
* Test of toString method, of class TextContent.
* @throws java.net.URISyntaxException
*/
@Test
public void testGetText() throws Exception {
System.out.println("getText");
URL inURL = getClass().getResource("/00445310.xml");
File ifile = Paths.get(inURL.toURI()).toFile();
URL outURL = getClass().getResource("/00445310.txt");
File ofile = Paths.get(outURL.toURI()).toFile();
TextContent instance = new TextContent(ifile, "utf-8", null);
try (PrintWriter writer = new PrintWriter(ofile)) {
String result = instance.toString();
writer.write(result);
}
public void testToString() throws URISyntaxException {
    System.out.println("toString");

    // Build the character filter from the /replacements.txt classpath resource.
    final URL replacementsUrl = getClass().getResource("/replacements.txt");
    final File replacementsFile = Paths.get(replacementsUrl.toURI()).toFile();
    final CharFilter charFilter = new CharFilter(replacementsFile);

    // Input mixes spaces around line breaks with a U+2028 separator.
    final String rawText = "hola " + "\n" + " y\u2028 de todo\n";
    final String expected = "hola y de todo";

    final String actual = new TextContent(rawText, charFilter).toString();

    // Length is compared first, then the content itself.
    assertEquals(expected.length(), actual.length());
    assertEquals(expected, actual);
}
}
}

0 comments on commit 94bebf7

Please sign in to comment.