Skip to content

Commit

Permalink
Merge pull request #55 from profeg/OCSVTransformer_Test_And_Refactor
Browse files Browse the repository at this point in the history
OCSVTransformer test and refactor
  • Loading branch information
lvca committed Apr 1, 2015
2 parents f2d1ca3 + 6a0bc93 commit bc0525e
Show file tree
Hide file tree
Showing 2 changed files with 232 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,35 +94,16 @@ public String getName() {

@Override
public Object executeTransform(final Object input) {
line++;

if (skipFrom > -1) {
if (skipTo > -1) {
if (line >= skipFrom && line <= skipTo)
return null;
} else if (line >= skipFrom)
// SKIP IT
return null;
}
if (skipTransform()) return null;

log(OETLProcessor.LOG_LEVELS.DEBUG, "parsing=%s", input);
log(OETLProcessor.LOG_LEVELS.DEBUG, "parsing=%s", input);

final List<String> fields = OStringSerializerHelper.smartSplit(input.toString(), new char[] { separator }, 0, -1, false, false,
false, false);

if (columnNames == null) {
if (!columnsOnFirstLine)
throw new OTransformException(getName() + ": columnsOnFirstLine=false and no columns declared");
columnNames = fields;

// REMOVE ANY STRING CHARACTERS IF ANY
for (int i = 0; i < columnNames.size(); ++i)
columnNames.set(i, getCellContent(columnNames.get(i)));

return null;
}
if (!isColumnNamesCorrect(fields)) return null;

final ODocument doc = new ODocument();
final ODocument doc = new ODocument();
for (int i = 0; i < columnNames.size() && i < fields.size(); ++i) {
final String fieldName = columnNames.get(i);
Object fieldValue = null;
Expand All @@ -144,36 +125,8 @@ public Object executeTransform(final Object input) {
}
} else if (fieldStringValue != null && !fieldStringValue.isEmpty()) {
// DETERMINE THE TYPE
final char firstChar = fieldStringValue.charAt(0);
if (Character.isDigit(firstChar)) {
// DATE
DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
df.setLenient(true);
try {
fieldValue = df.parse(fieldStringValue);
} catch (ParseException pe) {
// NUMBER
try {
if (fieldStringValue.contains(".") || fieldStringValue.contains(",")) {
String numberAsString = fieldStringValue.replaceAll(",", ".");
fieldValue = new Float(numberAsString);
if (!isFinite((Float) fieldValue)) {
fieldValue = new Double(numberAsString);
}
} else
try {
fieldValue = new Integer(fieldStringValue);
} catch (Exception e) {
fieldValue = new Long(fieldStringValue);
}
} catch (NumberFormatException nf) {
fieldValue = fieldStringValue;
}
}
} else
fieldValue = fieldStringValue;

if (nullValue != null && nullValue.equals(fieldValue))
fieldValue = determineTheType(fieldStringValue);
if (nullValue != null && nullValue.equals(fieldValue))
// NULL VALUE, SKIP
continue;

Expand All @@ -191,7 +144,78 @@ public Object executeTransform(final Object input) {
return doc;
}

/**
/**
 * Picks the typed representation of a raw cell value.
 * <p>
 * The conversions are attempted in order: date first, then numeric. The first one that yields a non-null result wins;
 * when both yield null the original string is returned unchanged.
 *
 * @param fieldStringValue
 *          raw cell content
 * @return the result of the first successful conversion, or {@code fieldStringValue} itself
 */
private Object determineTheType(String fieldStringValue) {
  final Object asDate = transformToDate(fieldStringValue);
  if (asDate != null) {
    return asDate;
  }

  final Object asNumber = transformToNumeric(fieldStringValue);
  if (asNumber != null) {
    return asNumber;
  }

  // Neither conversion produced a value: keep it as a plain string.
  return fieldStringValue;
}

/**
 * Tries to read the value as a {@code yyyy-MM-dd} date using a lenient parser.
 *
 * @param fieldStringValue
 *          raw cell content
 * @return the parsed date, or {@code null} when the parser rejects the text
 */
private Object transformToDate(String fieldStringValue) {
  // A new formatter is built on every call.
  final DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
  dateFormat.setLenient(true);

  // NOTE(review): DateFormat.parse(String) is documented as possibly not consuming the whole input, so text that merely
  // starts with a date-like prefix may be accepted here - confirm this is intended.
  try {
    return dateFormat.parse(fieldStringValue);
  } catch (ParseException notADate) {
    return null;
  }
}

/**
 * Tries to read the value as a number.
 * <p>
 * Values without a '.' or ',' are parsed as {@link Integer}, falling back to {@link Long}. Values with one are parsed
 * as {@link Float} after replacing every ',' with '.', falling back to {@link Double} when the float is not finite.
 *
 * @param fieldStringValue
 *          raw cell content
 * @return the parsed number, or {@code fieldStringValue} itself when it cannot be parsed as a number
 */
private Object transformToNumeric(String fieldStringValue) {
  final boolean hasDecimalMark = fieldStringValue.contains(".") || fieldStringValue.contains(",");

  try {
    if (!hasDecimalMark) {
      try {
        return Integer.valueOf(fieldStringValue);
      } catch (NumberFormatException notAnInt) {
        // Retry as a long; a failure here is handled by the outer catch.
        return Long.valueOf(fieldStringValue);
      }
    }

    // Comma is accepted as an alternative decimal mark.
    final String normalized = fieldStringValue.replace(',', '.');
    final Float asFloat = Float.valueOf(normalized);
    if (isFinite(asFloat)) {
      return asFloat;
    }
    return Double.valueOf(normalized);
  } catch (NumberFormatException notANumber) {
    // Not numeric: hand the original text back (not null).
    return fieldStringValue;
  }
}

/**
 * Makes sure the column names are known before a row is converted.
 * <p>
 * When no column names are set yet, the given fields are adopted as the column names (each passed through
 * {@code getCellContent}) and {@code false} is returned; the caller returns null for that row.
 *
 * @param fields
 *          the fields split from the current line
 * @return {@code true} when column names were already set, {@code false} when this line was just adopted as the header
 * @throws OTransformException
 *           when no column names are set and {@code columnsOnFirstLine} is false
 */
private boolean isColumnNamesCorrect(List<String> fields) {
  if (columnNames != null) {
    return true;
  }

  if (!columnsOnFirstLine) {
    throw new OTransformException(getName() + ": columnsOnFirstLine=false and no columns declared");
  }

  columnNames = fields;

  // REMOVE ANY STRING CHARACTERS IF ANY
  final int total = columnNames.size();
  for (int idx = 0; idx < total; idx++) {
    final String rawName = columnNames.get(idx);
    columnNames.set(idx, getCellContent(rawName));
  }

  return false;
}

/**
 * Advances the line counter and reports whether the current line falls in the configured skip range.
 * <p>
 * Skipping is active only when {@code skipFrom} is greater than -1. With {@code skipTo} also greater than -1 the range
 * is {@code [skipFrom, skipTo]}; otherwise every line from {@code skipFrom} onward is skipped.
 *
 * @return {@code true} when the current line must be skipped
 */
private boolean skipTransform() {
  line++;

  if (skipFrom <= -1) {
    // No skip range configured.
    return false;
  }
  if (line < skipFrom) {
    return false;
  }

  // At or past skipFrom: skip unless an upper bound exists and has been passed.
  return skipTo <= -1 || line <= skipTo;
}

/**
* Backport copy of Float.isFinite() method that was introduced since Java 1.8 but we must support 1.6. TODO replace after
* choosing Java 1.8 as minimal supported
**/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,158 @@ public void testLongWithingQuotes() {
ODocument doc = res.get(0);
assertEquals(new Long(3000000000L), (Long)doc.field("number"));
}

@Test
public void testGetCellContentSingleQuoted() {
  // A value wrapped in one pair of double quotes is expected back without them.
  final OCSVTransformer transformer = new OCSVTransformer();
  final String actual = transformer.getCellContent("\"aaa\"");
  assertEquals("aaa", actual);
}

@Test
public void testGetCellContentDoubleQuoted() {
  // With two pairs of double quotes, only the outer pair is expected to be removed.
  final OCSVTransformer transformer = new OCSVTransformer();
  final String actual = transformer.getCellContent("\"\"aaa\"\"");
  assertEquals("\"aaa\"", actual);
}

@Test
public void testGetCellContentNullValue() {
  // A null cell is expected to stay null.
  final OCSVTransformer transformer = new OCSVTransformer();
  assertNull(transformer.getCellContent(null));
}

@Test
public void testGetCellContentWithoutQuoteString() {
  // An unquoted value is expected to come back unchanged.
  final String plain = "aaa";
  final OCSVTransformer transformer = new OCSVTransformer();
  final String actual = transformer.getCellContent(plain);
  assertEquals(plain, actual);
}

@Test
public void testIsFiniteFloat() {
  final OCSVTransformer transformer = new OCSVTransformer();

  // NaN and both infinities must be reported as not finite.
  final float[] nonFiniteValues = { Float.NaN, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY };
  for (float nonFinite : nonFiniteValues) {
    assertFalse(transformer.isFinite(nonFinite));
  }

  // Zero is finite.
  assertTrue(transformer.isFinite(0f));
}

@Test
public void testNullCell() {
  // Default csv configuration: the empty postId cell is expected to yield no value.
  process(
      "{source: { content: { value: 'id,postId,text\n1,,Hello'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id");
  final Integer postId = first.field("postId");
  final String text = first.field("text");

  assertEquals(Integer.valueOf(1), id);
  assertNull(postId);
  assertEquals("Hello", text);
}

@Test
public void testNullValueInCell() {
  // nullValue is configured as 'NULL': a cell holding exactly that marker is expected to yield no value.
  process(
      "{source: { content: { value: 'id,postId,text\n1,NULL,Hello'} }, extractor : { row : {} }, transformers : [{ csv : {nullValue: 'NULL'} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id");
  final Integer postId = first.field("postId");
  final String text = first.field("text");

  assertEquals(Integer.valueOf(1), id);
  assertNull(postId);
  assertEquals("Hello", text);
}

@Test
public void testNullValueInCellEmptyString() {
  // nullValue is configured as 'NULL': an empty cell is expected to be loaded as an empty string.
  process(
      "{source: { content: { value: 'id,title,text\n1,,Hello'} }, extractor : { row : {} }, transformers : [{ csv : {nullValue: 'NULL'} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id");
  final String title = first.field("title");
  final String text = first.field("text");

  assertEquals(Integer.valueOf(1), id);
  assertEquals("", title);
  assertEquals("Hello", text);
}

@Test
public void testQuotedEmptyString() {
  // A cell holding only a pair of double quotes is expected to be loaded as an empty string.
  process(
      "{source: { content: { value: 'id,title,text\n1,\"\",Hello'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id");
  final String title = first.field("title");
  final String text = first.field("text");

  assertEquals(Integer.valueOf(1), id);
  assertEquals("", title);
  assertEquals("Hello", text);
}

@Test
public void testCRLFDelimiter() {
  // Header and data row are separated by CRLF.
  process(
      "{source: { content: { value: 'id,text,num\r\n1,my test text,1'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id");
  final String text = first.field("text");
  final Integer num = first.field("num");

  assertEquals(Integer.valueOf(1), id);
  assertEquals("my test text", text);
  assertEquals(Integer.valueOf(1), num);
}

@Test
public void testEndingLineBreak() {
  // Same content as the CRLF case, but the input also ends with a CRLF.
  process(
      "{source: { content: { value: 'id,text,num\r\n1,my test text,1\r\n'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id");
  final String text = first.field("text");
  final Integer num = first.field("num");

  assertEquals(Integer.valueOf(1), id);
  assertEquals("my test text", text);
  assertEquals(Integer.valueOf(1), num);
}

@Test
public void testEndingSpaceInFieldName() {
  // Header names carry a trailing space ("id ", "text ", "num "); the test checks that the space is kept in the field name.
  String cfgJson = "{source: { content: { value: 'id ,text ,num \r\n1,my test text,1\r\n'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }";
  process(cfgJson);
  List<ODocument> res = getResult();
  ODocument doc = res.get(0);
  assertEquals(new Integer(1), (Integer) doc.field("id "));

  // Compare by value: assertNotSame only checks reference identity, so it passes whatever the field holds.
  final Object lookupWithoutSpace = doc.field("text");
  assertFalse("my test text".equals(lookupWithoutSpace));
  // The value must be reachable under the space-suffixed name.
  assertEquals("my test text", (String) doc.field("text "));

  assertEquals(new Integer(1), (Integer) doc.field("num "));
}

@Test
public void testCRLFIWithinQuotes() {
  // A CRLF inside a quoted cell is expected to stay part of the cell value.
  process(
      "{source: { content: { value: 'id ,text ,num \r\n1,\"my test\r\n text\",1\r\n'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id ");
  final String text = first.field("text ");
  final Integer num = first.field("num ");

  assertEquals(Integer.valueOf(1), id);
  assertEquals("my test\r\n text", text);
  assertEquals(Integer.valueOf(1), num);
}

@Test
public void testEscapingDoubleQuotes() {
  // A doubled double quote inside a quoted cell is expected to be kept as two quote characters.
  process(
      "{source: { content: { value: 'id ,text ,num \r\n1,\"my test \"\" text\",1\r\n'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }");

  final ODocument first = getResult().get(0);
  final Integer id = first.field("id ");
  final String text = first.field("text ");
  final Integer num = first.field("num ");

  assertEquals(Integer.valueOf(1), id);
  assertEquals("my test \"\" text", text);
  assertEquals(Integer.valueOf(1), num);
}

// Annotated so JUnit runs it: without @Test this method was never executed.
@Test
public void testNegativeInteger() {
  // A value with a leading minus sign and no decimal mark is expected to be loaded as an Integer.
  String cfgJson = "{source: { content: { value: 'id\r\n-1'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }";
  process(cfgJson);
  List<ODocument> res = getResult();
  ODocument doc = res.get(0);
  assertEquals(new Integer(-1), (Integer) doc.field("id"));
}

// Annotated so JUnit runs it: without @Test this method was never executed.
@Test
public void testNegativeFloat() {
  // A value with a leading minus sign and a decimal point is expected to be loaded as a Float.
  String cfgJson = "{source: { content: { value: 'id\r\n-1.0'} }, extractor : { row : {} }, transformers : [{ csv : {} }], loader : { test: {} } }";
  process(cfgJson);
  List<ODocument> res = getResult();
  ODocument doc = res.get(0);
  assertEquals(new Float(-1.0f), (Float) doc.field("id"));
}
}

0 comments on commit bc0525e

Please sign in to comment.