From 16b7a8e9a70f4c22bec618591900b5d4ae2ff2a0 Mon Sep 17 00:00:00 2001 From: Alex Tsibulya Date: Wed, 21 Sep 2016 07:38:51 -0700 Subject: [PATCH 1/4] Test issue 150 - Add a test for issue #150 to specify the expected behavior - Mark it `skip` pending implementation --- test/issues.test.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/issues.test.js b/test/issues.test.js index 3f186e3c..50b78913 100644 --- a/test/issues.test.js +++ b/test/issues.test.js @@ -239,4 +239,16 @@ it.describe("github issues", function (it) { }); }); }); + + it.skip("#150", function (it) { + it.should("not parse a row if a new line is ambiguous and there is more data", function () { + var data = "first_name,last_name,email_address\r"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "first_name,last_name,email_address\r", + "rows": [] + }); + }); + }); }); From 43c6d6a4b644eec80624cf2cf4398785d78e831b Mon Sep 17 00:00:00 2001 From: Alex Tsibulya Date: Tue, 20 Sep 2016 07:43:05 -0700 Subject: [PATCH 2/4] Test split CRLF - Add a test for CRLF split between two buffers (a.k.a issue #150) to specify the expected behavior - Mark it `skip` pending implementation --- test/parser.test.js | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/test/parser.test.js b/test/parser.test.js index 13fddbe8..4b5bfa7b 100644 --- a/test/parser.test.js +++ b/test/parser.test.js @@ -319,7 +319,7 @@ it.describe("fast-csv parser", function (it) { }); it.should("parse a block of CSV text with a trailing delimiter", function () { - var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com,\n"; + var data = "first_name,last_name,email_address,empty\rFirst1,Last1,email1@email.com,\r"; var myParser = parser({delimiter: ","}); assert.deepEqual(myParser(data, false), { "line": "", "rows": [ @@ -330,7 +330,7 @@ it.describe("fast-csv parser", function (it) { }); 
it.should("parse a block of CSV text with a trailing delimiter followed by a space", function() { - var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com, \n"; + var data = "first_name,last_name,email_address,empty\nFirst1,Last1,email1@email.com, \r"; var myParser = parser({ delimiter: "," }); assert.deepEqual(myParser(data, false), { "line": "", "rows": [ @@ -341,7 +341,7 @@ it.describe("fast-csv parser", function (it) { }); it.should("parse a block of Space Separated Value text with a trailing delimiter", function() { - var data = "first_name last_name email_address empty\nFirst1 Last1 email1@email.com \n"; + var data = "first_name last_name email_address empty\rFirst1 Last1 email1@email.com \r"; var myParser = parser({ delimiter: " " }); assert.deepEqual(myParser(data, false), { "line": "", "rows": [ @@ -614,6 +614,16 @@ it.describe("fast-csv parser", function (it) { }); }); + it.skip("not parse a row if a new line is incomplete and there is more data", function () { + var data = "first_name,last_name,email_address\r"; + var myParser = parser({delimiter: ","}); + var parsedData = myParser(data, true); + assert.deepEqual(parsedData, { + "line": "first_name,last_name,email_address\r", + "rows": [] + }); + }); + it.should("not parse a row if there is a trailing delimiter and there is more data", function () { var data = "first_name,last_name,email_address,"; var myParser = parser({delimiter: ","}); From 8d63d596d6c149e8ba91af0c969f6aaa5d4981ac Mon Sep 17 00:00:00 2001 From: Alex Tsibulya Date: Tue, 20 Sep 2016 07:52:21 -0700 Subject: [PATCH 3/4] Test ambiguous CR - Modify existing tests for `\r` row delimiter to specify the behavior in case of CR vs. CRLF ambiguity (issue #150) - Mark them `skip` pending implementation --- test/parser.test.js | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/test/parser.test.js b/test/parser.test.js index 4b5bfa7b..4bb5137e 100644 --- a/test/parser.test.js +++ 
b/test/parser.test.js @@ -400,15 +400,13 @@ it.describe("fast-csv parser", function (it) { }); }); - it.should("parse a row if a new line is found and there is more data", function () { + it.skip("not parse a row if an ambiguous new line is found and there is more data", function () { var data = "first_name,last_name,email_address\r"; var myParser = parser({delimiter: ","}); var parsedData = myParser(data, true); assert.deepEqual(parsedData, { - "line": "", - "rows": [ - ["first_name", "last_name", "email_address"] - ] + "line": "first_name,last_name,email_address\r", + "rows": [] }); }); @@ -532,15 +530,13 @@ it.describe("fast-csv parser", function (it) { }); }); - it.should("parse a row if a new line is found and there is more data", function () { + it.skip("not parse a row if an ambiguous new line is found and there is more data", function () { var data = '"first_name","last_name","email_address"\r'; var myParser = parser({delimiter: ","}); var parsedData = myParser(data, true); assert.deepEqual(parsedData, { - "line": "", - "rows": [ - ["first_name", "last_name", "email_address"] - ] + "line": '"first_name","last_name","email_address"\r', + "rows": [] }); }); }); From 0b9b82bb972b1886ef84d0713a3acab0a7b3149c Mon Sep 17 00:00:00 2001 From: Alex Tsibulya Date: Wed, 21 Sep 2016 07:37:05 -0700 Subject: [PATCH 4/4] Keep the line if a new line is ambiguous Modify the parser to - parse CRLF as a single token - keep the current line unparsed if it ends in CR and there's more data This solves the issues #146 and #150 by ensuring that CRLF split by a buffer boundary doesn't get treated as two row delimiters CR+LF --- lib/parser/parser.js | 12 +++++++++--- test/issues.test.js | 2 +- test/parser.test.js | 6 +++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/parser/parser.js b/lib/parser/parser.js index ea6875ce..b50a809c 100644 --- a/lib/parser/parser.js +++ b/lib/parser/parser.js @@ -143,10 +143,11 @@ function createParser(options) { } function 
getNextToken(line, cursor) { - var token, nextIndex, subStr = line.substr(cursor); + var token, tokenLen, nextIndex, subStr = line.substr(cursor); if ((nextIndex = subStr.search(NEXT_TOKEN_REGEXP)) !== -1) { - token = line[cursor += nextIndex]; - cursor += subStr.match(NEXT_TOKEN_REGEXP)[1].length - 1; + tokenLen = subStr.match(NEXT_TOKEN_REGEXP)[1].length; + token = line.substr(cursor + nextIndex, tokenLen); + cursor += nextIndex + tokenLen - 1; } return {token: token, cursor: cursor}; } @@ -167,6 +168,11 @@ function createParser(options) { items = []; lastLineI = i; } else { + // if ends with CR and there is more data, keep unparsed due to possible coming LF in CRLF + if (token === '\r' && hasMoreData) { + i = lastLineI; + cursor = null; + } break; } } else if (hasComments && token === COMMENT) { diff --git a/test/issues.test.js b/test/issues.test.js index 50b78913..91f28217 100644 --- a/test/issues.test.js +++ b/test/issues.test.js @@ -240,7 +240,7 @@ it.describe("github issues", function (it) { }); }); - it.skip("#150", function (it) { + it.describe("#150", function (it) { it.should("not parse a row if a new line is ambiguous and there is more data", function () { var data = "first_name,last_name,email_address\r"; var myParser = parser({delimiter: ","}); diff --git a/test/parser.test.js b/test/parser.test.js index 4bb5137e..ba078054 100644 --- a/test/parser.test.js +++ b/test/parser.test.js @@ -400,7 +400,7 @@ it.describe("fast-csv parser", function (it) { }); }); - it.skip("not parse a row if an ambiguous new line is found and there is more data", function () { + it.should("not parse a row if an ambiguous new line is found and there is more data", function () { var data = "first_name,last_name,email_address\r"; var myParser = parser({delimiter: ","}); var parsedData = myParser(data, true); @@ -530,7 +530,7 @@ it.describe("fast-csv parser", function (it) { }); }); - it.skip("not parse a row if an ambiguous new line is found and there is more data", function () 
{ + it.should("not parse a row if an ambiguous new line is found and there is more data", function () { var data = '"first_name","last_name","email_address"\r'; var myParser = parser({delimiter: ","}); var parsedData = myParser(data, true); @@ -610,7 +610,7 @@ it.describe("fast-csv parser", function (it) { }); }); - it.skip("not parse a row if a new line is incomplete and there is more data", function () { + it.should("not parse a row if a new line is incomplete and there is more data", function () { var data = "first_name,last_name,email_address\r"; var myParser = parser({delimiter: ","}); var parsedData = myParser(data, true);