diff --git a/README.md b/README.md index eda77a09..2ea9e5b0 100644 --- a/README.md +++ b/README.md @@ -83,12 +83,15 @@ we assume that a quad is simply a triple in a named or default graph. `N3.Parser` transforms Turtle, TriG, N-Triples, or N-Quads document into quads through a callback: ```JavaScript +const tomAndJerry = `PREFIX c: + # Tom is a cat + c:Tom a c:Cat. + c:Jerry a c:Mouse; + c:smarterThan c:Tom.` + const parser = new N3.Parser(); -parser.parse( - `PREFIX c: - c:Tom a c:Cat. - c:Jerry a c:Mouse; - c:smarterThan c:Tom.`, + +parser.parse(tomAndJerry, (error, quad, prefixes) => { if (quad) console.log(quad); @@ -101,9 +104,30 @@ If there are no more quads, the callback is invoked one last time with `null` for `quad` and a hash of prefixes as third argument.
-Pass a second callback to `parse` to retrieve prefixes as they are read. + +Alternatively, an object can be supplied whose `onQuad`, `onPrefix` and `onComment` properties are callbacks that listen for quads, prefixes and comments, as follows: +```JavaScript +const parser = new N3.Parser(); + +parser.parse(tomAndJerry, { + // onQuad (required) accepts a listener of type (error: Error, quad: RDF.Quad) => void + onQuad: (err, quad) => { console.log(quad); }, + // onPrefix (optional) accepts a listener of type (prefix: string, iri: NamedNode) => void + onPrefix: (prefix, iri) => { console.log(prefix, 'expands to', iri.value); }, + // onComment (optional) accepts a listener of type (comment: string) => void + onComment: (comment) => { console.log('#', comment); }, +}); +``` +
-If no callbacks are provided, parsing happens synchronously. +If no callbacks are provided, parsing happens synchronously, returning an array of quads. + +```JavaScript +const parser = new N3.Parser(); + +// An array of the resulting quads +const quadArray = parser.parse(tomAndJerry); +``` By default, `N3.Parser` parses a permissive superset of Turtle, TriG, N-Triples, and N-Quads.
@@ -169,6 +193,8 @@ function SlowConsumer() { A dedicated `prefix` event signals every prefix with `prefix` and `term` arguments. +A dedicated `comment` event can be enabled by setting `comments: true` in the N3.StreamParser constructor. + ## Writing ### From quads to a string diff --git a/src/N3Lexer.js b/src/N3Lexer.js index 8424d727..4e734a06 100644 --- a/src/N3Lexer.js +++ b/src/N3Lexer.js @@ -68,7 +68,7 @@ export default class N3Lexer { this._n3Mode = options.n3 !== false; } // Don't output comment tokens by default - this._comments = !!options.comments; + this.comments = !!options.comments; // Cache the last tested closing position of long literals this._literalClosingPos = 0; } @@ -85,7 +85,7 @@ export default class N3Lexer { let whiteSpaceMatch, comment; while (whiteSpaceMatch = this._newline.exec(input)) { // Try to find a comment - if (this._comments && (comment = this._comment.exec(whiteSpaceMatch[0]))) + if (this.comments && (comment = this._comment.exec(whiteSpaceMatch[0]))) emitToken('comment', comment[1], '', this._line, whiteSpaceMatch[0].length); // Advance the input input = input.substr(whiteSpaceMatch[0].length, input.length); @@ -101,7 +101,7 @@ export default class N3Lexer { // If the input is finished, emit EOF if (inputFinished) { // Try to find a final comment - if (this._comments && (comment = this._comment.exec(input))) + if (this.comments && (comment = this._comment.exec(input))) emitToken('comment', comment[1], '', this._line, input.length); input = null; emitToken('eof', '', '', this._line, 0); diff --git a/src/N3Parser.js b/src/N3Parser.js index 273846e3..fc4af341 100644 --- a/src/N3Parser.js +++ b/src/N3Parser.js @@ -1010,8 +1010,20 @@ export default class N3Parser { // ## Public methods - // ### `parse` parses the N3 input and emits each parsed quad through the callback + // ### `parse` parses the N3 input and emits each parsed quad through the onQuad callback. 
parse(input, quadCallback, prefixCallback) { + // The second parameter accepts an object { onQuad: ..., onPrefix: ..., onComment: ...} + // As a second and third parameter it still accepts a separate quadCallback and prefixCallback for backward compatibility as well + let onQuad, onPrefix, onComment; + if (quadCallback && (quadCallback.onQuad || quadCallback.onPrefix || quadCallback.onComment)) { + onQuad = quadCallback.onQuad; + onPrefix = quadCallback.onPrefix; + onComment = quadCallback.onComment; + } + else { + onQuad = quadCallback; + onPrefix = prefixCallback; + } // The read callback is the next function to be executed when a token arrives. // We start reading in the top context. this._readCallback = this._readInTopContext; @@ -1019,12 +1031,12 @@ export default class N3Parser { this._prefixes = Object.create(null); this._prefixes._ = this._blankNodePrefix ? this._blankNodePrefix.substr(2) : `b${blankNodePrefix++}_`; - this._prefixCallback = prefixCallback || noop; + this._prefixCallback = onPrefix || noop; this._inversePredicate = false; this._quantified = Object.create(null); // Parse synchronously if no quad callback is given - if (!quadCallback) { + if (!onQuad) { const quads = []; let error; this._callback = (e, t) => { e ? 
(error = e) : t && quads.push(t); }; @@ -1035,14 +1047,33 @@ export default class N3Parser { return quads; } - // Parse asynchronously otherwise, executing the read callback when a token arrives - this._callback = quadCallback; - this._lexer.tokenize(input, (error, token) => { + let processNextToken = (error, token) => { if (error !== null) this._callback(error), this._callback = noop; else if (this._readCallback) this._readCallback = this._readCallback(token); - }); + }; + + // Enable checking for comments on every token when a commentCallback has been set + if (onComment) { + // Enable the lexer to return comments as tokens first (disabled by default) + this._lexer.comments = true; + // Patch the processNextToken function + processNextToken = (error, token) => { + if (error !== null) + this._callback(error), this._callback = noop; + else if (this._readCallback) { + if (token.type === 'comment') + onComment(token.value); + else + this._readCallback = this._readCallback(token); + } + }; + } + + // Parse asynchronously otherwise, executing the read callback when a token arrives + this._callback = onQuad; + this._lexer.tokenize(input, processNextToken); } } diff --git a/src/N3StreamParser.js b/src/N3StreamParser.js index bd586352..95610d1b 100644 --- a/src/N3StreamParser.js +++ b/src/N3StreamParser.js @@ -11,6 +11,17 @@ export default class N3StreamParser extends Transform { // Set up parser with dummy stream to obtain `data` and `end` callbacks const parser = new N3Parser(options); let onData, onEnd; + + const callbacks = { + // Handle quads by pushing them down the pipeline + onQuad: (error, quad) => { error && this.emit('error', error) || quad && this.push(quad); }, + // Emit prefixes through the `prefix` event + onPrefix: (prefix, uri) => { this.emit('prefix', prefix, uri); }, + }; + + if (options && options.comments) + callbacks.onComment = comment => { this.emit('comment', comment); }; + parser.parse({ on: (event, callback) => { switch (event) { @@ -18,12 +29,7 
@@ export default class N3StreamParser extends Transform { case 'end': onEnd = callback; break; } }, - }, - // Handle quads by pushing them down the pipeline - (error, quad) => { error && this.emit('error', error) || quad && this.push(quad); }, - // Emit prefixes through the `prefix` event - (prefix, uri) => { this.emit('prefix', prefix, uri); }, - ); + }, callbacks); // Implement Transform methods through parser callbacks this._transform = (chunk, encoding, done) => { onData(chunk); done(); }; diff --git a/test/N3Parser-test.js b/test/N3Parser-test.js index a02de20e..97b7ec62 100644 --- a/test/N3Parser-test.js +++ b/test/N3Parser-test.js @@ -41,6 +41,28 @@ describe('Parser', () => { ['g', 'h', 'i']), ); + it( + 'should parse three triples with comments if no comment callback is set', + shouldParse(' #comment2\n . \n .\n .', + ['a', 'b', 'c'], + ['d', 'e', 'f'], + ['g', 'h', 'i']), + ); + + it( + 'should parse three triples with comments when comment callback is set', + shouldParseWithCommentsEnabled(' #comment2\n . \n .\n .', + ['a', 'b', 'c'], + ['d', 'e', 'f'], + ['g', 'h', 'i']), + ); + + it( + 'should callback comments when a comment callback is set', + shouldCallbackComments('#comment1\n #comment2\n . 
\n .\n .', + 'comment1', 'comment2'), + ); + it('should parse a triple with a literal', shouldParse(' "string".', ['a', 'b', '"string"'])); @@ -203,6 +225,12 @@ describe('Parser', () => { 'Undefined prefix "d:" on line 1.'), ); + it( + 'should not parse undefined prefix in datatype with comments enabled', + shouldNotParseWithComments('#comment\n "c"^^d:e ', + 'Undefined prefix "d:" on line 2.'), + ); + it( 'should parse triples with SPARQL prefixes', shouldParse('PREFIX : <#>\n' + @@ -1601,6 +1629,12 @@ describe('Parser', () => { 'Unexpected literal on line 1.'), ); + it( + 'should not parse a literal as subject', + shouldNotParseWithComments(parser, '1 .', + 'Unexpected literal on line 1.'), + ); + it( 'should not parse RDF-star in the subject position', shouldNotParse(parser, '<< >> .', @@ -1632,6 +1666,12 @@ describe('Parser', () => { shouldNotParse(parser, '<<_:a _:b >> "c" .', 'Expected >> to follow "_:b0_b" on line 1.'), ); + + it( + 'should not parse nested quads with comments', + shouldNotParseWithComments(parser, '#comment1\n<<_:a _:b >> "c" .', + 'Expected >> to follow "_:b0_b" on line 2.'), + ); }); describe('A Parser instance for the TriG format', () => { @@ -3038,6 +3078,57 @@ function shouldParse(parser, input) { }; } +function shouldParseWithCommentsEnabled(parser, input) { + const expected = Array.prototype.slice.call(arguments, 1); + // Shift parameters as necessary + if (parser.call) + expected.shift(); + else + input = parser, parser = Parser; + + return function (done) { + const results = []; + const items = expected.map(mapToQuad); + new parser({ baseIRI: BASE_IRI }).parse(input, { + onQuad: (error, triple) => { + expect(error).toBeFalsy(); + if (triple) + results.push(triple); + else + expect(toSortedJSON(results)).toBe(toSortedJSON(items)), done(); + }, + onComment: comment => { + expect(comment).toBeDefined(); + }, + }); + }; +} + + +function shouldCallbackComments(parser, input) { + const expected = Array.prototype.slice.call(arguments, 1); 
+ // Shift parameters as necessary + if (parser.call) + expected.shift(); + else + input = parser, parser = Parser; + + return function (done) { + const items = expected; + const comments = []; + new parser({ baseIRI: BASE_IRI }).parse(input, { + onQuad: (error, triple) => { + if (!triple) { + // Marks the end + expect(JSON.stringify(comments)).toBe(JSON.stringify(items)); + done(); + } + }, + onComment: comment => { comments.push(comment); }, + }); + }; +} + function mapToQuad(item) { item = item.map(t => { // don't touch if it's already an object @@ -3082,6 +3173,32 @@ function shouldNotParse(parser, input, expectedError, expectedContext) { }; } +function shouldNotParseWithComments(parser, input, expectedError, expectedContext) { + // Shift parameters if necessary + if (!parser.call) + expectedContext = expectedError, expectedError = input, input = parser, parser = Parser; + + return function (done) { + new parser({ baseIRI: BASE_IRI }).parse(input, { + onQuad: (error, triple) => { + if (error) { + expect(triple).toBeFalsy(); + expect(error).toBeInstanceOf(Error); + expect(error.message).toEqual(expectedError); + if (expectedContext) expect(error.context).toEqual(expectedContext); + done(); + } + else if (!triple) + done(new Error(`Expected error ${expectedError}`)); + }, + // Enables comment mode + onComment: comment => { + expect(comment).toBeDefined(); + }, + }); + }; +} + function itShouldResolve(baseIRI, relativeIri, expected) { let result; describe(`resolving <${relativeIri}> against <${baseIRI}>`, () => { diff --git a/test/N3StreamParser-test.js b/test/N3StreamParser-test.js index 5ccfbb0c..b50a3d83 100644 --- a/test/N3StreamParser-test.js +++ b/test/N3StreamParser-test.js @@ -53,6 +53,12 @@ describe('StreamParser', () => { { token: undefined, line: 1, previousToken: undefined }, ); + it( + "doesn't parse an invalid stream with comments", + shouldNotParseWithComments(['z.'], 'Unexpected "z." 
on line 1.'), + { token: undefined, line: 1, previousToken: undefined }, + ); + it( 'Should Not parse Bom in middle stream', shouldNotParse([' ', '\ufeff', '.'], 'Unexpected "" on line 1.'), @@ -64,6 +70,26 @@ describe('StreamParser', () => { { a: new NamedNode('http://a.org/#'), b: new NamedNode('http://b.org/#') }), ); + it( + 'parses two triples with comments when comments not enabled', + shouldParse(['#comment1\n #comment2\n#comment3\n . .'], 2), + ); + + it( + 'parses two triples with comments when comments enabled', + shouldParseWithCommentsEnabled(['#comment1\n #comment2\n#comment3\n . .'], 2), + ); + + it( + 'emits "comment" events', + shouldEmitComments(['#comment1\n #comment2\n#comment3\n . .'], ['comment1', 'comment2', 'comment3']), + ); + + it( + 'emits "comment" events', + shouldNotEmitCommentsWhenNotEnabled(['#comment1\n #comment2\n#comment3\n . .'], ['comment1', 'comment2', 'comment3']), + ); + it('passes an error', () => { const input = new Readable(), parser = new StreamParser(); let error = null; @@ -94,6 +120,24 @@ function shouldParse(chunks, expectedLength, validateTriples) { }; } +function shouldParseWithCommentsEnabled(chunks, expectedLength, validateTriples) { + return function (done) { + const triples = [], + inputStream = new ArrayReader(chunks), + parser = new StreamParser({ comments: true }), + outputStream = new ArrayWriter(triples); + expect(parser.import(inputStream)).toBe(parser); + parser.pipe(outputStream); + parser.on('comment', () => {}); + parser.on('error', done); + parser.on('end', () => { + expect(triples).toHaveLength(expectedLength); + if (validateTriples) validateTriples(triples); + done(); + }); + }; +} + function shouldNotParse(chunks, expectedMessage, expectedContext) { return function (done) { const inputStream = new ArrayReader(chunks), @@ -110,6 +154,22 @@ function shouldNotParse(chunks, expectedMessage, expectedContext) { }; } +function shouldNotParseWithComments(chunks, expectedMessage, expectedContext) { + return 
function (done) { + const inputStream = new ArrayReader(chunks), + parser = new StreamParser({ comments: true }), + outputStream = new ArrayWriter([]); + inputStream.pipe(parser); + parser.pipe(outputStream); + parser.on('error', error => { + expect(error).toBeInstanceOf(Error); + expect(error.message).toBe(expectedMessage); + if (expectedContext) expect(error.context).toEqual(expectedContext); + done(); + }); + }; +} + function shouldEmitPrefixes(chunks, expectedPrefixes) { return function (done) { const prefixes = {}, @@ -126,6 +186,36 @@ function shouldEmitPrefixes(chunks, expectedPrefixes) { }; } +function shouldEmitComments(chunks, expectedComments) { + return function (done) { + const comments = [], + parser = new StreamParser({ comments: true }), + inputStream = new ArrayReader(chunks); + inputStream.pipe(parser); + parser.on('data', () => {}); + parser.on('comment', comment => { comments.push(comment); }); + parser.on('error', done); + parser.on('end', error => { + expect(comments).toEqual(expectedComments); + done(error); + }); + }; +} + +function shouldNotEmitCommentsWhenNotEnabled(chunks, expectedComments) { + return function (done) { + const parser = new StreamParser(), + inputStream = new ArrayReader(chunks); + inputStream.pipe(parser); + parser.on('data', () => {}); + parser.on('comment', comment => { done(new Error('Should not emit comments but it did')); }); + parser.on('error', done); + parser.on('end', error => { + done(); + }); + }; +} + function ArrayReader(items) { const reader = new Readable(); reader._read = function () { this.push(items.shift() || null); };