diff --git a/src/language/__tests__/blockStringValue-test.js b/src/language/__tests__/blockStringValue-test.js new file mode 100644 index 0000000000..7dc639eddb --- /dev/null +++ b/src/language/__tests__/blockStringValue-test.js @@ -0,0 +1,110 @@ +/** + * Copyright (c) 2015-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import { expect } from 'chai'; +import { describe, it } from 'mocha'; +import blockStringValue from '../blockStringValue'; + +describe('blockStringValue', () => { + + it('removes uniform indentation from a string', () => { + const rawValue = [ + '', + ' Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('removes empty leading and trailing lines', () => { + const rawValue = [ + '', + '', + ' Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + '', + '', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('removes blank leading and trailing lines', () => { + const rawValue = [ + ' ', + ' ', + ' Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + ' ', + ' ', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('retains indentation from first line', () => { + const rawValue = [ + ' Hello,', + ' World!', + '', + ' Yours,', + ' GraphQL.', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + ' Hello,', + ' World!', + '', + 'Yours,', + ' GraphQL.', + ].join('\n')); + }); + + it('does not alter trailing spaces', () => { + const rawValue = [ + ' ', + ' Hello, ', + ' World! ', + ' ', + ' Yours, ', + ' GraphQL. ', + ' ', + ].join('\n'); + expect(blockStringValue(rawValue)).to.equal([ + 'Hello, ', + ' World! ', + ' ', + 'Yours, ', + ' GraphQL. ', + ].join('\n')); + }); + +}); diff --git a/src/language/__tests__/kitchen-sink.graphql b/src/language/__tests__/kitchen-sink.graphql index ff4a05c444..6fcf394bf3 100644 --- a/src/language/__tests__/kitchen-sink.graphql +++ b/src/language/__tests__/kitchen-sink.graphql @@ -46,7 +46,11 @@ subscription StoryLikeSubscription($input: StoryLikeSubscribeInput) { } fragment frag on Friend { - foo(size: $size, bar: $b, obj: {key: "value"}) + foo(size: $size, bar: $b, obj: {key: "value", block: """ + + block string uses \""" + + """}) } { diff --git a/src/language/__tests__/lexer-test.js b/src/language/__tests__/lexer-test.js index fea56f199a..a0ad472525 100644 --- a/src/language/__tests__/lexer-test.js +++ b/src/language/__tests__/lexer-test.js @@ -289,6 +289,121 @@ describe('Lexer', () => { ); }); + it('lexes block strings', () => { + + expect( + lexOne('"""simple"""') + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 12, + value: 'simple' + }); + + expect( + lexOne('" white space "') + ).to.containSubset({ + kind: TokenKind.STRING, + start: 0, + end: 15, + value: ' white space ' + }); + + expect( + lexOne('"""contains " quote"""') + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 22, + value: 'contains " quote' + }); + + expect( + lexOne('"""contains \\""" triplequote"""') + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 31, + value: 'contains """ triplequote' + }); + + expect( + lexOne('"""multi\nline"""') + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 16, + value: 'multi\nline' + }); + + expect( + lexOne('"""multi\rline\r\nnormalized"""') + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 28, + value: 'multi\nline\nnormalized' + }); + + expect( + lexOne('"""unescaped \\n\\r\\b\\t\\f\\u1234"""') + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 32, + value: 'unescaped \\n\\r\\b\\t\\f\\u1234' + }); + + expect( + lexOne('"""slashes \\\\ \\/"""') + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 19, + value: 'slashes \\\\ \\/' + }); + + expect( + lexOne(`""" + + spans + multiple + lines + + """`) + ).to.containSubset({ + kind: TokenKind.BLOCK_STRING, + start: 0, + end: 68, + value: 'spans\n multiple\n lines' + }); + + }); + + it('lex reports useful block string errors', () => { + + expect( + () => lexOne('"""') + ).to.throw('Syntax Error GraphQL request (1:4) Unterminated string.'); + + expect( + () => lexOne('"""no end quote') + ).to.throw('Syntax Error GraphQL request (1:16) Unterminated string.'); + + expect( + () => lexOne('"""contains unescaped \u0007 control char"""') + ).to.throw( + 'Syntax Error GraphQL request (1:23) Invalid character within String: "\\u0007".' + ); + + expect( + () => lexOne('"""null-byte is not \u0000 end of file"""') + ).to.throw( + 'Syntax Error GraphQL request (1:21) Invalid character within String: "\\u0000".' + ); + + }); + it('lexes numbers', () => { expect( diff --git a/src/language/__tests__/parser-test.js b/src/language/__tests__/parser-test.js index c06bfeb46e..190f070fc7 100644 --- a/src/language/__tests__/parser-test.js +++ b/src/language/__tests__/parser-test.js @@ -326,6 +326,22 @@ describe('Parser', () => { }); }); + it('parses block strings', () => { + expect(parseValue('["""long""" "short"]')).to.containSubset({ + kind: Kind.LIST, + loc: { start: 0, end: 20 }, + values: [ + { kind: Kind.STRING, + loc: { start: 1, end: 11}, + value: 'long', + block: true }, + { kind: Kind.STRING, + loc: { start: 12, end: 19}, + value: 'short', + block: false } ] + }); + }); + }); describe('parseType', () => { diff --git a/src/language/__tests__/printer-test.js b/src/language/__tests__/printer-test.js index 4648e36b29..bf30328879 100644 --- a/src/language/__tests__/printer-test.js +++ b/src/language/__tests__/printer-test.js @@ -127,7 +127,9 @@ describe('Printer', () => { } fragment frag on Friend { - foo(size: $size, bar: $b, obj: {key: "value"}) + foo(size: $size, bar: $b, obj: {key: "value", block: """ + block string uses \""" + """}) } { diff --git a/src/language/__tests__/visitor-test.js b/src/language/__tests__/visitor-test.js index 76515880b5..e7a1d0511f 100644 --- a/src/language/__tests__/visitor-test.js +++ b/src/language/__tests__/visitor-test.js @@ -590,6 +590,12 @@ describe('Visitor', () => { [ 'enter', 'StringValue', 'value', 'ObjectField' ], [ 'leave', 'StringValue', 'value', 'ObjectField' ], [ 'leave', 'ObjectField', 0, undefined ], + [ 'enter', 'ObjectField', 1, undefined ], + [ 'enter', 'Name', 'name', 'ObjectField' ], + [ 'leave', 'Name', 'name', 'ObjectField' ], + [ 'enter', 'StringValue', 'value', 'ObjectField' ], + [ 'leave', 'StringValue', 'value', 'ObjectField' ], + [ 'leave', 'ObjectField', 1, undefined ], [ 'leave', 'ObjectValue', 'value', 'Argument' ], [ 'leave', 'Argument', 2, undefined ], [ 'leave', 'Field', 0, undefined ], diff --git a/src/language/ast.js b/src/language/ast.js index 5ad2e95341..8da495fbad 100644 --- a/src/language/ast.js +++ b/src/language/ast.js @@ -66,6 +66,7 @@ type TokenKind = '' | 'Int' | 'Float' | 'String' + | 'BlockString' | 'Comment'; /** @@ -288,6 +289,7 @@ export type StringValueNode = { kind: 'StringValue'; loc?: Location; value: string; + block?: boolean; }; export type BooleanValueNode = { diff --git a/src/language/blockStringValue.js b/src/language/blockStringValue.js new file mode 100644 index 0000000000..0167cf7876 --- /dev/null +++ b/src/language/blockStringValue.js @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2015-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + * + * @flow + */ + +/** + * Produces the value of a block string from its parsed raw value, similar to + * Coffeescript's block string, Python's docstring trim or Ruby's strip_heredoc. + * + * This implements the GraphQL spec's BlockStringValue() static algorithm. + */ +export default function blockStringValue(rawString: string): string { + // Expand a block string's raw value into independent lines. + const lines = rawString.split(/\r\n|[\n\r]/g); + + // Remove common indentation from all lines but first. + let commonIndent = null; + for (let i = 1; i < lines.length; i++) { + const line = lines[i]; + const indent = leadingWhitespace(line); + if ( + indent < line.length && + (commonIndent === null || indent < commonIndent) + ) { + commonIndent = indent; + if (commonIndent === 0) { + break; + } + } + } + + if (commonIndent) { + for (let i = 1; i < lines.length; i++) { + lines[i] = lines[i].slice(commonIndent); + } + } + + // Remove leading and trailing blank lines. + while (lines.length > 0 && isBlank(lines[0])) { + lines.shift(); + } + while (lines.length > 0 && isBlank(lines[lines.length - 1])) { + lines.pop(); + } + + // Return a string of the lines joined with U+000A. + return lines.join('\n'); +} + +function leadingWhitespace(str) { + let i = 0; + while (i < str.length && (str[i] === ' ' || str[i] === '\t')) { + i++; + } + return i; +} + +function isBlank(str) { + return leadingWhitespace(str) === str.length; +} diff --git a/src/language/lexer.js b/src/language/lexer.js index 04a7bbc276..2199c09e94 100644 --- a/src/language/lexer.js +++ b/src/language/lexer.js @@ -10,6 +10,7 @@ import type { Token } from './ast'; import type { Source } from './source'; import { syntaxError } from '../error'; +import blockStringValue from './blockStringValue'; /** * Given a Source object, this returns a Lexer for that source. @@ -100,6 +101,7 @@ const NAME = 'Name'; const INT = 'Int'; const FLOAT = 'Float'; const STRING = 'String'; +const BLOCK_STRING = 'BlockString'; const COMMENT = 'Comment'; /** @@ -126,6 +128,7 @@ export const TokenKind = { INT, FLOAT, STRING, + BLOCK_STRING, COMMENT }; @@ -269,7 +272,12 @@ function readToken(lexer: Lexer<*>, prev: Token): Token { case 53: case 54: case 55: case 56: case 57: return readNumber(source, position, code, line, col, prev); // " - case 34: return readString(source, position, line, col, prev); + case 34: + if (charCodeAt.call(body, position + 1) === 34 && + charCodeAt.call(body, position + 2) === 34) { + return readBlockString(source, position, line, col, prev); + } + return readString(source, position, line, col, prev); } throw syntaxError( @@ -452,10 +460,14 @@ function readString(source, start, line, col, prev): Token { position < body.length && (code = charCodeAt.call(body, position)) !== null && // not LineTerminator - code !== 0x000A && code !== 0x000D && - // not Quote (") - code !== 34 + code !== 0x000A && code !== 0x000D ) { + // Closing Quote (") + if (code === 34) { + value += slice.call(body, chunkStart, position); + return new Tok(STRING, start, position + 1, line, col, prev, value); + } + // SourceCharacter if (code < 0x0020 && code !== 0x0009) { throw syntaxError( @@ -508,12 +520,73 @@ function readString(source, start, line, col, prev): Token { } } - if (code !== 34) { // quote (") - throw syntaxError(source, position, 'Unterminated string.'); + throw syntaxError(source, position, 'Unterminated string.'); +} + +/** + * Reads a block string token from the source file. + * + * """("?"?(\\"""|\\(?!=""")|[^"\\]))*""" + */ +function readBlockString(source, start, line, col, prev): Token { + const body = source.body; + let position = start + 3; + let chunkStart = position; + let code = 0; + let rawValue = ''; + + while ( + position < body.length && + (code = charCodeAt.call(body, position)) !== null + ) { + // Closing Triple-Quote (""") + if ( + code === 34 && + charCodeAt.call(body, position + 1) === 34 && + charCodeAt.call(body, position + 2) === 34 + ) { + rawValue += slice.call(body, chunkStart, position); + return new Tok( + BLOCK_STRING, + start, + position + 3, + line, + col, + prev, + blockStringValue(rawValue) + ); + } + + // SourceCharacter + if ( + code < 0x0020 && + code !== 0x0009 && + code !== 0x000A && + code !== 0x000D + ) { + throw syntaxError( + source, + position, + `Invalid character within String: ${printCharCode(code)}.` + ); + } + + // Escape Triple-Quote (\""") + if ( + code === 92 && + charCodeAt.call(body, position + 1) === 34 && + charCodeAt.call(body, position + 2) === 34 && + charCodeAt.call(body, position + 3) === 34 + ) { + rawValue += slice.call(body, chunkStart, position) + '"""'; + position += 4; + chunkStart = position; + } else { + ++position; + } } - value += slice.call(body, chunkStart, position); - return new Tok(STRING, start, position + 1, line, col, prev, value); + throw syntaxError(source, position, 'Unterminated string.'); } /** diff --git a/src/language/parser.js b/src/language/parser.js index 5defac882f..a0cbee1437 100644 --- a/src/language/parser.js +++ b/src/language/parser.js @@ -543,10 +543,12 @@ function parseValueLiteral(lexer: Lexer<*>, isConst: boolean): ValueNode { loc: loc(lexer, token) }; case TokenKind.STRING: + case TokenKind.BLOCK_STRING: lexer.advance(); return { kind: (STRING: 'StringValue'), value: ((token.value: any): string), + block: token.kind === TokenKind.BLOCK_STRING, loc: loc(lexer, token) }; case TokenKind.NAME: diff --git a/src/language/printer.js b/src/language/printer.js index 37ec2d0b70..a611b73c28 100644 --- a/src/language/printer.js +++ b/src/language/printer.js @@ -72,7 +72,10 @@ const printDocASTReducer = { IntValue: ({ value }) => value, FloatValue: ({ value }) => value, - StringValue: ({ value }) => JSON.stringify(value), + StringValue: ({ value, block: isBlockString }) => + isBlockString ? + `"""\n${value.replace(/"""/g, '\\"""')}\n"""` : + JSON.stringify(value), BooleanValue: ({ value }) => JSON.stringify(value), NullValue: () => 'null', EnumValue: ({ value }) => value,