diff --git a/pkg/ast/ast_val_string_value.go b/pkg/ast/ast_val_string_value.go index ac7345d9b..2c3eef172 100644 --- a/pkg/ast/ast_val_string_value.go +++ b/pkg/ast/ast_val_string_value.go @@ -37,6 +37,100 @@ func (d *Document) StringValueIsBlockString(ref int) bool { return d.StringValues[ref].BlockString } +func (d *Document) BlockStringValueContentRawBytes(ref int) []byte { + + // Gets the full block string content, just inside the """ quotes. + // This is needed because the lexer ignores whitespace and we need to preserve it + // to account for the indentation of the block string. + + blockStart := 0 + for i := int(d.StringValues[ref].Content.Start) - 1; i >= 0; i-- { + if d.Input.RawBytes[i] == '"' { + blockStart = i + 1 + break + } + } + + blockEnd := d.Input.Length + for i := int(d.StringValues[ref].Content.End); i < d.Input.Length; i++ { + if d.Input.RawBytes[i] == '"' { + blockEnd = i + break + } + } + + return d.Input.RawBytes[blockStart:blockEnd] +} + +func (d *Document) BlockStringValueContentRawString(ref int) string { + return unsafebytes.BytesToString(d.BlockStringValueContentRawBytes(ref)) +} + +func (d *Document) BlockStringValueContentBytes(ref int) []byte { + + // Implements https://spec.graphql.org/October2021/#BlockStringValue() + + // NOTE: This implementation exactly follows the spec. + // It likely could be optimized for performance. 
+ + // split the raw value into lines + rawValue := d.BlockStringValueContentRawBytes(ref) + lines := splitBytesIntoLines(rawValue) + + // find the common indent size (-1 means no common indent) + commonIndent := -1 + for i, line := range lines { + if i == 0 { + continue + } + indent := leadingWhitespaceCount(line) + if indent < len(line) { + if commonIndent == -1 || indent < commonIndent { + commonIndent = indent + } + } + } + + // remove the common indent from each line + if commonIndent != -1 { + for i := 1; i < len(lines); i++ { + var indent int + if len(lines[i]) > commonIndent { + indent = commonIndent + } else { + indent = len(lines[i]) + } + + lines[i] = lines[i][indent:] + } + } + + // find first non-whitespace-only line + firstLine := 0 + for i, line := range lines { + if leadingWhitespaceCount(line) != len(line) { + firstLine = i + break + } + } + + // find last non-whitespace-only line + lastLine := len(lines) - 1 + for i := len(lines) - 1; i >= 0; i-- { + if leadingWhitespaceCount(lines[i]) != len(lines[i]) { + lastLine = i + break + } + } + + // join the lines to keep and return the result + return bytes.Join(lines[firstLine:lastLine+1], []byte{'\n'}) +} + +func (d *Document) BlockStringValueContentString(ref int) string { + return unsafebytes.BytesToString(d.BlockStringValueContentBytes(ref)) +} + func (d *Document) StringValuesAreEquals(left, right int) bool { return d.StringValueIsBlockString(left) == d.StringValueIsBlockString(right) && bytes.Equal(d.StringValueContentBytes(left), d.StringValueContentBytes(right)) diff --git a/pkg/ast/ast_value.go b/pkg/ast/ast_value.go index 9ca94b6bc..17f385142 100644 --- a/pkg/ast/ast_value.go +++ b/pkg/ast/ast_value.go @@ -2,6 +2,7 @@ package ast import ( "bytes" + "encoding/json" "fmt" "io" @@ -144,7 +145,20 @@ func (d *Document) writeJSONValue(buf *bytes.Buffer, value Value) error { buf.Write(literal.TRUE) } case ValueKindString: - buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref))) + if 
d.StringValueIsBlockString(value.Ref) { + content := d.BlockStringValueContentString(value.Ref) + + enc := json.NewEncoder(buf) + enc.SetEscapeHTML(false) + if err := enc.Encode(content); err != nil { + return err + } + + // Remove the extra newline that Encode adds + buf.Truncate(buf.Len() - 1) + } else { + buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref))) + } case ValueKindList: buf.WriteByte(literal.LBRACK_BYTE) for ii, ref := range d.ListValues[value.Ref].Refs { diff --git a/pkg/ast/ast_value_test.go b/pkg/ast/ast_value_test.go index 11e84cd59..e26856109 100644 --- a/pkg/ast/ast_value_test.go +++ b/pkg/ast/ast_value_test.go @@ -83,15 +83,53 @@ func TestDocument_ValueToJSON(t *testing.T) { Ref: 1, } }, `true`)) - t.Run("ValueKindString", run(func(doc *Document) Value { + t.Run("ValueKindString - non-block", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ - Content: doc.Input.AppendInputString("foo"), + Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`), }) return Value{ Kind: ValueKindString, Ref: 0, } - }, `"foo"`)) + }, `"foo\nbar\tbaz\"qux"`)) + t.Run("ValueKindString - block", run(func(doc *Document) Value { + doc.Input.AppendInputString(`"""`) + doc.StringValues = append(doc.StringValues, StringValue{ + BlockString: true, + Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"), + }) + doc.Input.AppendInputString(`"""`) + return Value{ + Kind: ValueKindString, + Ref: 0, + } + }, `"foo\nbar\tbaz\"qux"`)) + t.Run("ValueKindString - block with indent", run(func(doc *Document) Value { + doc.Input.AppendInputString(`"""`) + doc.Input.AppendInputString("\n") + doc.StringValues = append(doc.StringValues, StringValue{ + BlockString: true, + Content: doc.Input.AppendInputString(" foo\n bar"), + }) + doc.Input.AppendInputString("\n") + doc.Input.AppendInputString(`"""`) + return Value{ + Kind: ValueKindString, + Ref: 0, + } + }, `"foo\nbar"`)) + t.Run("ValueKindString - block with mixed indent", 
run(func(doc *Document) Value { + doc.Input.AppendInputString(`"""`) + doc.StringValues = append(doc.StringValues, StringValue{ + BlockString: true, + Content: doc.Input.AppendInputString("foo\n\t bar\n\t baz"), + }) + doc.Input.AppendInputString(`"""`) + return Value{ + Kind: ValueKindString, + Ref: 0, + } + }, `"foo\nbar\n baz"`)) t.Run("ValueKindList", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ Content: doc.Input.AppendInputString("foo"), @@ -187,21 +225,21 @@ func TestDocument_PrintValue(t *testing.T) { } t.Run("ValueKindString - non-block", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ - Content: doc.Input.AppendInputString("foo"), + Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`), }) return Value{ Kind: ValueKindString, Ref: 0, } - }, `"foo"`)) + }, `"foo\nbar\tbaz\"qux"`)) t.Run("ValueKindString - block", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ BlockString: true, - Content: doc.Input.AppendInputString("foo"), + Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"), }) return Value{ Kind: ValueKindString, Ref: 0, } - }, `"""foo"""`)) + }, "\"\"\"foo\nbar\tbaz\"qux\"\"\"")) } diff --git a/pkg/ast/helpers.go b/pkg/ast/helpers.go index 1121d1aee..3cebfd4cd 100644 --- a/pkg/ast/helpers.go +++ b/pkg/ast/helpers.go @@ -15,3 +15,44 @@ func indexOf(refs []int, ref int) (int, bool) { func deleteRef(refs *[]int, index int) { *refs = append((*refs)[:index], (*refs)[index+1:]...) 
// splitBytesIntoLines splits data into lines at the line terminators "\n", "\r"
// and "\r\n", as defined by
// https://spec.graphql.org/October2021/#sec-Line-Terminators
//
// The terminators themselves are not part of the returned lines. The result always
// contains at least one line: empty input yields a single empty line, and input
// ending in a terminator yields a trailing empty line. The returned lines are
// sub-slices of data, not copies.
func splitBytesIntoLines(data []byte) [][]byte {
	var lines [][]byte
	start := 0

	for i := 0; i < len(data); i++ {
		c := data[i]
		if c != '\n' && c != '\r' {
			continue
		}

		lines = append(lines, data[start:i])

		// Treat "\r\n" as a single terminator by skipping the '\n'.
		if c == '\r' && i+1 < len(data) && data[i+1] == '\n' {
			i++
		}

		start = i + 1
	}

	// Whatever follows the last terminator (possibly nothing) is the final line.
	return append(lines, data[start:])
}
+ + blockStart := 0 + for i := int(d.StringValues[ref].Content.Start) - 1; i >= 0; i-- { + if d.Input.RawBytes[i] == '"' { + blockStart = i + 1 + break + } + } + + blockEnd := d.Input.Length + for i := int(d.StringValues[ref].Content.End); i < d.Input.Length; i++ { + if d.Input.RawBytes[i] == '"' { + blockEnd = i + break + } + } + + return d.Input.RawBytes[blockStart:blockEnd] +} + +func (d *Document) BlockStringValueContentRawString(ref int) string { + return unsafebytes.BytesToString(d.BlockStringValueContentRawBytes(ref)) +} + +func (d *Document) BlockStringValueContentBytes(ref int) []byte { + + // Implements https://spec.graphql.org/October2021/#BlockStringValue() + + // NOTE: This implementation exactly follows the spec. + // It likely could be optimized for performance. + + // split the raw value into lines + rawValue := d.BlockStringValueContentRawBytes(ref) + lines := splitBytesIntoLines(rawValue) + + // find the common indent size (-1 means no common indent) + commonIndent := -1 + for i, line := range lines { + if i == 0 { + continue + } + indent := leadingWhitespaceCount(line) + if indent < len(line) { + if commonIndent == -1 || indent < commonIndent { + commonIndent = indent + } + } + } + + // remove the common indent from each line + if commonIndent != -1 { + for i := 1; i < len(lines); i++ { + var indent int + if len(lines[i]) > commonIndent { + indent = commonIndent + } else { + indent = len(lines[i]) + } + + lines[i] = lines[i][indent:] + } + } + + // find first non-whitespace-only line + firstLine := 0 + for i, line := range lines { + if leadingWhitespaceCount(line) != len(line) { + firstLine = i + break + } + } + + // find last non-whitespace-only line + lastLine := len(lines) - 1 + for i := len(lines) - 1; i >= 0; i-- { + if leadingWhitespaceCount(lines[i]) != len(lines[i]) { + lastLine = i + break + } + } + + // join the lines to keep and return the result + return bytes.Join(lines[firstLine:lastLine+1], []byte{'\n'}) +} + +func (d *Document) 
BlockStringValueContentString(ref int) string { + return unsafebytes.BytesToString(d.BlockStringValueContentBytes(ref)) +} + func (d *Document) StringValuesAreEquals(left, right int) bool { return d.StringValueIsBlockString(left) == d.StringValueIsBlockString(right) && bytes.Equal(d.StringValueContentBytes(left), d.StringValueContentBytes(right)) diff --git a/v2/pkg/ast/ast_value.go b/v2/pkg/ast/ast_value.go index d8b6a0252..b6ca70d76 100644 --- a/v2/pkg/ast/ast_value.go +++ b/v2/pkg/ast/ast_value.go @@ -2,6 +2,7 @@ package ast import ( "bytes" + "encoding/json" "fmt" "io" @@ -144,7 +145,20 @@ func (d *Document) writeJSONValue(buf *bytes.Buffer, value Value) error { buf.Write(literal.TRUE) } case ValueKindString: - buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref))) + if d.StringValueIsBlockString(value.Ref) { + content := d.BlockStringValueContentString(value.Ref) + + enc := json.NewEncoder(buf) + enc.SetEscapeHTML(false) + if err := enc.Encode(content); err != nil { + return err + } + + // Remove the extra newline that Encode adds + buf.Truncate(buf.Len() - 1) + } else { + buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref))) + } case ValueKindList: buf.WriteByte(literal.LBRACK_BYTE) for ii, ref := range d.ListValues[value.Ref].Refs { diff --git a/v2/pkg/ast/ast_value_test.go b/v2/pkg/ast/ast_value_test.go index 11e84cd59..e26856109 100644 --- a/v2/pkg/ast/ast_value_test.go +++ b/v2/pkg/ast/ast_value_test.go @@ -83,15 +83,53 @@ func TestDocument_ValueToJSON(t *testing.T) { Ref: 1, } }, `true`)) - t.Run("ValueKindString", run(func(doc *Document) Value { + t.Run("ValueKindString - non-block", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ - Content: doc.Input.AppendInputString("foo"), + Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`), }) return Value{ Kind: ValueKindString, Ref: 0, } - }, `"foo"`)) + }, `"foo\nbar\tbaz\"qux"`)) + t.Run("ValueKindString - block", run(func(doc *Document) 
Value { + doc.Input.AppendInputString(`"""`) + doc.StringValues = append(doc.StringValues, StringValue{ + BlockString: true, + Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"), + }) + doc.Input.AppendInputString(`"""`) + return Value{ + Kind: ValueKindString, + Ref: 0, + } + }, `"foo\nbar\tbaz\"qux"`)) + t.Run("ValueKindString - block with indent", run(func(doc *Document) Value { + doc.Input.AppendInputString(`"""`) + doc.Input.AppendInputString("\n") + doc.StringValues = append(doc.StringValues, StringValue{ + BlockString: true, + Content: doc.Input.AppendInputString(" foo\n bar"), + }) + doc.Input.AppendInputString("\n") + doc.Input.AppendInputString(`"""`) + return Value{ + Kind: ValueKindString, + Ref: 0, + } + }, `"foo\nbar"`)) + t.Run("ValueKindString - block with mixed indent", run(func(doc *Document) Value { + doc.Input.AppendInputString(`"""`) + doc.StringValues = append(doc.StringValues, StringValue{ + BlockString: true, + Content: doc.Input.AppendInputString("foo\n\t bar\n\t baz"), + }) + doc.Input.AppendInputString(`"""`) + return Value{ + Kind: ValueKindString, + Ref: 0, + } + }, `"foo\nbar\n baz"`)) t.Run("ValueKindList", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ Content: doc.Input.AppendInputString("foo"), @@ -187,21 +225,21 @@ func TestDocument_PrintValue(t *testing.T) { } t.Run("ValueKindString - non-block", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ - Content: doc.Input.AppendInputString("foo"), + Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`), }) return Value{ Kind: ValueKindString, Ref: 0, } - }, `"foo"`)) + }, `"foo\nbar\tbaz\"qux"`)) t.Run("ValueKindString - block", run(func(doc *Document) Value { doc.StringValues = append(doc.StringValues, StringValue{ BlockString: true, - Content: doc.Input.AppendInputString("foo"), + Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"), }) return Value{ Kind: ValueKindString, Ref: 0, } - 
// leadingWhitespaceCount returns the number of consecutive space or tab bytes at
// the start of line. It equals len(line) when line is empty or consists solely of
// spaces and tabs.
func leadingWhitespaceCount(line []byte) int {
	for idx, b := range line {
		switch b {
		case ' ', '\t':
			// still inside the leading whitespace run
		default:
			return idx
		}
	}
	return len(line)
}