Skip to content

Commit

Permalink
fix: JSON block string encoding (#843)
Browse files Browse the repository at this point in the history
Fixes #839 

Note, I considered using `gjson.AppendJSONString` for better
performance, but it currently doesn't have an option to disable escaping
of HTML characters - which I think would be undesirable here. See
tidwall/gjson#362

---------

Co-authored-by: Sergiy 🇺🇦 <818351+devsergiy@users.noreply.github.com>
  • Loading branch information
mattjohnsonpint and devsergiy authored Jul 15, 2024
1 parent 16602c6 commit 699eb81
Show file tree
Hide file tree
Showing 8 changed files with 390 additions and 16 deletions.
94 changes: 94 additions & 0 deletions pkg/ast/ast_val_string_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,100 @@ func (d *Document) StringValueIsBlockString(ref int) bool {
return d.StringValues[ref].BlockString
}

// BlockStringValueContentRawBytes returns the untrimmed bytes of the block string
// referenced by ref, i.e. everything just inside the """ quotes.
//
// The recorded Content range is widened in both directions until a '"' byte is hit,
// because (per the original author's note) the lexer ignores whitespace and that
// whitespace is needed to work out the block string's indentation.
// The returned slice aliases d.Input.RawBytes.
func (d *Document) BlockStringValueContentRawBytes(ref int) []byte {
	content := d.StringValues[ref].Content
	raw := d.Input.RawBytes

	// Walk backwards to the nearest quote preceding the content. If none exists
	// the index runs down to -1, so the increment below yields 0.
	blockStart := int(content.Start) - 1
	for blockStart >= 0 && raw[blockStart] != '"' {
		blockStart--
	}
	blockStart++

	// Walk forwards to the first quote at or after the end of the content,
	// defaulting to the end of the input when there is none.
	blockEnd := d.Input.Length
	for i := int(content.End); i < d.Input.Length; i++ {
		if raw[i] != '"' {
			continue
		}
		blockEnd = i
		break
	}

	return raw[blockStart:blockEnd]
}

// BlockStringValueContentRawString returns the result of
// BlockStringValueContentRawBytes for ref, converted to a string
// with unsafebytes.BytesToString.
func (d *Document) BlockStringValueContentRawString(ref int) string {
	raw := d.BlockStringValueContentRawBytes(ref)
	return unsafebytes.BytesToString(raw)
}

// BlockStringValueContentBytes returns the semantic value of the block string
// referenced by ref, implementing
// https://spec.graphql.org/October2021/#BlockStringValue()
//
// The raw content is split into lines, the common indentation is removed from
// every line except the first, leading and trailing whitespace-only lines are
// dropped, and the remaining lines are joined with '\n' into a new byte slice.
// A block string made up solely of whitespace-only lines yields an empty slice.
//
// NOTE(review): the spec also maps the escape sequence \""" to """; that is not
// done here - confirm whether it is handled elsewhere.
func (d *Document) BlockStringValueContentBytes(ref int) []byte {

	// NOTE: This implementation follows the spec step by step.
	// It likely could be optimized for performance.

	// split the raw value into lines
	rawValue := d.BlockStringValueContentRawBytes(ref)
	lines := splitBytesIntoLines(rawValue)

	// find the common indent size (-1 means no common indent);
	// the first line and whitespace-only lines do not take part
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		indent := leadingWhitespaceCount(lines[i])
		if indent == len(lines[i]) {
			continue
		}
		if commonIndent == -1 || indent < commonIndent {
			commonIndent = indent
		}
	}

	// remove the common indent from each line except the first;
	// whitespace-only lines shorter than the common indent become empty
	if commonIndent != -1 {
		for i := 1; i < len(lines); i++ {
			indent := commonIndent
			if len(lines[i]) <= commonIndent {
				indent = len(lines[i])
			}

			lines[i] = lines[i][indent:]
		}
	}

	// drop leading and trailing whitespace-only lines; if every line is
	// whitespace-only the window [first, end) collapses to nothing, so the
	// result is empty instead of echoing the whitespace back
	first, end := 0, len(lines)
	for first < end && leadingWhitespaceCount(lines[first]) == len(lines[first]) {
		first++
	}
	for end > first && leadingWhitespaceCount(lines[end-1]) == len(lines[end-1]) {
		end--
	}

	// join the lines to keep and return the result
	return bytes.Join(lines[first:end], []byte{'\n'})
}

// BlockStringValueContentString returns the result of
// BlockStringValueContentBytes for ref, converted to a string
// with unsafebytes.BytesToString.
func (d *Document) BlockStringValueContentString(ref int) string {
	content := d.BlockStringValueContentBytes(ref)
	return unsafebytes.BytesToString(content)
}

func (d *Document) StringValuesAreEquals(left, right int) bool {
return d.StringValueIsBlockString(left) == d.StringValueIsBlockString(right) &&
bytes.Equal(d.StringValueContentBytes(left), d.StringValueContentBytes(right))
Expand Down
16 changes: 15 additions & 1 deletion pkg/ast/ast_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ast

import (
"bytes"
"encoding/json"
"fmt"
"io"

Expand Down Expand Up @@ -144,7 +145,20 @@ func (d *Document) writeJSONValue(buf *bytes.Buffer, value Value) error {
buf.Write(literal.TRUE)
}
case ValueKindString:
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
if d.StringValueIsBlockString(value.Ref) {
content := d.BlockStringValueContentString(value.Ref)

enc := json.NewEncoder(buf)
enc.SetEscapeHTML(false)
if err := enc.Encode(content); err != nil {
return err
}

// Remove the extra newline that Encode adds
buf.Truncate(buf.Len() - 1)
} else {
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
}
case ValueKindList:
buf.WriteByte(literal.LBRACK_BYTE)
for ii, ref := range d.ListValues[value.Ref].Refs {
Expand Down
52 changes: 45 additions & 7 deletions pkg/ast/ast_value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,53 @@ func TestDocument_ValueToJSON(t *testing.T) {
Ref: 1,
}
}, `true`))
t.Run("ValueKindString", run(func(doc *Document) Value {
t.Run("ValueKindString - non-block", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
Content: doc.Input.AppendInputString("foo"),
Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`),
})
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo"`))
}, `"foo\nbar\tbaz\"qux"`))
t.Run("ValueKindString - block", run(func(doc *Document) Value {
doc.Input.AppendInputString(`"""`)
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"),
})
doc.Input.AppendInputString(`"""`)
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo\nbar\tbaz\"qux"`))
t.Run("ValueKindString - block with indent", run(func(doc *Document) Value {
doc.Input.AppendInputString(`"""`)
doc.Input.AppendInputString("\n")
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString(" foo\n bar"),
})
doc.Input.AppendInputString("\n")
doc.Input.AppendInputString(`"""`)
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo\nbar"`))
t.Run("ValueKindString - block with mixed indent", run(func(doc *Document) Value {
doc.Input.AppendInputString(`"""`)
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString("foo\n\t bar\n\t baz"),
})
doc.Input.AppendInputString(`"""`)
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo\nbar\n baz"`))
t.Run("ValueKindList", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
Content: doc.Input.AppendInputString("foo"),
Expand Down Expand Up @@ -187,21 +225,21 @@ func TestDocument_PrintValue(t *testing.T) {
}
t.Run("ValueKindString - non-block", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
Content: doc.Input.AppendInputString("foo"),
Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`),
})
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo"`))
}, `"foo\nbar\tbaz\"qux"`))
t.Run("ValueKindString - block", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString("foo"),
Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"),
})
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"""foo"""`))
}, "\"\"\"foo\nbar\tbaz\"qux\"\"\""))
}
41 changes: 41 additions & 0 deletions pkg/ast/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,44 @@ func indexOf(refs []int, ref int) (int, bool) {
// deleteRef removes the element at index from *refs in place: the elements
// after index are shifted one position to the left within the same backing
// array and the slice is shortened by one.
func deleteRef(refs *[]int, index int) {
	s := *refs
	copy(s[index:], s[index+1:])
	*refs = s[:len(s)-1]
}

// splitBytesIntoLines cuts data at every line terminator (\n, \r or \r\n) as
// defined by https://spec.graphql.org/October2021/#sec-Line-Terminators
// and returns the pieces without their terminators.
//
// The result always holds at least one element: input without any terminator
// comes back as a single line, and a trailing terminator produces a final empty
// line. The returned lines are sub-slices of data, not copies.
func splitBytesIntoLines(data []byte) [][]byte {
	var result [][]byte
	lineStart := 0

	for pos := 0; pos < len(data); pos++ {
		b := data[pos]
		if b != '\n' && b != '\r' {
			continue
		}

		result = append(result, data[lineStart:pos])

		// a \r immediately followed by \n counts as one terminator
		if b == '\r' && pos+1 < len(data) && data[pos+1] == '\n' {
			pos++
		}

		lineStart = pos + 1
	}

	// whatever follows the last terminator (possibly nothing) is the final line
	return append(result, data[lineStart:])
}

// leadingWhitespaceCount reports how many bytes at the start of line are
// spaces or tabs. It equals len(line) when the line contains nothing else.
func leadingWhitespaceCount(line []byte) int {
	for i := 0; i < len(line); i++ {
		switch line[i] {
		case ' ', '\t':
			// still inside the leading whitespace run
		default:
			return i
		}
	}
	return len(line)
}
94 changes: 94 additions & 0 deletions v2/pkg/ast/ast_val_string_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,100 @@ func (d *Document) StringValueIsBlockString(ref int) bool {
return d.StringValues[ref].BlockString
}

// BlockStringValueContentRawBytes returns the untrimmed bytes of the block string
// referenced by ref, i.e. everything just inside the """ quotes.
//
// The recorded Content range is widened in both directions until a '"' byte is hit,
// because (per the original author's note) the lexer ignores whitespace and that
// whitespace is needed to work out the block string's indentation.
// The returned slice aliases d.Input.RawBytes.
func (d *Document) BlockStringValueContentRawBytes(ref int) []byte {
	content := d.StringValues[ref].Content
	raw := d.Input.RawBytes

	// Walk backwards to the nearest quote preceding the content. If none exists
	// the index runs down to -1, so the increment below yields 0.
	blockStart := int(content.Start) - 1
	for blockStart >= 0 && raw[blockStart] != '"' {
		blockStart--
	}
	blockStart++

	// Walk forwards to the first quote at or after the end of the content,
	// defaulting to the end of the input when there is none.
	blockEnd := d.Input.Length
	for i := int(content.End); i < d.Input.Length; i++ {
		if raw[i] != '"' {
			continue
		}
		blockEnd = i
		break
	}

	return raw[blockStart:blockEnd]
}

// BlockStringValueContentRawString returns the result of
// BlockStringValueContentRawBytes for ref, converted to a string
// with unsafebytes.BytesToString.
func (d *Document) BlockStringValueContentRawString(ref int) string {
	raw := d.BlockStringValueContentRawBytes(ref)
	return unsafebytes.BytesToString(raw)
}

// BlockStringValueContentBytes returns the semantic value of the block string
// referenced by ref, implementing
// https://spec.graphql.org/October2021/#BlockStringValue()
//
// The raw content is split into lines, the common indentation is removed from
// every line except the first, leading and trailing whitespace-only lines are
// dropped, and the remaining lines are joined with '\n' into a new byte slice.
// A block string made up solely of whitespace-only lines yields an empty slice.
//
// NOTE(review): the spec also maps the escape sequence \""" to """; that is not
// done here - confirm whether it is handled elsewhere.
func (d *Document) BlockStringValueContentBytes(ref int) []byte {

	// NOTE: This implementation follows the spec step by step.
	// It likely could be optimized for performance.

	// split the raw value into lines
	rawValue := d.BlockStringValueContentRawBytes(ref)
	lines := splitBytesIntoLines(rawValue)

	// find the common indent size (-1 means no common indent);
	// the first line and whitespace-only lines do not take part
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		indent := leadingWhitespaceCount(lines[i])
		if indent == len(lines[i]) {
			continue
		}
		if commonIndent == -1 || indent < commonIndent {
			commonIndent = indent
		}
	}

	// remove the common indent from each line except the first;
	// whitespace-only lines shorter than the common indent become empty
	if commonIndent != -1 {
		for i := 1; i < len(lines); i++ {
			indent := commonIndent
			if len(lines[i]) <= commonIndent {
				indent = len(lines[i])
			}

			lines[i] = lines[i][indent:]
		}
	}

	// drop leading and trailing whitespace-only lines; if every line is
	// whitespace-only the window [first, end) collapses to nothing, so the
	// result is empty instead of echoing the whitespace back
	first, end := 0, len(lines)
	for first < end && leadingWhitespaceCount(lines[first]) == len(lines[first]) {
		first++
	}
	for end > first && leadingWhitespaceCount(lines[end-1]) == len(lines[end-1]) {
		end--
	}

	// join the lines to keep and return the result
	return bytes.Join(lines[first:end], []byte{'\n'})
}

// BlockStringValueContentString returns the result of
// BlockStringValueContentBytes for ref, converted to a string
// with unsafebytes.BytesToString.
func (d *Document) BlockStringValueContentString(ref int) string {
	content := d.BlockStringValueContentBytes(ref)
	return unsafebytes.BytesToString(content)
}

func (d *Document) StringValuesAreEquals(left, right int) bool {
return d.StringValueIsBlockString(left) == d.StringValueIsBlockString(right) &&
bytes.Equal(d.StringValueContentBytes(left), d.StringValueContentBytes(right))
Expand Down
16 changes: 15 additions & 1 deletion v2/pkg/ast/ast_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ast

import (
"bytes"
"encoding/json"
"fmt"
"io"

Expand Down Expand Up @@ -144,7 +145,20 @@ func (d *Document) writeJSONValue(buf *bytes.Buffer, value Value) error {
buf.Write(literal.TRUE)
}
case ValueKindString:
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
if d.StringValueIsBlockString(value.Ref) {
content := d.BlockStringValueContentString(value.Ref)

enc := json.NewEncoder(buf)
enc.SetEscapeHTML(false)
if err := enc.Encode(content); err != nil {
return err
}

// Remove the extra newline that Encode adds
buf.Truncate(buf.Len() - 1)
} else {
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
}
case ValueKindList:
buf.WriteByte(literal.LBRACK_BYTE)
for ii, ref := range d.ListValues[value.Ref].Refs {
Expand Down
Loading

0 comments on commit 699eb81

Please sign in to comment.