Skip to content

Commit

Permalink
Properly escape strings during export. (#3429)
Browse files Browse the repository at this point in the history
Same change as #3424 but for the release/v1.0 branch. The changes could
not be cherry-picked since the code has since been heavily refactored.
  • Loading branch information
martinmr authored May 16, 2019
1 parent 7917750 commit 7849edd
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
20 changes: 17 additions & 3 deletions worker/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ import (
"bufio"
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -54,6 +54,20 @@ var rdfTypeMap = map[types.TypeID]string{
types.PasswordID: "xs:password",
}

// escapedString returns str rendered as a double-quoted, escaped string
// literal suitable for writing into an export file.
//
// The escaping is delegated to json.Marshal, so the result follows JSON
// string-literal rules and always includes the surrounding quotes.
func escapedString(str string) string {
	quoted, err := json.Marshal(str)
	if err == nil {
		return string(quoted)
	}
	// Marshal only reports an error because its parameter is an interface;
	// a plain string is always expected to encode, so reaching this point
	// is treated as a broken invariant rather than a recoverable failure.
	panic("Could not marshal string to JSON string")
}

func toRDF(pl *posting.List, prefix string, readTs uint64) (*bpb.KVList, error) {
var buf bytes.Buffer

Expand All @@ -76,7 +90,7 @@ func toRDF(pl *posting.List, prefix string, readTs uint64) (*bpb.KVList, error)

// trim null character at end
trimmed := strings.TrimRight(str.Value.(string), "\x00")
buf.WriteString(strconv.Quote(trimmed))
buf.WriteString(escapedString(trimmed))
if p.PostingType == pb.Posting_VALUE_LANG {
buf.WriteByte('@')
buf.WriteString(string(p.LangTag))
Expand Down Expand Up @@ -121,7 +135,7 @@ func toRDF(pl *posting.List, prefix string, readTs uint64) (*bpb.KVList, error)
}

if facetTid == types.StringID {
buf.WriteString(strconv.Quote(fStringVal.Value.(string)))
buf.WriteString(escapedString(fStringVal.Value.(string)))
} else {
buf.WriteString(fStringVal.Value.(string))
}
Expand Down
10 changes: 8 additions & 2 deletions worker/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,15 @@ func populateGraphExport(t *testing.T) {
`<3> <name> "First Line\nSecondLine" .`,
"<1> <friend_not_served> <5> <author0> .",
`<5> <name> "" .`,
`<6> <name> "Ding!\u0007Ding!\u0007Ding!\u0007" .`,
}
idMap := map[string]uint64{
"1": 1,
"2": 2,
"3": 3,
"4": 4,
"5": 5,
"6": 6,
}

for _, edge := range rdfEdges {
Expand Down Expand Up @@ -149,7 +151,8 @@ func TestExport(t *testing.T) {
for scanner.Scan() {
nq, err := rdf.Parse(scanner.Text())
require.NoError(t, err)
require.Contains(t, []string{"_:uid1", "_:uid2", "_:uid3", "_:uid4", "_:uid5"}, nq.Subject)
require.Contains(t, []string{"_:uid1", "_:uid2", "_:uid3", "_:uid4", "_:uid5", "_:uid6"},
nq.Subject)
if nq.ObjectValue != nil {
switch nq.Subject {
case "_:uid1", "_:uid2":
Expand All @@ -161,6 +164,9 @@ func TestExport(t *testing.T) {
case "_:uid4":
case "_:uid5":
require.Equal(t, `<_:uid5> <name> "" .`, scanner.Text())
case "_:uid6":
require.Equal(t, `<_:uid6> <name> "Ding!\u0007Ding!\u0007Ding!\u0007" .`,
scanner.Text())
default:
t.Errorf("Unexpected subject: %v", nq.Subject)
}
Expand Down Expand Up @@ -203,7 +209,7 @@ func TestExport(t *testing.T) {
}
require.NoError(t, scanner.Err())
// This order will be preserved due to file naming.
require.Equal(t, 8, count)
require.Equal(t, 9, count)

require.Equal(t, 1, len(schemaFileList))
file = schemaFileList[0]
Expand Down

0 comments on commit 7849edd

Please sign in to comment.