From c86d19bed151127c59f3dafc7cfb791524c6d622 Mon Sep 17 00:00:00 2001 From: Ashish Goswami Date: Thu, 21 May 2020 20:00:41 +0530 Subject: [PATCH 1/2] Replace strings.Trim with strings.TrimSpace in ParseRDF --- chunker/rdf_parser.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/chunker/rdf_parser.go b/chunker/rdf_parser.go index 85044a6b34b..c7020954c1d 100644 --- a/chunker/rdf_parser.go +++ b/chunker/rdf_parser.go @@ -97,7 +97,7 @@ L: item := it.Item() switch item.Typ { case itemSubject: - rnq.Subject = strings.Trim(item.Val, " ") + rnq.Subject = strings.TrimSpace(item.Val) case itemSubjectFunc: var err error @@ -113,10 +113,10 @@ L: case itemPredicate: // Here we split predicate and lang directive (ex: "name@en"), if needed. - rnq.Predicate, rnq.Lang = x.PredicateLang(strings.Trim(item.Val, " ")) + rnq.Predicate, rnq.Lang = x.PredicateLang(strings.TrimSpace(item.Val)) case itemObject: - rnq.ObjectId = strings.Trim(item.Val, " ") + rnq.ObjectId = strings.TrimSpace(item.Val) case itemStar: switch { @@ -144,9 +144,9 @@ L: return rnq, errors.Errorf("If predicate/subject is *, value should be * as well") } - val := strings.Trim(item.Val, " ") + val := strings.TrimSpace(item.Val) // TODO: Check if this condition is required. - if strings.Trim(val, " ") == "*" { + if val == "*" { return rnq, errors.Errorf("itemObject can't be *") } // Lets find out the storage type from the type map. @@ -190,7 +190,7 @@ L: break L case itemLabel: - rnq.Label = strings.Trim(item.Val, " ") + rnq.Label = strings.TrimSpace(item.Val) case itemLeftRound: it.Prev() // backup '(' From b95bf1751c51abf8941c1b195200475818ac20ad Mon Sep 17 00:00:00 2001 From: Ashish Goswami Date: Tue, 26 May 2020 12:25:32 +0530 Subject: [PATCH 2/2] Use TrimFunc instead of TrimSpace TrimSpace is faster, but it removes all whitespace characters (tabs, newlines, etc.), not only spaces. Using TrimSpace might change existing behaviour. This commit replaces TrimSpace with TrimFunc. 
TrimFunc is passed the isSpaceRune function to avoid allocations. --- chunker/rdf_parser.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/chunker/rdf_parser.go b/chunker/rdf_parser.go index c7020954c1d..1f465095e36 100644 --- a/chunker/rdf_parser.go +++ b/chunker/rdf_parser.go @@ -72,6 +72,10 @@ func ParseRDFs(b []byte) ([]*api.NQuad, *pb.Metadata, error) { return nqs, calculateTypeHints(nqs), nil } +func isSpaceRune(r rune) bool { + return r == ' ' +} + // ParseRDF parses a mutation string and returns the N-Quad representation for it. // It parses N-Quad statements based on http://www.w3.org/TR/n-quads/. func ParseRDF(line string, l *lex.Lexer) (api.NQuad, error) { @@ -97,7 +101,7 @@ L: item := it.Item() switch item.Typ { case itemSubject: - rnq.Subject = strings.TrimSpace(item.Val) + rnq.Subject = strings.TrimFunc(item.Val, isSpaceRune) case itemSubjectFunc: var err error @@ -113,10 +117,10 @@ L: case itemPredicate: // Here we split predicate and lang directive (ex: "name@en"), if needed. - rnq.Predicate, rnq.Lang = x.PredicateLang(strings.TrimSpace(item.Val)) + rnq.Predicate, rnq.Lang = x.PredicateLang(strings.TrimFunc(item.Val, isSpaceRune)) case itemObject: - rnq.ObjectId = strings.TrimSpace(item.Val) + rnq.ObjectId = strings.TrimFunc(item.Val, isSpaceRune) case itemStar: switch { @@ -144,7 +148,7 @@ L: return rnq, errors.Errorf("If predicate/subject is *, value should be * as well") } - val := strings.TrimSpace(item.Val) + val := strings.TrimFunc(item.Val, isSpaceRune) // TODO: Check if this condition is required. if val == "*" { return rnq, errors.Errorf("itemObject can't be *") @@ -190,7 +194,7 @@ L: break L case itemLabel: - rnq.Label = strings.TrimSpace(item.Val) + rnq.Label = strings.TrimFunc(item.Val, isSpaceRune) case itemLeftRound: it.Prev() // backup '('