Skip to content

Commit

Permalink
feat: Handling invalid RDF in canonized JSON LD
Browse files Browse the repository at this point in the history
Previously: we weren't dropping undefined terms from RDFs.

Fix: changed the normalization approach used with the json-gold library so that it
returns a parsing error whenever invalid data is found in the dataset.

Added error-handling logic for invalid RDF data errors: the aries JSON-LD
processor removes the invalid data from the dataset and tries
again recursively. (Following
digitalbazaar/jsonld.js#199)

closes hyperledger-archives#1592

Signed-off-by: sudesh.shetty <sudesh.shetty@securekey.com>
  • Loading branch information
sudeshrshetty committed Apr 10, 2020
1 parent 7e4eafc commit 4ace988
Show file tree
Hide file tree
Showing 11 changed files with 532 additions and 152 deletions.
163 changes: 163 additions & 0 deletions pkg/doc/signature/jsonld/processor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
Copyright SecureKey Technologies Inc. All Rights Reserved.
SPDX-License-Identifier: Apache-2.0
*/

package jsonld

import (
"fmt"
"regexp"
"strconv"
"strings"

"github.com/piprate/json-gold/ld"

"github.com/hyperledger/aries-framework-go/pkg/common/log"
)

// Settings shared by the processor methods in this file.
const (
// format is the RDF serialization name assigned to the Format option.
format = "application/n-quads"
// algorithm is the normalization algorithm name assigned to the Algorithm option.
algorithm = "URDNA2015"
// handleNormalizeErr is the error text fragment used to recognise an invalid-quad
// parsing failure; errors containing it are retried after the reported line is removed.
handleNormalizeErr = "Error while parsing N-Quads; invalid quad. line:"
)

// logger is the package-level logger used by the JSON-LD processor.
var logger = log.New("aries-framework/json-ld-processor")

// nolint:gochecknoglobals
var (
// invalidRDFLinePattern matches the run of digits at the very end of a string;
// findLineNumber applies it to an error message to extract the reported line number.
invalidRDFLinePattern = regexp.MustCompile("[0-9]*$")
)

// Processor is the JSON-LD processor for aries.
// It carries no state; its methods configure json-gold with ld.JsonLd_1_1 as
// the processing mode.
type Processor struct{}

// NewProcessor creates a ready-to-use JSON-LD processor for aries.
func NewProcessor() *Processor {
	return new(Processor)
}

// GetCanonicalDocument returns the canonized form of the given JSON-LD document.
//
// The document is normalized with the configured algorithm (URDNA2015) and the
// resulting RDF dataset is serialized as N-Quads. When normalization fails, the
// error is handed to retryForInvalidRDFError, which retries after removing the
// invalid quad lines; if that also fails the error is returned wrapped.
//
// An error is also returned when the normalized result is not a usable RDF
// dataset or when the serializer does not produce a string.
func (p *Processor) GetCanonicalDocument(doc map[string]interface{}) ([]byte, error) {
	proc := ld.NewJsonLdProcessor()
	options := ld.NewJsonLdOptions("")
	options.ProcessingMode = ld.JsonLd_1_1
	options.Algorithm = algorithm
	options.ProduceGeneralizedRdf = true

	normalizedTriples, err := proc.Normalize(doc, options)
	if err != nil {
		normalizedTriples, err = p.retryForInvalidRDFError(proc, options, doc, err)
		if err != nil {
			return nil, fmt.Errorf("failed to normalize JSON-LD document: %w", err)
		}
	}

	// A typed nil *ld.RDFDataset still satisfies the type assertion, so it is
	// rejected explicitly instead of being handed to the serializer.
	ds, ok := normalizedTriples.(*ld.RDFDataset)
	if !ok || ds == nil {
		return nil, fmt.Errorf("failed to normalize JSON-LD document, unexpected RDF dataset")
	}

	serializer := ld.NQuadRDFSerializer{}

	resp, err := serializer.Serialize(ds)
	if err != nil {
		return nil, fmt.Errorf("failed to serialize normalized RDF dataset : %w", err)
	}

	// Serialize returns an interface value; check its type rather than panic.
	nquads, ok := resp.(string)
	if !ok {
		return nil, fmt.Errorf("failed to serialize normalized RDF dataset : unexpected result type %T", resp)
	}

	return []byte(nquads), nil
}

// Compact runs JSON-LD compaction of input against context.
// A non-nil loader replaces the default document loader of the options.
func (p *Processor) Compact(input, context interface{}, loader ld.DocumentLoader) (map[string]interface{}, error) {
	opts := ld.NewJsonLdOptions("")
	opts.ProcessingMode = ld.JsonLd_1_1
	opts.Format = format
	opts.ProduceGeneralizedRdf = true

	// keep the default loader unless the caller supplied one
	if loader != nil {
		opts.DocumentLoader = loader
	}

	compacted, err := ld.NewJsonLdProcessor().Compact(input, context, opts)

	return compacted, err
}

// retryForInvalidRDFError handles the invalid-quad error raised while normalizing
// doc and returns a new data set with the invalid line removed.
//
// Errors that do not carry the invalid-quad marker are returned untouched. For
// an invalid-quad error the document is converted to an RDF dataset, the
// dataset is normalized into its N-Quads text form, and that text is parsed
// again after the line reported by the error has been removed.
//
// opts.Format is overwritten while this method runs and is left set to the
// N-Quads format once the dataset has been built.
func (p *Processor) retryForInvalidRDFError(proc *ld.JsonLdProcessor, opts *ld.JsonLdOptions,
	doc map[string]interface{}, err error) (interface{}, error) {
	// Without an error there is nothing to recover from, and reading the message
	// of a nil error below would panic.
	if err == nil {
		return nil, fmt.Errorf("no normalization error to recover from")
	}

	if !strings.Contains(err.Error(), handleNormalizeErr) {
		return nil, err
	}

	lineNumber, err := findLineNumber(err)
	if err != nil {
		return nil, err
	}

	// handling invalid RDF data, by following pattern [https://github.com/digitalbazaar/jsonld.js/issues/199]
	logger.Warnf("Failed to normalize JSON-LD document due to invalid RDF, retrying after removing invalid data.")

	// prepare data set
	opts.Format = ""

	datasetObj, err := proc.ToRDF(doc, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to create dataset: %w", err)
	}

	dataset, ok := datasetObj.(*ld.RDFDataset)
	if !ok {
		return nil, fmt.Errorf("unexpected RDF data set found")
	}

	// normalize dataset and get view
	opts.Format = format

	r, err := ld.NewNormalisationAlgorithm(opts.Algorithm).Main(dataset, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to create normalized state: %w", err)
	}

	// The normalized state arrives as an interface value; check that it is the
	// N-Quads text before using it.
	view, ok := r.(string)
	if !ok {
		return nil, fmt.Errorf("unexpected normalized state found")
	}

	return p.parseNQuadsWithRetry(view, lineNumber)
}

// parseNQuadsWithRetry parses the N-Quads view after discarding the line at
// lineNumber (1-based), and keeps discarding lines for as long as the parser
// reports another invalid quad.
//
// It returns the parsed dataset. An error is returned when a reported line
// number does not exist in the view, when the line number cannot be extracted
// from a parser error, or when parsing fails for a reason other than an
// invalid quad.
func (p *Processor) parseNQuadsWithRetry(view string, lineNumber int) (interface{}, error) {
	// Every pass removes one line, so the loop ends once the view runs out of
	// invalid lines.
	for {
		// removeQuad slices by index and would panic for a line that is not in the view.
		if lineCount := strings.Count(view, "\n") + 1; lineNumber < 1 || lineNumber > lineCount {
			return nil, fmt.Errorf("invalid RDF data line number %d is out of range", lineNumber)
		}

		logger.Warnf("Removing incorrect RDF from line number %d", lineNumber)
		// polish view and parse again
		view = removeQuad(view, lineNumber-1)

		normalizedTriples, err := ld.ParseNQuads(view)
		if err == nil {
			return normalizedTriples, nil
		}

		// Only invalid-quad errors can be repaired by dropping a line; anything
		// else is reported instead of returning a nil dataset without an error.
		if !strings.Contains(err.Error(), handleNormalizeErr) {
			return nil, fmt.Errorf("failed to parse N-Quads: %w", err)
		}

		lineNumber, err = findLineNumber(err)
		if err != nil {
			return nil, err
		}
	}
}

// removeQuad returns view with the line at the given zero-based index dropped.
// Lines are delimited by "\n"; index must refer to an existing line, since the
// slicing below panics otherwise.
func removeQuad(view string, index int) string {
	quads := strings.Split(view, "\n")

	// copy everything except the unwanted line into a fresh slice
	kept := make([]string, 0, len(quads)-1)
	kept = append(kept, quads[:index]...)
	kept = append(kept, quads[index+1:]...)

	return strings.Join(kept, "\n")
}

// findLineNumber extracts the problematic line number from the trailing digits
// of the error message. It returns -1 and a wrapped error when the message does
// not end in a number.
func findLineNumber(err error) (int, error) {
	digits := invalidRDFLinePattern.FindString(err.Error())

	lineNumber, convErr := strconv.Atoi(digits)
	if convErr == nil {
		return lineNumber, nil
	}

	return -1, fmt.Errorf("unable to locate invalid RDF data line number: %w", convErr)
}
Loading

0 comments on commit 4ace988

Please sign in to comment.