forked from hyperledger-archives/aries-framework-go
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Handling invalid RDF in canonized JSON LD
Previously: we weren't dropping undefined terms from RDFs. Fix: changed the normalization approach through the json-gold library so that it returns a parsing error whenever invalid data is found in the dataset. Added error-handling logic for invalid RDF data errors, where the aries JSON-LD processor removes the invalid data from the dataset and tries again recursively. (Following digitalbazaar/jsonld.js#199) closes hyperledger-archives#1592 Signed-off-by: sudesh.shetty <sudesh.shetty@securekey.com>
- Loading branch information
1 parent
7e4eafc
commit 4ace988
Showing
11 changed files
with
532 additions
and
152 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
/* | ||
Copyright SecureKey Technologies Inc. All Rights Reserved. | ||
SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package jsonld | ||
|
||
import ( | ||
"fmt" | ||
"regexp" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/piprate/json-gold/ld" | ||
|
||
"github.com/hyperledger/aries-framework-go/pkg/common/log" | ||
) | ||
|
||
// Settings and markers shared by the JSON-LD processor code in this file.
const (
	// format is the RDF format name assigned to JsonLdOptions.Format.
	format = "application/n-quads"

	// algorithm is the normalization algorithm name assigned to JsonLdOptions.Algorithm.
	algorithm = "URDNA2015"

	// handleNormalizeErr is the error-message fragment used to recognize
	// invalid-quad parsing errors that qualify for a retry.
	handleNormalizeErr = "Error while parsing N-Quads; invalid quad. line:"
)
|
||
var logger = log.New("aries-framework/json-ld-processor") | ||
|
||
// nolint:gochecknoglobals | ||
var ( | ||
invalidRDFLinePattern = regexp.MustCompile("[0-9]*$") | ||
) | ||
|
||
// Processor is the JSON-LD processor for aries. It carries no state.
//
// NOTE(review): this comment previously advertised processing mode JSON-LD 1.0
// (https://www.w3.org/TR/2014/REC-json-ld-20140116), but the methods in this
// file set ld.JsonLd_1_1 on their options - confirm which one is intended.
type Processor struct{}
|
||
// NewProcessor returns new JSON-LD processor for aries | ||
func NewProcessor() *Processor { | ||
return &Processor{} | ||
} | ||
|
||
// GetCanonicalDocument returns canonized document of given json ld | ||
func (p *Processor) GetCanonicalDocument(doc map[string]interface{}) ([]byte, error) { | ||
proc := ld.NewJsonLdProcessor() | ||
options := ld.NewJsonLdOptions("") | ||
options.ProcessingMode = ld.JsonLd_1_1 | ||
options.Algorithm = algorithm | ||
options.ProduceGeneralizedRdf = true | ||
|
||
normalizedTriples, err := proc.Normalize(doc, options) | ||
if err != nil { | ||
normalizedTriples, err = p.retryForInvalidRDFError(proc, options, doc, err) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to normalize JSON-LD document: %w", err) | ||
} | ||
} | ||
|
||
if ds, ok := normalizedTriples.(*ld.RDFDataset); ok { | ||
serializer := ld.NQuadRDFSerializer{} | ||
resp, err := serializer.Serialize(ds) | ||
|
||
if err != nil { | ||
return nil, fmt.Errorf("failed to serialize normalized RDF dataset : %w", err) | ||
} | ||
|
||
return []byte(resp.(string)), nil | ||
} | ||
|
||
return nil, fmt.Errorf("failed to normalize JSON-LD document, unexpected RDF dataset") | ||
} | ||
|
||
// Compact compacts given json ld object | ||
func (p *Processor) Compact(input, context interface{}, loader ld.DocumentLoader) (map[string]interface{}, error) { | ||
proc := ld.NewJsonLdProcessor() | ||
options := ld.NewJsonLdOptions("") | ||
options.ProcessingMode = ld.JsonLd_1_1 | ||
options.Format = format | ||
options.ProduceGeneralizedRdf = true | ||
|
||
if loader != nil { | ||
options.DocumentLoader = loader | ||
} | ||
|
||
return proc.Compact(input, context, options) | ||
} | ||
|
||
// retryForInvalidRDFError handles incorrect RDF data error and returns new data set by | ||
// removing invalid line from data set | ||
func (p *Processor) retryForInvalidRDFError(proc *ld.JsonLdProcessor, opts *ld.JsonLdOptions, | ||
doc map[string]interface{}, err error) (interface{}, error) { | ||
if err != nil && !strings.Contains(err.Error(), handleNormalizeErr) { | ||
return nil, err | ||
} | ||
|
||
lineNumber, err := findLineNumber(err) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// handling invalid RDF data, by following pattern [https://github.com/digitalbazaar/jsonld.js/issues/199] | ||
logger.Warnf("Failed to normalize JSON-LD document due to invalid RDF, retrying after removing invalid data.") | ||
|
||
// prepare data set | ||
opts.Format = "" | ||
|
||
datasetObj, err := proc.ToRDF(doc, opts) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to create dataset: %w", err) | ||
} | ||
|
||
dataset, ok := datasetObj.(*ld.RDFDataset) | ||
if !ok { | ||
return nil, fmt.Errorf("unexpected RDF data set found") | ||
} | ||
|
||
// normalize dataset and get view | ||
opts.Format = format | ||
|
||
r, err := ld.NewNormalisationAlgorithm(opts.Algorithm).Main(dataset, opts) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to create normalized state: %w", err) | ||
} | ||
|
||
return p.parseNQuadsWithRetry(r.(string), lineNumber) | ||
} | ||
|
||
// handleInvalidRDF handles invalid RDF data by discarding incorrect line | ||
func (p *Processor) parseNQuadsWithRetry(view string, lineNumber int) (interface{}, error) { | ||
logger.Warnf("Removing incorrect RDF from line number %d", lineNumber) | ||
// polish view and parse again | ||
view = removeQuad(view, lineNumber-1) | ||
|
||
normalizedTriples, err := ld.ParseNQuads(view) | ||
if err != nil && strings.Contains(err.Error(), handleNormalizeErr) { | ||
lineNumber, err = findLineNumber(err) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return p.parseNQuadsWithRetry(view, lineNumber) | ||
} | ||
|
||
return normalizedTriples, nil | ||
} | ||
|
||
// removeQuad returns view with the "\n"-separated line at the given
// zero-based index dropped. The index must address an existing line.
func removeQuad(view string, index int) string {
	quads := strings.Split(view, "\n")

	kept := make([]string, 0, len(quads))
	kept = append(kept, quads[:index]...)
	kept = append(kept, quads[index+1:]...)

	return strings.Join(kept, "\n")
}
|
||
// findLineNumber finds problematic line number from error | ||
func findLineNumber(err error) (int, error) { | ||
s := invalidRDFLinePattern.FindString(err.Error()) | ||
|
||
i, err := strconv.Atoi(s) | ||
if err != nil { | ||
return -1, fmt.Errorf("unable to locate invalid RDF data line number: %w", err) | ||
} | ||
|
||
return i, nil | ||
} |
Oops, something went wrong.