diff --git a/jena-arq/Grammar/.gitignore b/jena-arq/Grammar/.gitignore index d1648fbab98..a786afe999c 100644 --- a/jena-arq/Grammar/.gitignore +++ b/jena-arq/Grammar/.gitignore @@ -9,5 +9,7 @@ Y12.html sparql-grammar.html # Grammar in BNF text sparql.bnf -# Grammar in HTML as a standalone web page. +## BNF +sparql.bnf +## Grammar in HTML as a standalone web page. sparql-html.html diff --git a/jena-arq/Grammar/RDF-Protobuf/binary-rdf.proto b/jena-arq/Grammar/RDF-Protobuf/binary-rdf.proto index 96cb29a5036..76bc9121eaa 100644 --- a/jena-arq/Grammar/RDF-Protobuf/binary-rdf.proto +++ b/jena-arq/Grammar/RDF-Protobuf/binary-rdf.proto @@ -55,7 +55,8 @@ message RDF_Literal { string lex = 1 ; oneof literalKind { bool simple = 9 ; - string langtag = 2 ; + string langtag = 2 ; // Language tag only literal + string langdir = 5 ; // Language tag and base direction separated by "--" string datatype = 3 ; RDF_PrefixName dtPrefix = 4 ; } diff --git a/jena-arq/Grammar/RDF-Thrift/BinaryRDF.thrift b/jena-arq/Grammar/RDF-Thrift/BinaryRDF.thrift index 460b72e09b4..60b6e7d088f 100644 --- a/jena-arq/Grammar/RDF-Thrift/BinaryRDF.thrift +++ b/jena-arq/Grammar/RDF-Thrift/BinaryRDF.thrift @@ -44,8 +44,10 @@ struct RDF_BNode { // union with additional values. struct RDF_Literal { +// Keep this numbering. 1: required string lex ; 2: optional string langtag ; +5: optional string baseDirection ; 3: optional string datatype ; // Either 3 or 4 but UNION is heavy. 
4: optional RDF_PrefixName dtPrefix ; // datatype as prefix name } diff --git a/jena-arq/Grammar/RDF-Thrift/gen-thrift b/jena-arq/Grammar/RDF-Thrift/gen-thrift index fff4df66905..87daf52ca8f 100755 --- a/jena-arq/Grammar/RDF-Thrift/gen-thrift +++ b/jena-arq/Grammar/RDF-Thrift/gen-thrift @@ -1,11 +1,11 @@ #!/bin/bash ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 -if [ "$#" != 1 ] -then - echo "Usage: $(basename $0) FILE" 2>&1 - exit 1 -fi +## if [ "$#" != 1 ] +## then +## echo "Usage: $(basename $0) FILE" 2>&1 +## exit 1 +## fi # Find the namespace PKG=../../src/main/java/org/apache/jena/riot/thrift/wire @@ -16,7 +16,9 @@ rm -f "$PKG"/*.java ## undated: suppress the date at @Generated annotations ## suppress: suppress @Generated annotations entirely -thrift -r -out ../../src/main/java -gen 'java:generated_annotations=suppress' "$@" +THRIFT="${1:-BinaryRDF.thrift}" + +thrift -r -out ../../src/main/java -gen 'java:generated_annotations=suppress' "$THRIFT" for f in "$PKG"/*.java do diff --git a/jena-arq/Grammar/README b/jena-arq/Grammar/README new file mode 100644 index 00000000000..65c77d522a8 --- /dev/null +++ b/jena-arq/Grammar/README @@ -0,0 +1,31 @@ +==== SPARQL Parser + +Run + + grammar + +This produces the code for the current version SPARQL +and the ARQ (SPARQL with extensions) parsers. + +== + +To produce the HTML for the SPARQL Grammar for the W3C spec: +produce the SPARQL 1.2 specific file + + grammar + +which also generates Java code and can be used for checking. + +Then run + + sparql2html + +The output is in sparql-grammar.html. + +This replaces the table between "" in the specific HTML. + +Running + + sparql2bnf + +produces a text BNF form. 
diff --git a/jena-arq/Grammar/Turtle/turtle.jj b/jena-arq/Grammar/Turtle/turtle.jj index 57296a15ed8..ca1c11e4ae5 100644 --- a/jena-arq/Grammar/Turtle/turtle.jj +++ b/jena-arq/Grammar/Turtle/turtle.jj @@ -80,9 +80,9 @@ void Statement() : {} { Directive() | DirectiveOld() -// | (TriplesSameSubject() ( | ) ) +// | (Triples() ( | ) ) // Strict - | (TriplesSameSubject() ) + | (Triples() ) } // Turtle [3] directive @@ -106,17 +106,15 @@ void DirectiveOld() : { Token t ; String iri ; } { setBase(iri, t.beginLine, t.beginColumn) ; } } -// Turtle [6] triples -void TriplesSameSubject() : { Node s ; } +void Triples() : { Node n; } { - s = Subject() - PredicateObjectList(s) + n = Subject() PredicateObjectList(n) | - s = BlankNodePropertyList() - ( PredicateObjectList(s) )? + n = BlankNodePropertyList() ( PredicateObjectList(n) )? +| + n = ReifiedTriple() ( PredicateObjectList(n) )? } -// Turtle [7] predicateObjectList void PredicateObjectList(Node s) : { Node p = null ; } { p = Verb() @@ -124,7 +122,6 @@ void PredicateObjectList(Node s) : { Node p = null ; } ( (p = Verb() ObjectList(s, p))? )* } -// Turtle [8] objectList void ObjectList(Node s, Node p): { Node o ; } { o = Object() @@ -137,17 +134,6 @@ void ObjectList(Node s, Node p): { Node o ; } )* } -// RDF-star Annotation Syntax -void Annotation(Node s, Node p, Node o) : {} -{ - ( - - { Node x = createQuotedTriple(s, p, o, token.beginLine, token.beginColumn); } - PredicateObjectList(x) - - )? 
-} - // Turtle [9] verb Node Verb() : { Node p ; } { @@ -163,13 +149,10 @@ Node Subject() : { Node s; String iri ; } s = BlankNode() | s = Collection() - | - s = QuotedTriple() ) { return s ; } } - // Turtle [11] predicate Node Predicate() : { String iri;} { @@ -185,43 +168,12 @@ Node Object(): { Node o ; String iri; } | o = Collection() | o = BlankNodePropertyList() | o = Literal() - | o = QuotedTriple() + | o = TripleTerm() + | o = ReifiedTriple() ) { return o; } } -Node QuotedTripleSubject(): { Node o ; String iri; } -{ - ( iri = iri() { o = createURI(iri, token.beginLine, token.beginColumn) ; } - | o = BlankNode() - | o = QuotedTriple() - ) - { return o; } -} - -Node QuotedTripleObject(): { Node o ; String iri; } -{ - ( iri = iri() { o = createURI(iri, token.beginLine, token.beginColumn) ; } - | o = BlankNode() - | o = Literal() - | o = QuotedTriple() - ) - { return o ; } -} - -// The syntax for RDF-star <<>> -Node QuotedTriple() : { Node s , p , o ; Token t ; } -{ - t = - s = QuotedTripleSubject() - p = Verb() - o = QuotedTripleObject() - - { Node n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); - return n; - } -} - // Turtle [13] literal Node Literal() : { Node n ;} { @@ -302,7 +254,7 @@ Node RDFLiteral() : { Token t ; String lex = null ; } String LangTag() : { Token t ; } { // Enumerate the directives here because they look like language tags. - ( t = | t = AnyDirective() ) + ( t = | t = AnyDirective() ) { String lang = stripChars(t.image, 1) ; return lang ; } } @@ -369,6 +321,106 @@ String IRIREF() : { Token t ; } { return resolveQuotedIRI(t.image, t.beginLine, t.beginColumn) ; } } +Node Reifier() : { Node n = null; String iriStr; Token t; } +{ + t = + ( + iriStr = iri() { n = createURI(iriStr, token.beginLine, token.beginColumn) ; } + | n = BlankNode() + )? 
+ { if ( n == null ) n = createBNode(t.beginLine, t.beginColumn) ; } + { return n ; } +} + +Node ReifiedTriple() : { Node reifId = null ; Token tok ; Node s; Node p ; Node o ; } +{ + tok = + s = ReifiedTripleSubject() + p = Verb() + o = ReifiedTripleObject() + ( reifId = Reifier())? + { reifId = emitTripleReifier(tok.beginLine, tok.beginColumn, reifId, s, p, o); } + + { return reifId ; } +} + +// rtSubject +Node ReifiedTripleSubject() : { Node s; String iri; } { + ( iri = iri() { s = createURI(iri, token.beginLine, token.beginColumn) ; } + | s = BlankNode() + | s = ReifiedTriple() + ) + { return s; } +} + +// rtObject +Node ReifiedTripleObject() : { Node o; String iri; } { + ( iri = iri() { o = createURI(iri, token.beginLine, token.beginColumn) ; } + | o = BlankNode() + | o = Literal() + | o = TripleTerm() + | o = ReifiedTriple() + ) + { return o; } +} + +Node TripleTerm() : { Node n = null ; Token openToken ; String iri ; Node s , p , o ; } +{ + + s = TripleTermSubject() + p = Verb() + o = TripleTermObject() + + { return createTripleTerm(s, p, o, token.beginLine, token.beginColumn) ; } +} + +// ttSubject +Node TripleTermSubject(): { Node o ; String iri; } +{ + ( iri = iri() { o = createURI(iri, token.beginLine, token.beginColumn) ; } + | o = BlankNode() + ) + { return o; } +} + +// ttObject +Node TripleTermObject(): { Node o ; String iri; } +{ + ( iri = iri() { o = createURI(iri, token.beginLine, token.beginColumn) ; } + | o = BlankNode() + | o = Literal() + | o = TripleTerm() + ) + { return o ; } +} + +// RDF-star Annotation Syntax +void Annotation(Node s, Node p, Node o) : { Node reifId = null; } +{ + ( + reifId = Reifier() + { reifId = emitTripleReifier(token.beginLine, token.beginColumn, reifId, s, p, o) ; } + { setReifierId(reifId); } + | + { reifId = getOrAllocReifierId(s, p, o, token.beginLine, token.beginColumn); } + { clearReifierId(); } + AnnotationBlock(reifId) + ) * +} + +void AnnotationBlock( Node reifId ) : {} +{ + + PredicateObjectList(reifId) + +// 
( +// +// { Node x = createQuotedTriple(s, p, o, token.beginLine, token.beginColumn); } +// PredicateObjectList(x) +// +// )? +} + // ------------------------------------------ // Tokens @@ -457,18 +509,23 @@ TOKEN : | < SEMICOLON: ";" > | < COMMA: "," > | < DOT: "." > + +| < DATATYPE: "^^"> +| < AT: "@"> + +| < L_TRIPLE: "<<(" > +| < R_TRIPLE: ")>>" > | < LT2: "<<" > | < GT2: ">>" > | < L_ANN: "{|" > | < R_ANN: "|}" > - -| < DATATYPE: "^^"> -| < AT: "@"> +| < TILDE: "~" > +| < VBAR: "|" > | )? ":" > | > | | ["0"-"9"]) ((|".")* )? > -| ()+("-" ()+)* > +| ()+("-" ()+)* ( "--" ()* )? > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> diff --git a/jena-arq/Grammar/arq.jj b/jena-arq/Grammar/arq.jj index 4ffca766a36..96c41746932 100644 --- a/jena-arq/Grammar/arq.jj +++ b/jena-arq/Grammar/arq.jj @@ -628,6 +628,18 @@ Element TriplesBlock(ElementPathBlock acc) : { } ( (TriplesBlock(acc))? )? { return acc ; } } +Node ReifiedTripleBlock(TripleCollector acc) : { Node reifId ; } +{ + reifId = ReifiedTriple(acc) + PropertyList(reifId, acc) + { return reifId ; } +} +Node ReifiedTripleBlockPath(TripleCollector acc) : { Node reifId ; } +{ + reifId = ReifiedTriple(acc) + PropertyListPath(reifId, acc) + { return reifId ; } +} Element GraphPatternNotTriples() : { Element el = null ; } { ( @@ -636,6 +648,10 @@ Element GraphPatternNotTriples() : { Element el = null ; } el = OptionalGraphPattern() | el = LateralGraphPattern() + | + el = SemiJoinGraphPattern() + | + el = AntiJoinGraphPattern() | el = MinusGraphPattern() | @@ -667,6 +683,14 @@ Element LateralGraphPattern() : { Element el ; } { el = GroupGraphPattern() { return new ElementLateral(el) ; } } +Element SemiJoinGraphPattern() : { Element el ; } +{ el = GroupGraphPattern() + { return new ElementSemiJoin(el) ; } +} +Element AntiJoinGraphPattern() : { Element el ; } +{ el = GroupGraphPattern() + { return new ElementAntiJoin(el) ; } +} Element GraphGraphPattern() : { Element el ; Node n ;} { n = VarOrIri() el = 
GroupGraphPattern() @@ -756,7 +780,20 @@ Node DataBlockValue() : { Node n ; String iri ; } | n = NumericLiteral() { return n ; } | n = BooleanLiteral() { return n ; } | { return null ; } -| n = QuotedTripleData() { return n ; } +| n = TripleTermData() { return n ; } +} +Node Reifier() : { Token tok = null ; Node reifId = null ; } +{ + tok = + (reifId = VarOrReifierId() )? + { return reifId; } +} +Node VarOrReifierId() : { Node n = null; String iri = null; } +{ + ( n = Var() { return n; } + | iri = iri() { return createNode(iri) ; } + | n = BlankNode() { return n ; } + ) } Element Assignment() : { Var v ; Expr expr ; } { @@ -833,7 +870,7 @@ Expr FunctionCall() : { String fname ; Args a ; } { if ( AggregateRegistry.isRegistered(fname) ) { if ( ! getAllowAggregatesInExpressions() ) - throwParseException("Aggregate expression not legal at this point : "+fname, -1, -1) ; + throwParseException("Aggregate expression not legal at this point : "+fname, token.beginLine, token.beginColumn) ; Aggregator agg = AggregatorFactory.createCustom(fname, a) ; Expr exprAgg = getQuery().allocAggregate(agg) ; return exprAgg ; @@ -898,6 +935,8 @@ void TriplesSameSubject(TripleCollector acc) : { Node s ; } s = TriplesNode(tempAcc) PropertyList(s, tempAcc) { insert(acc, tempAcc) ; } +| + ReifiedTripleBlock(acc) } void PropertyList(Node s, TripleCollector acc) : { } { @@ -941,6 +980,8 @@ void TriplesSameSubjectPath(TripleCollector acc) : { Node s ; } s = TriplesNodePath(tempAcc) PropertyListPath(s, tempAcc) { insert(acc, tempAcc) ; } +| + ReifiedTripleBlockPath(acc) } void PropertyListPath(Node s, TripleCollector acc) : { } { @@ -1059,6 +1100,11 @@ Path PathMod(Path p) : { long i1 ; long i2 ; } ) ) } +long Integer() : {Token t ;} +{ + t = + { return integerValue(t.image) ; } +} Path PathPrimary() : { String str ; Path p ; Node n ; } { ( @@ -1109,16 +1155,11 @@ P_Path0 PathOneInPropertySet() : { String str ; Node n ; } ) ) } -long Integer() : {Token t ;} -{ - t = - { return 
integerValue(t.image) ; } -} Node TriplesNode(TripleCollectorMark acc) : { Node n ; } { - n = Collection(acc) { return n ; } + n = Collection(acc) { return n ; } | - n = BlankNodePropertyList(acc) { return n ; } + n = BlankNodePropertyList(acc) { return n ; } } Node BlankNodePropertyList(TripleCollector acc) : { Token t ; } { @@ -1130,9 +1171,9 @@ Node BlankNodePropertyList(TripleCollector acc) : { Token t ; } } Node TriplesNodePath(TripleCollectorMark acc) : { Node n ; } { - n = CollectionPath(acc) { return n ; } + n = CollectionPath(acc) { return n ; } | - n = BlankNodePropertyListPath(acc) { return n ; } + n = BlankNodePropertyListPath(acc) { return n ; } } Node BlankNodePropertyListPath(TripleCollector acc) : { Token t ; } { @@ -1190,80 +1231,177 @@ Node CollectionPath(TripleCollectorMark acc) : insert(acc, lastCell, nRDFrest, nRDFnil) ; return listHead ; } } -void AnnotationPath(TripleCollector acc, Node s, Node p, Path path, Node o) : {} +void AnnotationPath(TripleCollector acc, Node s, Node p, Path path, Node o) : { Node reifId = null ; } { ( - - { Node pAnn = preConditionAnnotation(s, p, path, o, token.beginLine, token.beginColumn) ; - Node x = createQuotedTriple(s, pAnn, o, token.beginLine, token.beginColumn); - } - PropertyListPathNotEmpty(x, acc) - - )? 
+ { p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); } + reifId = Reifier() + { reifId = insertTripleReifier(acc, reifId, s, p, o, token.beginLine, token.beginColumn) ; } + { setReifierId(reifId); } + | + { + p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); + reifId = getOrAllocReifierId(acc, s, p, o, token.beginLine, token.beginColumn); + } + { clearReifierId(); } + AnnotationBlockPath(acc, reifId) + )* + { clearReifierId(); } +} +void AnnotationBlockPath(TripleCollector acc, Node reifId) : { } +{ + + PropertyListPathNotEmpty(reifId, acc) + } -void Annotation(TripleCollector acc, Node s, Node p, Path path, Node o) : { } +void Annotation(TripleCollector acc, Node s, Node p, Path path, Node o) : { Node reifId = null ; } { ( - - { Node pAnn = preConditionAnnotation(s, p, path, o, token.beginLine, token.beginColumn) ; - Node x = createQuotedTriple(s, p, o, token.beginLine, token.beginColumn); - } - PropertyListNotEmpty(x, acc) - - )? + { p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); } + reifId = Reifier() + { reifId = insertTripleReifier(acc, reifId, s, p, o, token.beginLine, token.beginColumn) ; } + { setReifierId(reifId); } + | + { + p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); + reifId = getOrAllocReifierId(acc, s, p, o, token.beginLine, token.beginColumn); + } + { clearReifierId(); } + AnnotationBlock(acc, reifId) + )* +} +void AnnotationBlock(TripleCollector acc, Node reifId) : { } +{ + + PropertyListNotEmpty(reifId, acc) + } Node GraphNode(TripleCollectorMark acc) : { Node n ; } { - n = VarOrTerm() { return n ; } + n = VarOrTerm() { return n ; } | - n = TriplesNode(acc) { return n ; } + n = TriplesNode(acc) { return n ; } + | + n = ReifiedTriple(acc) { return n ; } } Node GraphNodePath(TripleCollectorMark acc) : { Node n ; } { n = VarOrTerm() { return n ; } - | +| n = TriplesNodePath(acc) { return n ; } +| + n = ReifiedTriple(acc) { return n ; } } 
Node VarOrTerm() : { Node n = null ; String iri ; } { ( n = Var() | iri = iri() { return createNode(iri) ; } - | n = RDFLiteral() { return n ; } - | n = NumericLiteral() { return n ; } - | n = BooleanLiteral() { return n ; } - | n = BlankNode() { return n ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() | { return nRDFnil ; } - | n = QuotedTriple() -) - { return n ; } + | n = TripleTerm() { return n; } + ) + { return n; } } -Node QuotedTriple() : { Node n = null ; Token t ; Node s , p , o ; } +Node ReifiedTriple(TripleCollector acc) : { Node reifId = null ; Token tok ; Node s; Node p ; Node o ; } { - t = - s = VarOrTerm() + tok = + s = ReifiedTripleSubject(acc) p = Verb() - o = VarOrTerm() - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } + o = ReifiedTripleObject(acc) + (reifId = Reifier())? + { reifId = insertTripleReifier(acc, reifId, s, p, o, tok.beginLine, tok.beginColumn) ; } + { return reifId; } +} +Node ReifiedTripleSubject(TripleCollector acc) : { Node s; String iri; } +{ + ( s = Var() + | iri = iri() { s = createNode(iri) ; } + | s = RDFLiteral() + | s = NumericLiteral() + | s = BooleanLiteral() + | s = BlankNode() + | s = ReifiedTriple(acc) + ) + { return s; } +} +Node ReifiedTripleObject(TripleCollector acc) : { Node o; String iri; } +{ + ( o = Var() + | iri = iri() { o = createNode(iri) ; } + | o = RDFLiteral() + | o = NumericLiteral() + | o = BooleanLiteral() + | o = BlankNode() + | o = ReifiedTriple(acc) + | o = TripleTerm() + ) + { return o; } +} +Node TripleTerm() : { Node n = null ; Token openToken ; String iri ; Node s , p , o ; } +{ + openToken = + s = TripleTermSubject() + p = Verb() + o = TripleTermObject() + { n = createTripleTerm(s, p, o, openToken.beginLine, openToken.beginColumn); } + + { return n ; } +} +Node TripleTermSubject() : { Node n; String iri; } +{ + ( n = Var() + | iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = 
BooleanLiteral() + | n = BlankNode() + ) { return n; } } -Node QuotedTripleData() : { Node n = null ; Token t ; String iri ; Node s , p , o ; } +Node TripleTermObject() : { Node n; String iri; } { - t = - ( s = DataValueTerm() ) - ( iri = iri() { p = createNode(iri) ; } | { p = nRDFtype ; } ) - ( o = DataValueTerm() ) - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } - + ( n = Var() + | iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() + | n = TripleTerm() { return n; } + ) { return n; } } -Node DataValueTerm() : { Node n = null ; String iri ; Node s , p , o ; } +Node TripleTermData() : { Node n = null ; Token openToken ; String iri ; Node s , p , o ; } { - iri = iri() { return createNode(iri) ; } -| n = RDFLiteral() { return n ; } -| n = NumericLiteral() { return n ; } -| n = BooleanLiteral() { return n ; } -| n = QuotedTripleData() { return n ; } + openToken = + s = TripleTermDataSubject() + ( iri = iri() { p = createNode(iri) ; } | { p = nRDFtype ; } ) + o = TripleTermDataObject() + { n = createTripleTerm(s, p, o, openToken.beginLine, openToken.beginColumn); } + + { return n ; } +} +Node TripleTermDataSubject() : { Node n = null; String iri; } +{ + ( iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + ) + { return n; } +} +Node TripleTermDataObject() : { Node n = null; String iri; } +{ + ( iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = TripleTermData() + ) + { return n; } } Node VarOrIri() : {Node n = null ; String iri ; } { @@ -1401,27 +1539,38 @@ Expr PrimaryExpression() : { Expr expr ; Node n ; } | n = NumericLiteral() { return asExpr(n) ; } | n = BooleanLiteral() { return asExpr(n) ; } | n = Var() { return asExpr(n) ; } - | n = ExprQuotedTriple() { return asExpr(n) ; } + | n = ExprTripleTerm() { return asExpr(n) ; } ) } -Node 
ExprVarOrTerm() : { Node n; String s;} +Node ExprTripleTerm() : { Token t ; Node s,p,o,n; } +{ + t = + s = ExprTripleTermSubject() + p = Verb() + o = ExprTripleTermObject() + { n = createTripleTerm(s, p, o, t.beginLine, t.beginColumn); } + + { return n; } +} +Node ExprTripleTermSubject() : { Node n; String iri; } { - ( s = iri() { n = createNode(s); } + ( iri = iri() { n = createNode(iri); } | n = RDFLiteral() | n = NumericLiteral() | n = BooleanLiteral() | n = Var() - | n = ExprQuotedTriple() ) { return n; } } -Node ExprQuotedTriple() : { Token t ; Node s,p,o,n; } -{ t = - s = ExprVarOrTerm() - p = Verb() - o = ExprVarOrTerm() - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } - +Node ExprTripleTermObject() : { Node n; String iri; } +{ + ( iri = iri() { n = createNode(iri); } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = Var() + | n = ExprTripleTerm() + ) { return n; } } Expr BrackettedExpression() : { Expr expr ; } @@ -1441,6 +1590,8 @@ Expr BuiltInCall() : { Expr expr ; | expr1 = Expression() expr2 = Expression() { return new E_LangMatches(expr1, expr2) ; } + | expr = Expression() + { return new E_LangDir(expr) ; } | expr = Expression() { return new E_Datatype(expr) ; } | gn = Var() @@ -1514,6 +1665,10 @@ Expr BuiltInCall() : { Expr expr ; { return new E_Conditional(expr, expr1, expr2) ; } | expr1 = Expression() expr2 = Expression() { return new E_StrLang(expr1, expr2) ; } + | expr1 = Expression() + expr2 = Expression() + expr3 = Expression() + { return new E_StrLangDir(expr1, expr2, expr3) ; } | expr1 = Expression() expr2 = Expression() { return new E_StrDatatype(expr1, expr2) ; } | expr1 = Expression() expr2 = Expression() @@ -1528,13 +1683,19 @@ Expr BuiltInCall() : { Expr expr ; { return new E_IsLiteral(expr) ; } | expr = Expression() { return new E_IsNumeric(expr) ; } + | expr = Expression() + { return new E_HasLang(expr) ; } + | expr = Expression() + { return new E_HasLangDir(expr) ; } | expr = 
RegexExpression() { return expr ; } | expr = ExistsFunc() { return expr ; } | expr = NotExistsFunc() { return expr ; } -| expr = Expression() + | expr = Expression() { return new E_IsTriple(expr) ; } - | expr1 = Expression() expr2 = Expression() expr3 = Expression() + | expr1 = Expression() + expr2 = Expression() + expr3 = Expression() { return new E_TripleFn(expr1, expr2, expr3) ; } | expr = Expression() { return new E_TripleSubject(expr) ; } @@ -1689,7 +1850,7 @@ Expr iriOrFunction() : { String iri ; Args a = null ; } return asExpr(createNode(iri)) ; if ( AggregateRegistry.isRegistered(iri) ) { if ( ! getAllowAggregatesInExpressions() ) - throwParseException("Aggregate expression not legal at this point : "+iri, -1, -1) ; + throwParseException("Aggregate expression not legal at this point : "+iri, token.beginLine, token.beginColumn) ; Aggregator agg = AggregatorFactory.createCustom(iri, a) ; Expr exprAgg = getQuery().allocAggregate(agg) ; return exprAgg ; @@ -1697,16 +1858,17 @@ Expr iriOrFunction() : { String iri ; Args a = null ; } return new E_Function(iri, a) ; } } -Node RDFLiteral() : { Token t ; String lex = null ; } +Node RDFLiteral() : { Token t ; String lex = null ; String uri = null ; } { lex = String() - { String lang = null ; String uri = null ; } ( - ( t = { lang = stripChars(t.image, 1) ; } ) + t = + { return createLiteralLang(lex, t.image, token.beginLine, token.beginColumn); } | - ( uri = iri() ) + uri = iri() + { return createLiteralDT(lex, uri, token.beginLine, token.beginColumn); } )? - { return createLiteral(lex, lang, uri) ; } + { return createLiteralString(lex, token.beginLine, token.beginColumn) ; } } Node NumericLiteral() : { Node n ; } { @@ -1802,7 +1964,7 @@ TOKEN: | | ["0"-"9"]) ((|".")* )? > | > | > -| ()+("-" ()+)* > +| ()+("-" ()+)* ( "--" ()* )? 
> | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> } @@ -1837,7 +1999,9 @@ TOKEN [IGNORE_CASE] : | < BIND: "bind" > | < SERVICE: "service" > | < LET: "LET" > -| < LATERAL: "LATERAL" > +| < LATERAL: "lateral" > +| < SEMIJOIN: "semijoin" > +| < ANTIJOIN: "antijoin" > | < UNFOLD: "unfold" > | < TRIPLE: "TRIPLE" > | < IS_TRIPLE: "isTRIPLE" > @@ -1881,15 +2045,19 @@ TOKEN [IGNORE_CASE] : | < SHORTEST: "shortest" > | < STR: "str" > | < STRLANG: "strlang" > +| < STRLANGDIR: "strlangdir" > | < STRDT: "strdt" > | < DTYPE: "datatype" > | < LANG: "lang" > | < LANGMATCHES: "langmatches" > +| < LANGDIR: "langdir" > | < IS_URI: "isURI" > | < IS_IRI: "isIRI" > | < IS_BLANK: "isBlank" > | < IS_LITERAL: "isLiteral" > | < IS_NUMERIC: "isNumeric" > +| < HAS_LANG: "hasLang" > +| < HAS_LANGDIR: "hasLangDir" > | < REGEX: "regex" > | < SAME_TERM: "sameTerm" > | < RAND: "RAND" > @@ -2014,12 +2182,14 @@ TOKEN : | < LT: "<" > | < LE: "<=" > | < GE: ">=" > -| < GT2: ">>" > +| < L_TRIPLE: "<<(" > +| < R_TRIPLE: ")>>" > | < LT2: "<<" > -| -| -| < BANG: "!" > +| < GT2: ">>" > +| < L_ANN: "{|" > +| < R_ANN: "|}" > | < TILDE: "~" > +| < BANG: "!" > | < COLON: ":" > | < SC_OR: "||" > | < SC_AND: "&&" > diff --git a/jena-arq/Grammar/grammar b/jena-arq/Grammar/grammar index 08e07f7783a..92696e0d1dd 100755 --- a/jena-arq/Grammar/grammar +++ b/jena-arq/Grammar/grammar @@ -42,6 +42,7 @@ function grammar ## echo "---- Create HTML" ## jjdoc -OUTPUT_FILE=${FILE%%.jj}.html "${FILE}" + echo "---- Create text form" jjdoc -TEXT=true -OUTPUT_FILE=${FILE%%.jj}.txt "${FILE}" diff --git a/jena-arq/Grammar/jj2bnf b/jena-arq/Grammar/jj2bnf new file mode 100755 index 00000000000..0403e6e7dcd --- /dev/null +++ b/jena-arq/Grammar/jj2bnf @@ -0,0 +1,302 @@ +#!/usr/bin/perl +## Licensed to the Apache Software Foundation (ASF) under one +## or more contributor license agreements. See the NOTICE file +## distributed with this work for additional information +## regarding copyright ownership. 
The ASF licenses this file +## to you under the Apache License, Version 2.0 (the +## "License"); you may not use this file except in compliance +## with the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +## TODO (2024) +## [ ] Why does ruleBodyStr have a newline? + +## Consider rewriting, preserving the inlining. +## Parse lines (and continuation lines). Fixup. + +# Grammar into BNF text +# Reads in sparql.txt and the tokens.txt file + +if ( $#ARGV != 1 ) +{ + print STDERR "Usage: grammar.txt tokens.txt\n" ; + exit 1 ; +} + +$/ = undef ; +# Just table or full page. + +$grammarFile = $ARGV[0] ; +$tokensFile = $ARGV[1] ; + +$grammar = &readFile($grammarFile) ; +$tokens = &readFile($tokensFile) ; + +$grammar =~ s!DOCUMENT START!! ; +# $grammar =~ s!NON-TERMINALS!! ; +$grammar =~ s!DOCUMENT END!! ; +$grammar =~ s!TOKENS.*NON-TERMINALS!!s ; + +$grammar =~ s!//.*!!g ; +$grammar =~ s!\r!!g ; + +# remove leading whitespace +$grammar =~ s!^[\n\s]*!\n! ; + +# Merge alts +$grammar =~ s!\n\s*\|!\ |!g ; + +$tokens =~ s!//.*!!g ; +$tokens =~ s!\r!!g ; + + +## Grammar + +@g = split(/\n\s*/, $grammar) ; + +@rules = () ; +%ruleMap = () ; +%tokenMap = () ; +%inline = () ; + +# Grammar rules +# Direct from "jjdoc -TEXT=true" + +for $g (@g) { + ($rulename, $rulebody) = split(/:=/,$g) ; + + $rulename =~ s!^\s*!! ; + $rulename =~ s!\s*$!! ; + + $rulebody =~ s!^\s*!! ; + $rulebody =~ s!\s*$!! ; + + # Remove outer brackets +# $rulebody =~ s!^\((.*)\)$!$1! 
; + + next if $rulename eq '' ; + + push @rules, $rulename ; + warn "Duplicate rule (grammar): $rulename\n" if defined($ruleMap{$rulename}) ; + $ruleMap{$rulename} = $rulebody ; + +## print "----------\n" ; +## print $rulename,"\n" ; +## print $rulebody,"\n" ; +} + +# Tokens +# Produced by "jj2tokens" +# Hand edited to indicate the inlines + +$tokens =~ s/\n+/\n/g ; +$tokens =~ s/^\n// ; + +@t = split(/\n(?=\<|\[)/, $tokens) ; + +for $t (@t) { + ($tokenname,$tokenbody) = split(/::=/, $t) ; + $tokenname =~ s!^\s*!! ; + $tokenname =~ s!\s*$!! ; + $tokenname =~ s/#// ; + + $tokenbody =~ s!^\s*!! ; + $tokenbody =~ s!\s*$!! ; + + # Inline? + if ( $tokenname =~ /^\[\<\w*\>\]/ ) { + warn "Duplicate inline (token): $tokenname\n" if defined($inline{$tokenname}) ; + $tokenname =~ s/^\[//g ; + $tokenname =~ s/\]$//g ; + $tokenbody =~ s/"/'/g ; # '" -- But not literal " -- how? + $tokenbody =~ s/\<\>\'\{\}/\<\>\"\{\}/ ; # '" IRI fixup + $inline{$tokenname} = $tokenbody ; + + #print "INLINE: ",$tokenname," => ",$tokenbody,"\n" ; + } else { + ## Remove < > aroudn a token name. + my $inlinePlain = $tokenname ; + $inlinePlain =~ s%^<%%; + $inlinePlain =~ s%>$%%; + $inline{$tokenname} = $inlinePlain ; + push @rules, $tokenname ; + warn "Duplicate rule (token): $tokenname\n" if defined($tokenMap{$tokenname}) ; + $ruleMap{$tokenname} = $tokenbody ; + } +} + +$ruleNum = 0 ; + +for $r (@rules) { + $DEBUG = 0 ; + $ruleNum++ ; + $rulename = $r ; + $rulebody = $ruleMap{$rulename} ; + + if ( $DEBUG ) { + print STDERR "\n" ; + print STDERR "Rule: $rulename\n" ; + print STDERR "Body: $rulebody\n" ; + } + + $ruleBodyStr = $rulebody ; +## # Escape HTML chars before adding markup. +# $ruleBodyStr = esc($ruleBodyStr) ; + + # Inlines + for $k (keys %inline) { +## $s = esc($inline{$k}) ; +## $k = esc($k) ; +## # Assumes escaped <> round tokens. 
+## $k = quotemeta $k ; +## $ruleBodyStr =~ s/$k/$s/g ; + + $s = $inline{$k}; + $ruleBodyStr =~ s/$k/$s/g ; + } + + if ( $DEBUG ) { + print STDERR "After inlining\n" ; + print STDERR $ruleBodyStr,"\n" ; ; + } + + # Add hrefs - issue if one is a substring of another \W helps. + + #exit if $ruleNum > 2 ; + + ##$ruleId = sane("r".$rulename) ; + + if($rulename eq 'IRIREF') { + print "\n"; + print "\@terminals\n"; + print "\n" ; + } + ## First part of output. + ## $rlabel = '[' . $ruleNum . ']' ; + + $rn = $rulename; + $rn =~ s!^$!!; + ## Second part of output + + ## Third part of the output. + $ruleBodyStr = fixupRule($rulename, $ruleBodyStr) ; + + ##Why no NL needed? + ## printf "%-5s %-25s ::= %s\n", $rlabel, $rn, $ruleBodyStr ; + printf "%-25s ::= %s\n", $rn, $ruleBodyStr ; + +# $rule{$rulename, $rulebody) ; +# print $rulename , "\n" ; +} + +sub readFile { + my $f = $_[0] ; + open(F, "$f") || die "$f: $!"; + my $s = ; + return $s ; +} + +sub esc { + my $s = $_[0] ; + $s =~ s/&/&/g ; + $s =~ s//>/g ; + return $s ; +} + +sub sane { + my $a = $_[0] ; + $a =~ s/\W//g ; + return $a ; +} + +sub fixupRule { + my $head = $_[0] ; + my $body = $_[1] ; + + # Remove unnecessary () + $body =~ s/\(\s*([^()| ]*) \)/$1/g ; + + # Remove outer matching () where there are no inner () + $body =~ s/^\(\s+([^\(]*)\s+\)$/$1/ ; + + # There aren't any of these +## $body =~ s!\(\s+(\S*)\s+\)!$1!g ; + + # Remove <> around tokens. + $body =~ s/<(\w+)>/$1/g ; + + # Specials + # Split long bodies + if ( $head eq "CallExpression" || + $head eq "UnaryExpression" || + $head eq "" || + $head eq "PatternElement" || + $head eq "BuiltInCall" || + $head eq "Aggregate" ) + { + ## Except BNODE, STRLEN etc + ## [138] RegexExpression ::= 'REGEX' '(' Expression ',' Expression ( ',' Expression )? ')' + ## [139] SubstringExpression ::= 'SUBSTR' '(' Expression ',' Expression ( ',' Expression )? ')' + ## [140] StrReplaceExpression ::= 'REPLACE' '(' Expression ',' Expression ',' Expression ( ',' Expression )? 
')' + ## [141] ExistsFunc ::= 'EXISTS' GroupGraphPattern + ## [142] NotExistsFunc ::= 'NOT' 'EXISTS' GroupGraphPattern + ## | 'BNODE' ( '(' Expression ')' | ) + + print STDERR "A:",$body,"\n" if $p ; + + ## Fix up for BNODE that uses "|" - put in marker. + $body =~ s%'BNODE' \( '\(' Expression '\)' \| \ \)%XXX-BNODE-XXX%; + + ## | followed by + $body =~ s%\|\s*%\n | %g ; + + ## Replace marker + $body =~ s%XXX-BNODE-XXX%'BNODE' \( '(' Expression ')' \| \ \)%; + } + + if ( $head eq "Aggregate" ) + { + # Strip outer () + $body =~ s/^\(\s*(.*)\s*\)$/$1/ ; + $body =~ s/^\s+// ; + } + + # These failed the outer () test because they have nested () in them + if ( $head eq "QueryPattern" || + $head eq "OrderCondition" ) + { + # Remove outer () + $body =~ s/^\((.*)\)$/$1/ ; + } + + ## Split? +## if ( $head eq "Query" ) +## { +## } + + #Rules where an outer () is unnecessary. + if ( $head eq "GroupCondition" || + $head eq "LimitOffsetClauses" || + $head eq "GraphOrDefault" || + $head eq "ArgList" || + $head eq "ExpressionList" || + $head eq "PathPrimary" || + $head eq "PathMod" || + $head eq "PathPrimary" || + $head eq "PathNegatedPropertySet" || + $head eq "PathOneInPropertySet") + { + $body =~ s/^\(\s*(.*)\s*\)$/$1/ ; + } + + return $body ; +} diff --git a/jena-arq/Grammar/main.jj b/jena-arq/Grammar/main.jj index 9f14ca63300..928d7d94cad 100644 --- a/jena-arq/Grammar/main.jj +++ b/jena-arq/Grammar/main.jj @@ -216,10 +216,6 @@ void SelectClause() : { Var v ; Expr expr ; Node n ; } | n = NumericLiteral() { getQuery().addResultVar((Var)null, NodeValue.makeNode(n)) ; } | n = BooleanLiteral() { getQuery().addResultVar((Var)null, NodeValue.makeNode(n)) ; } ) - // @@ PROBLEMS: expr = FunctionCall() - // (?x) - // looks like a function call and also a " ( ?v )" - //| expr = FunctionCall() { getQuery().addResultVar((Var)null, expr) ; } | #endif // Expressions with () @@ -894,21 +890,35 @@ Element TriplesBlock(ElementPathBlock acc) : { } { return acc ; } } +Node 
ReifiedTripleBlock(TripleCollector acc) : { Node reifId ; } +{ + reifId = ReifiedTriple(acc) + PropertyList(reifId, acc) // Maybe empty + { return reifId ; } +} + +Node ReifiedTripleBlockPath(TripleCollector acc) : { Node reifId ; } +{ + reifId = ReifiedTriple(acc) + PropertyListPath(reifId, acc) // Maybe empty + { return reifId ; } +} + // ----- Element GraphPatternNotTriples() : { Element el = null ; } { ( -// el = GroupGraphPattern() -// | -// el = UnionGraphPattern() -// | el = GroupOrUnionGraphPattern() | el = OptionalGraphPattern() #ifdef ARQ | el = LateralGraphPattern() + | + el = SemiJoinGraphPattern() + | + el = AntiJoinGraphPattern() #endif | el = MinusGraphPattern() @@ -948,6 +958,16 @@ Element LateralGraphPattern() : { Element el ; } { el = GroupGraphPattern() { return new ElementLateral(el) ; } } + +Element SemiJoinGraphPattern() : { Element el ; } +{ el = GroupGraphPattern() + { return new ElementSemiJoin(el) ; } +} + +Element AntiJoinGraphPattern() : { Element el ; } +{ el = GroupGraphPattern() + { return new ElementAntiJoin(el) ; } +} #endif Element GraphGraphPattern() : { Element el ; Node n ;} @@ -1046,8 +1066,49 @@ Node DataBlockValue() : { Node n ; String iri ; } | n = NumericLiteral() { return n ; } | n = BooleanLiteral() { return n ; } | { return null ; } -| n = QuotedTripleData() { return n ; } +| n = TripleTermData() { return n ; } + +} + +// ---- Reifier, various cases. +// May return null. + +Node Reifier() : { Token tok = null ; Node reifId = null ; } +{ + tok = + (reifId = VarOrReifierId() )? + { return reifId; } +} + +// // No variables. +// Node ReifierData() : { Token tok = null ; Node reifId = null ; } +// { +// tok = +// (reifId = ReifierId() )? 
+// { return reifId; } +// } +// +// Node ReifierId() : { Node n = null ; String iri ; } +// { +// // No variable +// ( iri = iri() { return createNode(iri) ; } +// | n = BlankNode() { return n ; } +// ) +// } +// +// Node VarOrReifierId() : { Node n = null; } +// { +// ( n = Var() { return n; } +// | n = ReifierId() { return n; } +// ) +// } +Node VarOrReifierId() : { Node n = null; String iri = null; } +{ + ( n = Var() { return n; } + | iri = iri() { return createNode(iri) ; } + | n = BlankNode() { return n ; } + ) } #ifdef ARQ @@ -1152,7 +1213,7 @@ Expr FunctionCall() : { String fname ; Args a ; } if ( AggregateRegistry.isRegistered(fname) ) { // aggregates if ( ! getAllowAggregatesInExpressions() ) - throwParseException("Aggregate expression not legal at this point : "+fname, -1, -1) ; + throwParseException("Aggregate expression not legal at this point : "+fname, token.beginLine, token.beginColumn) ; Aggregator agg = AggregatorFactory.createCustom(fname, a) ; Expr exprAgg = getQuery().allocAggregate(agg) ; return exprAgg ; @@ -1195,8 +1256,6 @@ ExprList ExpressionList() : { Expr expr = null ; ExprList exprList = new ExprLis { return exprList ; } } - - // -------- Construct patterns #ifdef ARQ Template ConstructTemplate() : { QuadAcc acc = new QuadAcc() ; @@ -1251,6 +1310,8 @@ void TriplesSameSubject(TripleCollector acc) : { Node s ; } s = TriplesNode(tempAcc) PropertyList(s, tempAcc) { insert(acc, tempAcc) ; } +| + ReifiedTripleBlock(acc) } void PropertyList(Node s, TripleCollector acc) : { } @@ -1289,7 +1350,7 @@ void Object(Node s, Node p, Path path, TripleCollector acc): { Node o ; } { { ElementPathBlock tempAcc = new ElementPathBlock() ; int mark = tempAcc.mark() ; } o = GraphNode(tempAcc) - { insert(tempAcc, mark, s, p, path, o) ; insert(acc, tempAcc) ; } + { insert(tempAcc, mark, s, p, path, o) ; insert(acc, tempAcc) ; } Annotation(acc, s, p, path, o) } @@ -1306,6 +1367,8 @@ void TriplesSameSubjectPath(TripleCollector acc) : { Node s ; } s = 
TriplesNodePath(tempAcc) PropertyListPath(s, tempAcc) { insert(acc, tempAcc) ; } +| + ReifiedTripleBlockPath(acc) } void PropertyListPath(Node s, TripleCollector acc) : { } @@ -1328,12 +1391,8 @@ void PropertyListPathNotEmpty(Node s, TripleCollector acc) : | p = VerbSimple() ) ObjectListPath(s, p, path, acc) - )? + )? )* -#if 0 - | - Reification(s, acc) -#endif } Path VerbPath() : {Node p ; Path path ; } @@ -1349,7 +1408,7 @@ Node VerbSimple() : { Node p ; } { return p ; } } -void ObjectListPath(Node s, Node p, Path path, TripleCollector acc): { Node o ; } +void ObjectListPath(Node s, Node p, Path path, TripleCollector acc): { Node o ; } { ObjectPath(s, p, path, acc) ( ObjectPath(s, p, path, acc) )* @@ -1461,6 +1520,16 @@ Path PathMod(Path p) : { long i1 ; long i2 ; } ) } +#ifdef ARQ +long Integer() : {Token t ;} +{ + t = + { return integerValue(t.image) ; } +} +#endif + + + Path PathPrimary() : { String str ; Path p ; Node n ; } { ( @@ -1518,12 +1587,6 @@ P_Path0 PathOneInPropertySet() : { String str ; Node n ; } ) } -long Integer() : {Token t ;} -{ - t = - { return integerValue(t.image) ; } -} - // -------- Triple expansions // Anything that can stand in a node slot and which is @@ -1531,13 +1594,9 @@ long Integer() : {Token t ;} Node TriplesNode(TripleCollectorMark acc) : { Node n ; } { - n = Collection(acc) { return n ; } - | - n = BlankNodePropertyList(acc) { return n ; } -#if 0 + n = Collection(acc) { return n ; } | - n = Reification(null, acc) { return n ; } -#endif + n = BlankNodePropertyList(acc) { return n ; } } Node BlankNodePropertyList(TripleCollector acc) : { Token t ; } @@ -1551,13 +1610,9 @@ Node BlankNodePropertyList(TripleCollector acc) : { Token t ; } Node TriplesNodePath(TripleCollectorMark acc) : { Node n ; } { - n = CollectionPath(acc) { return n ; } - | - n = BlankNodePropertyListPath(acc) { return n ; } -#if 0 + n = CollectionPath(acc) { return n ; } | - n = Reification(null, acc) { return n ; } -#endif + n = BlankNodePropertyListPath(acc) { 
return n ; } } Node BlankNodePropertyListPath(TripleCollector acc) : { Token t ; } @@ -1569,32 +1624,6 @@ Node BlankNodePropertyListPath(TripleCollector acc) : { Token t ; } { return n ; } } -// << >> as reification. Allows any subject/predicate/object nodes. -#if 0 -Node Reification(Node id, TripleCollectorMark acc) : - { Node s , p , o ; int mark ; Token t ; } -{ - // Insert reification triple before the resulting subtriples (if any) - t = - { int beginLine = t.beginLine; int beginColumn = t.beginColumn; t = null; } - { if ( id == null ) - id = createBNode(beginLine, beginColumn() ; - mark = acc.mark() ; } - s = GraphNode(acc) - { insert(acc, mark, id, nRDFsubject, s) ; - mark = acc.mark() ; - } - p = GraphNode(acc) - { insert(acc, mark, id, nRDFpredicate, p) ; - mark = acc.mark() ; - } - o = GraphNode(acc) - { insert(acc, mark, id, nRDFobject, o) ; } - - { return id ; } -} -#endif - // ------- RDF collections Node Collection(TripleCollectorMark acc) : @@ -1649,95 +1678,229 @@ Node CollectionPath(TripleCollectorMark acc) : return listHead ; } } -// RDF-star Annotation Syntax -void AnnotationPath(TripleCollector acc, Node s, Node p, Path path, Node o) : {} +// ---- RDF-star Annotation Syntax +// Allow paths. Query graph patterns. +void AnnotationPath(TripleCollector acc, Node s, Node p, Path path, Node o) : { Node reifId = null ; } { ( - - { Node pAnn = preConditionAnnotation(s, p, path, o, token.beginLine, token.beginColumn) ; - Node x = createQuotedTriple(s, pAnn, o, token.beginLine, token.beginColumn); - } - PropertyListPathNotEmpty(x, acc) - - )? 
+ { p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); } + reifId = Reifier() + { reifId = insertTripleReifier(acc, reifId, s, p, o, token.beginLine, token.beginColumn) ; } + { setReifierId(reifId); } + | + { + p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); + reifId = getOrAllocReifierId(acc, s, p, o, token.beginLine, token.beginColumn); + } + { clearReifierId(); } + AnnotationBlockPath(acc, reifId) + )* + { clearReifierId(); } } -// RDF-star Annotation Syntax -void Annotation(TripleCollector acc, Node s, Node p, Path path, Node o) : { } +void AnnotationBlockPath(TripleCollector acc, Node reifId) : { } +{ + + PropertyListPathNotEmpty(reifId, acc) + +} + +// ---- RDF-star Annotation Syntax +// No paths. Construct templates. +void Annotation(TripleCollector acc, Node s, Node p, Path path, Node o) : { Node reifId = null ; } { - // path should be null. ( - - { Node pAnn = preConditionAnnotation(s, p, path, o, token.beginLine, token.beginColumn) ; - Node x = createQuotedTriple(s, p, o, token.beginLine, token.beginColumn); - } - PropertyListNotEmpty(x, acc) - - )? + { p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); } + reifId = Reifier() + // @@ Rewrite java? + { reifId = insertTripleReifier(acc, reifId, s, p, o, token.beginLine, token.beginColumn) ; } + { setReifierId(reifId); } + | + { + p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); + reifId = getOrAllocReifierId(acc, s, p, o, token.beginLine, token.beginColumn); + } + { clearReifierId(); } + AnnotationBlock(acc, reifId) + )* +} + +void AnnotationBlock(TripleCollector acc, Node reifId) : { } +{ + // Roll back into Annotation? 
+ + PropertyListNotEmpty(reifId, acc) + } // -------- Nodes in a graph pattern or template +// Object position RDF term, element of a list +// No paths in TriplesNode Node GraphNode(TripleCollectorMark acc) : { Node n ; } { - n = VarOrTerm() { return n ; } + n = VarOrTerm() { return n ; } + | + n = TriplesNode(acc) { return n ; } | - n = TriplesNode(acc) { return n ; } + n = ReifiedTriple(acc) { return n ; } } +// Object position RDF term. NEW-SYNTAX Rename? Node GraphNodePath(TripleCollectorMark acc) : { Node n ; } { n = VarOrTerm() { return n ; } - | +| n = TriplesNodePath(acc) { return n ; } +| + n = ReifiedTriple(acc) { return n ; } } Node VarOrTerm() : { Node n = null ; String iri ; } { ( n = Var() | iri = iri() { return createNode(iri) ; } - | n = RDFLiteral() { return n ; } - | n = NumericLiteral() { return n ; } - | n = BooleanLiteral() { return n ; } - | n = BlankNode() { return n ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() // { return nRDFnil ; } | { return nRDFnil ; } - | n = QuotedTriple() -) - { return n ; } + | n = TripleTerm() { return n; } + ) + { return n; } } -Node QuotedTriple() : { Node n = null ; Token t ; Node s , p , o ; } +Node ReifiedTriple(TripleCollector acc) : { Node reifId = null ; Token tok ; Node s; Node p ; Node o ; } { - t = - s = VarOrTerm() + tok = + s = ReifiedTripleSubject(acc) p = Verb() - o = VarOrTerm() - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } + o = ReifiedTripleObject(acc) + (reifId = Reifier())? 
+ { reifId = insertTripleReifier(acc, reifId, s, p, o, tok.beginLine, tok.beginColumn) ; } + { return reifId; } +} + +Node ReifiedTripleSubject(TripleCollector acc) : { Node s; String iri; } +{ + ( s = Var() + | iri = iri() { s = createNode(iri) ; } + | s = RDFLiteral() + | s = NumericLiteral() + | s = BooleanLiteral() + | s = BlankNode() + | s = ReifiedTriple(acc) + // { s = RDFnil ; } + //| { s = nRDFnil ; } + ) + { return s; } +} + +Node ReifiedTripleObject(TripleCollector acc) : { Node o; String iri; } +{ + ( o = Var() + | iri = iri() { o = createNode(iri) ; } + | o = RDFLiteral() + | o = NumericLiteral() + | o = BooleanLiteral() + | o = BlankNode() + | o = ReifiedTriple(acc) + // { o = nRDFnil ; } + //| { o = nRDFnil ; } + | o = TripleTerm() + ) + { return o; } +} + +Node TripleTerm() : { Node n = null ; Token openToken ; String iri ; Node s , p , o ; } +{ + openToken = + // @@ Alt name. ttSubject()? + s = TripleTermSubject() + p = Verb() + o = TripleTermObject() + { n = createTripleTerm(s, p, o, openToken.beginLine, openToken.beginColumn); } + + { return n ; } +} + +Node TripleTermSubject() : { Node n; String iri; } +{ + ( n = Var() + | iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() + // { return nRDFnil ; } + //| { return nRDFnil ; } + //| n = TripleTerm() { return n; } + ) { return n; } } -Node QuotedTripleData() : { Node n = null ; Token t ; String iri ; Node s , p , o ; } +Node TripleTermObject() : { Node n; String iri; } { - t = - ( s = DataValueTerm() ) - ( iri = iri() { p = createNode(iri) ; } | { p = nRDFtype ; } ) - ( o = DataValueTerm() ) - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } - + ( n = Var() + | iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() + // { return nRDFnil ; } + //| { return nRDFnil ; } + | n = TripleTerm() { return n; } + ) { return n; } } -Node 
DataValueTerm() : { Node n = null ; String iri ; Node s , p , o ; } +Node TripleTermData() : { Node n = null ; Token openToken ; String iri ; Node s , p , o ; } { - iri = iri() { return createNode(iri) ; } -| n = RDFLiteral() { return n ; } -| n = NumericLiteral() { return n ; } -| n = BooleanLiteral() { return n ; } -| n = QuotedTripleData() { return n ; } + openToken = + // @@ Alt name? ttDataValueTerm + s = TripleTermDataSubject() + ( iri = iri() { p = createNode(iri) ; } | { p = nRDFtype ; } ) + o = TripleTermDataObject() + { n = createTripleTerm(s, p, o, openToken.beginLine, openToken.beginColumn); } + + { return n ; } +} + +Node TripleTermDataSubject() : { Node n = null; String iri; } +{ + ( iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + //| n = TripleTermData() + ) + { return n; } } +Node TripleTermDataObject() : { Node n = null; String iri; } +{ + ( iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = TripleTermData() + ) + { return n; } +} + +// // VarOrTerm except no var, no bnodes +// // @@ Rename? +// Node DataValueTerm() : { Node n = null ; String iri ; Node s , p , o ; } +// { +// iri = iri() { return createNode(iri) ; } +// | n = RDFLiteral() { return n ; } +// | n = NumericLiteral() { return n ; } +// | n = BooleanLiteral() { return n ; } +// | n = TripleTermData() { return n; } +// } + // e.g. 
Property (if no bNodes) + DESCRIBE Node VarOrIri() : {Node n = null ; String iri ; } { @@ -1893,15 +2056,14 @@ Expr PrimaryExpression() : { Expr expr ; Node n ; } ( expr = BrackettedExpression() { return expr ; } | expr = BuiltInCall() { return expr ; } | expr = iriOrFunction() { return expr ; } -// NOT | n = VarOrTerm() { return asExpr(n) ; } +// NOT | n = ExprVarOrTerm() { return asExpr(n) ; } // Because of iriOrFunction // Can't use iri() here | n = RDFLiteral() { return asExpr(n) ; } | n = NumericLiteral() { return asExpr(n) ; } | n = BooleanLiteral() { return asExpr(n) ; } | n = Var() { return asExpr(n) ; } - | n = ExprQuotedTriple() { return asExpr(n) ; } - + | n = ExprTripleTerm() { return asExpr(n) ; } // and not SPARQL 12 // needs checking. // Use this for ?var(args) @@ -1909,28 +2071,56 @@ Expr PrimaryExpression() : { Expr expr ; Node n ; } ) } -Node ExprVarOrTerm() : { Node n; String s;} +// Node ExprVarOrTerm() : { Node n; String s;} +// { +// ( s = iri() { n = createNode(s); } +// | n = RDFLiteral() +// | n = NumericLiteral() +// | n = BooleanLiteral() +// | n = Var() +// | n = ExprTripleTerm() +// ) +// { return n; } +// } + +Node ExprTripleTerm() : { Token t ; Node s,p,o,n; } { - ( s = iri() { n = createNode(s); } + t = + s = ExprTripleTermSubject() + p = Verb() + o = ExprTripleTermObject() + { n = createTripleTerm(s, p, o, t.beginLine, t.beginColumn); } + + { return n; } +} + +Node ExprTripleTermSubject() : { Node n; String iri; } +{ + ( iri = iri() { n = createNode(iri); } | n = RDFLiteral() | n = NumericLiteral() | n = BooleanLiteral() | n = Var() - | n = ExprQuotedTriple() + //| n = ExprTripleTerm() ) { return n; } } -Node ExprQuotedTriple() : { Token t ; Node s,p,o,n; } -{ t = - s = ExprVarOrTerm() - p = Verb() - o = ExprVarOrTerm() - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } - +Node ExprTripleTermObject() : { Node n; String iri; } +{ + ( iri = iri() { n = createNode(iri); } + | n = RDFLiteral() + | n = NumericLiteral() + 
| n = BooleanLiteral() + | n = Var() + | n = ExprTripleTerm() + ) { return n; } } + + + Expr BrackettedExpression() : { Expr expr ; } { expr = Expression() { return expr ; } @@ -1944,6 +2134,7 @@ Expr BuiltInCall() : { Expr expr ; | expr = Expression() { return new E_Str(expr) ; } + | expr = Expression() { return new E_Lang(expr) ; } @@ -1951,6 +2142,9 @@ Expr BuiltInCall() : { Expr expr ; expr1 = Expression() expr2 = Expression() { return new E_LangMatches(expr1, expr2) ; } + | expr = Expression() + { return new E_LangDir(expr) ; } + | expr = Expression() { return new E_Datatype(expr) ; } @@ -2094,6 +2288,11 @@ Expr BuiltInCall() : { Expr expr ; | expr1 = Expression() expr2 = Expression() { return new E_StrLang(expr1, expr2) ; } + | expr1 = Expression() + expr2 = Expression() + expr3 = Expression() + { return new E_StrLangDir(expr1, expr2, expr3) ; } + | expr1 = Expression() expr2 = Expression() { return new E_StrDatatype(expr1, expr2) ; } @@ -2114,6 +2313,13 @@ Expr BuiltInCall() : { Expr expr ; | expr = Expression() { return new E_IsNumeric(expr) ; } + + | expr = Expression() + { return new E_HasLang(expr) ; } + + | expr = Expression() + { return new E_HasLangDir(expr) ; } + | // Regular expression matcher expr = RegexExpression() { return expr ; } @@ -2121,10 +2327,12 @@ Expr BuiltInCall() : { Expr expr ; | expr = NotExistsFunc() { return expr ; } -| expr = Expression() + | expr = Expression() { return new E_IsTriple(expr) ; } - | expr1 = Expression() expr2 = Expression() expr3 = Expression() + | expr1 = Expression() + expr2 = Expression() + expr3 = Expression() { return new E_TripleFn(expr1, expr2, expr3) ; } | expr = Expression() @@ -2327,7 +2535,7 @@ Expr iriOrFunction() : { String iri ; Args a = null ; } if ( AggregateRegistry.isRegistered(iri) ) { // aggregates if ( ! 
getAllowAggregatesInExpressions() ) - throwParseException("Aggregate expression not legal at this point : "+iri, -1, -1) ; + throwParseException("Aggregate expression not legal at this point : "+iri, token.beginLine, token.beginColumn) ; Aggregator agg = AggregatorFactory.createCustom(iri, a) ; Expr exprAgg = getQuery().allocAggregate(agg) ; return exprAgg ; @@ -2351,17 +2559,18 @@ Expr VarOrFunction() : { Var v ; ExprList a = null ; } #endif -Node RDFLiteral() : { Token t ; String lex = null ; } +Node RDFLiteral() : { Token t ; String lex = null ; String uri = null ; } { lex = String() // Optional lang tag and datatype. - { String lang = null ; String uri = null ; } ( - ( t = { lang = stripChars(t.image, 1) ; } ) + t = + { return createLiteralLang(lex, t.image, token.beginLine, token.beginColumn); } | - ( uri = iri() ) + uri = iri() + { return createLiteralDT(lex, uri, token.beginLine, token.beginColumn); } )? - { return createLiteral(lex, lang, uri) ; } + { return createLiteralString(lex, token.beginLine, token.beginColumn) ; } } @@ -2501,7 +2710,7 @@ TOKEN: | | ["0"-"9"]) ((|".")* )? > | > | > -| ()+("-" ()+)* > +| ()+("-" ()+)* ( "--" ()* )? 
> | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> } @@ -2556,7 +2765,9 @@ TOKEN [IGNORE_CASE] : #ifdef ARQ | < LET: "LET" > -| < LATERAL: "LATERAL" > +| < LATERAL: "lateral" > +| < SEMIJOIN: "semijoin" > +| < ANTIJOIN: "antijoin" > | < UNFOLD: "unfold" > #endif @@ -2615,15 +2826,20 @@ TOKEN [IGNORE_CASE] : | < STR: "str" > | < STRLANG: "strlang" > +| < STRLANGDIR: "strlangdir" > + | < STRDT: "strdt" > | < DTYPE: "datatype" > | < LANG: "lang" > | < LANGMATCHES: "langmatches" > +| < LANGDIR: "langdir" > | < IS_URI: "isURI" > | < IS_IRI: "isIRI" > | < IS_BLANK: "isBlank" > | < IS_LITERAL: "isLiteral" > | < IS_NUMERIC: "isNumeric" > +| < HAS_LANG: "hasLang" > +| < HAS_LANGDIR: "hasLangDir" > | < REGEX: "regex" > | < SAME_TERM: "sameTerm" > @@ -2796,13 +3012,15 @@ TOKEN : | < LE: "<=" > // Maybe: | "=>" > | < GE: ">=" > // Maybe: | "=<" > -| < GT2: ">>" > +| < L_TRIPLE: "<<(" > +| < R_TRIPLE: ")>>" > | < LT2: "<<" > -| -| +| < GT2: ">>" > +| < L_ANN: "{|" > +| < R_ANN: "|}" > -| < BANG: "!" > | < TILDE: "~" > +| < BANG: "!" 
> | < COLON: ":" > | < SC_OR: "||" > diff --git a/jena-arq/Grammar/sparql2bnf b/jena-arq/Grammar/sparql2bnf new file mode 100755 index 00000000000..74b1b9034db --- /dev/null +++ b/jena-arq/Grammar/sparql2bnf @@ -0,0 +1,7 @@ +#!/usr/bin/bash +## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +echo "Grammar to sparql.bnf (text)" + +( + jj2bnf 'sparql_12.txt' 'tokens.txt' +) > sparql.bnf diff --git a/jena-arq/Grammar/sparql2html b/jena-arq/Grammar/sparql2html index 19320cd406a..e2d449bf978 100755 --- a/jena-arq/Grammar/sparql2html +++ b/jena-arq/Grammar/sparql2html @@ -1,13 +1,21 @@ +#!/usr/bin/bash ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 -echo "Grammar to X.html, fragments file to Y.html" +echo "Grammar output in sparql-grammar.html" ##jj2html 'sparql_11.txt' 'tokens.txt' > X11.html ##grammarExtracts < X11.html > Y11.html -jj2html 'sparql_12.txt' 'tokens.txt' > X12.html -grammarExtracts < X12.html > Y12.html +( + echo " " + jj2html 'sparql_12.txt' 'tokens.txt' + echo " " +) > sparql-grammar.html + +## Produce a standalone web page +## mv sparql-grammar.html X12.html +## grammarExtracts < sparql-grammar.html > sparql-html.html ## jj2html 'arq_12.txt' 'tokens.txt' > X.html ## grammarExtracts < X.html > Y.html -echo "Check X and Y for IRI_REF because \" became '" \ No newline at end of file +##?? echo "Check X and Y for IRI_REF because \" became '" diff --git a/jena-arq/Grammar/sparql_12.jj b/jena-arq/Grammar/sparql_12.jj index 0073d7880b8..8154839f003 100644 --- a/jena-arq/Grammar/sparql_12.jj +++ b/jena-arq/Grammar/sparql_12.jj @@ -525,6 +525,18 @@ Element TriplesBlock(ElementPathBlock acc) : { } ( (TriplesBlock(acc))? )? 
{ return acc ; } } +Node ReifiedTripleBlock(TripleCollector acc) : { Node reifId ; } +{ + reifId = ReifiedTriple(acc) + PropertyList(reifId, acc) + { return reifId ; } +} +Node ReifiedTripleBlockPath(TripleCollector acc) : { Node reifId ; } +{ + reifId = ReifiedTriple(acc) + PropertyListPath(reifId, acc) + { return reifId ; } +} Element GraphPatternNotTriples() : { Element el = null ; } { ( @@ -639,7 +651,20 @@ Node DataBlockValue() : { Node n ; String iri ; } | n = NumericLiteral() { return n ; } | n = BooleanLiteral() { return n ; } | { return null ; } -| n = QuotedTripleData() { return n ; } +| n = TripleTermData() { return n ; } +} +Node Reifier() : { Token tok = null ; Node reifId = null ; } +{ + tok = + (reifId = VarOrReifierId() )? + { return reifId; } +} +Node VarOrReifierId() : { Node n = null; String iri = null; } +{ + ( n = Var() { return n; } + | iri = iri() { return createNode(iri) ; } + | n = BlankNode() { return n ; } + ) } Element MinusGraphPattern() : { Element el ; } { @@ -683,7 +708,7 @@ Expr FunctionCall() : { String fname ; Args a ; } { if ( AggregateRegistry.isRegistered(fname) ) { if ( ! 
getAllowAggregatesInExpressions() ) - throwParseException("Aggregate expression not legal at this point : "+fname, -1, -1) ; + throwParseException("Aggregate expression not legal at this point : "+fname, token.beginLine, token.beginColumn) ; Aggregator agg = AggregatorFactory.createCustom(fname, a) ; Expr exprAgg = getQuery().allocAggregate(agg) ; return exprAgg ; @@ -747,6 +772,8 @@ void TriplesSameSubject(TripleCollector acc) : { Node s ; } s = TriplesNode(tempAcc) PropertyList(s, tempAcc) { insert(acc, tempAcc) ; } +| + ReifiedTripleBlock(acc) } void PropertyList(Node s, TripleCollector acc) : { } { @@ -790,6 +817,8 @@ void TriplesSameSubjectPath(TripleCollector acc) : { Node s ; } s = TriplesNodePath(tempAcc) PropertyListPath(s, tempAcc) { insert(acc, tempAcc) ; } +| + ReifiedTripleBlockPath(acc) } void PropertyListPath(Node s, TripleCollector acc) : { } { @@ -911,16 +940,11 @@ P_Path0 PathOneInPropertySet() : { String str ; Node n ; } ) ) } -long Integer() : {Token t ;} -{ - t = - { return integerValue(t.image) ; } -} Node TriplesNode(TripleCollectorMark acc) : { Node n ; } { - n = Collection(acc) { return n ; } + n = Collection(acc) { return n ; } | - n = BlankNodePropertyList(acc) { return n ; } + n = BlankNodePropertyList(acc) { return n ; } } Node BlankNodePropertyList(TripleCollector acc) : { Token t ; } { @@ -932,9 +956,9 @@ Node BlankNodePropertyList(TripleCollector acc) : { Token t ; } } Node TriplesNodePath(TripleCollectorMark acc) : { Node n ; } { - n = CollectionPath(acc) { return n ; } + n = CollectionPath(acc) { return n ; } | - n = BlankNodePropertyListPath(acc) { return n ; } + n = BlankNodePropertyListPath(acc) { return n ; } } Node BlankNodePropertyListPath(TripleCollector acc) : { Token t ; } { @@ -992,80 +1016,177 @@ Node CollectionPath(TripleCollectorMark acc) : insert(acc, lastCell, nRDFrest, nRDFnil) ; return listHead ; } } -void AnnotationPath(TripleCollector acc, Node s, Node p, Path path, Node o) : {} +void 
AnnotationPath(TripleCollector acc, Node s, Node p, Path path, Node o) : { Node reifId = null ; } { ( - - { Node pAnn = preConditionAnnotation(s, p, path, o, token.beginLine, token.beginColumn) ; - Node x = createQuotedTriple(s, pAnn, o, token.beginLine, token.beginColumn); - } - PropertyListPathNotEmpty(x, acc) - - )? + { p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); } + reifId = Reifier() + { reifId = insertTripleReifier(acc, reifId, s, p, o, token.beginLine, token.beginColumn) ; } + { setReifierId(reifId); } + | + { + p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); + reifId = getOrAllocReifierId(acc, s, p, o, token.beginLine, token.beginColumn); + } + { clearReifierId(); } + AnnotationBlockPath(acc, reifId) + )* + { clearReifierId(); } +} +void AnnotationBlockPath(TripleCollector acc, Node reifId) : { } +{ + + PropertyListPathNotEmpty(reifId, acc) + } -void Annotation(TripleCollector acc, Node s, Node p, Path path, Node o) : { } +void Annotation(TripleCollector acc, Node s, Node p, Path path, Node o) : { Node reifId = null ; } { ( - - { Node pAnn = preConditionAnnotation(s, p, path, o, token.beginLine, token.beginColumn) ; - Node x = createQuotedTriple(s, p, o, token.beginLine, token.beginColumn); - } - PropertyListNotEmpty(x, acc) - - )? 
+ { p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); } + reifId = Reifier() + { reifId = insertTripleReifier(acc, reifId, s, p, o, token.beginLine, token.beginColumn) ; } + { setReifierId(reifId); } + | + { + p = preConditionReifier(s, p, path, o, token.beginLine, token.beginColumn); + reifId = getOrAllocReifierId(acc, s, p, o, token.beginLine, token.beginColumn); + } + { clearReifierId(); } + AnnotationBlock(acc, reifId) + )* +} +void AnnotationBlock(TripleCollector acc, Node reifId) : { } +{ + + PropertyListNotEmpty(reifId, acc) + } Node GraphNode(TripleCollectorMark acc) : { Node n ; } { - n = VarOrTerm() { return n ; } + n = VarOrTerm() { return n ; } + | + n = TriplesNode(acc) { return n ; } | - n = TriplesNode(acc) { return n ; } + n = ReifiedTriple(acc) { return n ; } } Node GraphNodePath(TripleCollectorMark acc) : { Node n ; } { n = VarOrTerm() { return n ; } - | +| n = TriplesNodePath(acc) { return n ; } +| + n = ReifiedTriple(acc) { return n ; } } Node VarOrTerm() : { Node n = null ; String iri ; } { ( n = Var() | iri = iri() { return createNode(iri) ; } - | n = RDFLiteral() { return n ; } - | n = NumericLiteral() { return n ; } - | n = BooleanLiteral() { return n ; } - | n = BlankNode() { return n ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() | { return nRDFnil ; } - | n = QuotedTriple() -) - { return n ; } + | n = TripleTerm() { return n; } + ) + { return n; } } -Node QuotedTriple() : { Node n = null ; Token t ; Node s , p , o ; } +Node ReifiedTriple(TripleCollector acc) : { Node reifId = null ; Token tok ; Node s; Node p ; Node o ; } { - t = - s = VarOrTerm() + tok = + s = ReifiedTripleSubject(acc) p = Verb() - o = VarOrTerm() - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } + o = ReifiedTripleObject(acc) + (reifId = Reifier())? 
+ { reifId = insertTripleReifier(acc, reifId, s, p, o, tok.beginLine, tok.beginColumn) ; } + { return reifId; } +} +Node ReifiedTripleSubject(TripleCollector acc) : { Node s; String iri; } +{ + ( s = Var() + | iri = iri() { s = createNode(iri) ; } + | s = RDFLiteral() + | s = NumericLiteral() + | s = BooleanLiteral() + | s = BlankNode() + | s = ReifiedTriple(acc) + ) + { return s; } +} +Node ReifiedTripleObject(TripleCollector acc) : { Node o; String iri; } +{ + ( o = Var() + | iri = iri() { o = createNode(iri) ; } + | o = RDFLiteral() + | o = NumericLiteral() + | o = BooleanLiteral() + | o = BlankNode() + | o = ReifiedTriple(acc) + | o = TripleTerm() + ) + { return o; } +} +Node TripleTerm() : { Node n = null ; Token openToken ; String iri ; Node s , p , o ; } +{ + openToken = + s = TripleTermSubject() + p = Verb() + o = TripleTermObject() + { n = createTripleTerm(s, p, o, openToken.beginLine, openToken.beginColumn); } + + { return n ; } +} +Node TripleTermSubject() : { Node n; String iri; } +{ + ( n = Var() + | iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() + ) { return n; } } -Node QuotedTripleData() : { Node n = null ; Token t ; String iri ; Node s , p , o ; } +Node TripleTermObject() : { Node n; String iri; } { - t = - ( s = DataValueTerm() ) - ( iri = iri() { p = createNode(iri) ; } | { p = nRDFtype ; } ) - ( o = DataValueTerm() ) - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } - + ( n = Var() + | iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = BlankNode() + | n = TripleTerm() { return n; } + ) { return n; } } -Node DataValueTerm() : { Node n = null ; String iri ; Node s , p , o ; } +Node TripleTermData() : { Node n = null ; Token openToken ; String iri ; Node s , p , o ; } { - iri = iri() { return createNode(iri) ; } -| n = RDFLiteral() { return n ; } -| n = NumericLiteral() { return n ; } 
-| n = BooleanLiteral() { return n ; } -| n = QuotedTripleData() { return n ; } + openToken = + s = TripleTermDataSubject() + ( iri = iri() { p = createNode(iri) ; } | { p = nRDFtype ; } ) + o = TripleTermDataObject() + { n = createTripleTerm(s, p, o, openToken.beginLine, openToken.beginColumn); } + + { return n ; } +} +Node TripleTermDataSubject() : { Node n = null; String iri; } +{ + ( iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + ) + { return n; } +} +Node TripleTermDataObject() : { Node n = null; String iri; } +{ + ( iri = iri() { n = createNode(iri) ; } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = TripleTermData() + ) + { return n; } } Node VarOrIri() : {Node n = null ; String iri ; } { @@ -1194,27 +1315,38 @@ Expr PrimaryExpression() : { Expr expr ; Node n ; } | n = NumericLiteral() { return asExpr(n) ; } | n = BooleanLiteral() { return asExpr(n) ; } | n = Var() { return asExpr(n) ; } - | n = ExprQuotedTriple() { return asExpr(n) ; } + | n = ExprTripleTerm() { return asExpr(n) ; } ) } -Node ExprVarOrTerm() : { Node n; String s;} +Node ExprTripleTerm() : { Token t ; Node s,p,o,n; } { - ( s = iri() { n = createNode(s); } + t = + s = ExprTripleTermSubject() + p = Verb() + o = ExprTripleTermObject() + { n = createTripleTerm(s, p, o, t.beginLine, t.beginColumn); } + + { return n; } +} +Node ExprTripleTermSubject() : { Node n; String iri; } +{ + ( iri = iri() { n = createNode(iri); } | n = RDFLiteral() | n = NumericLiteral() | n = BooleanLiteral() | n = Var() - | n = ExprQuotedTriple() ) { return n; } } -Node ExprQuotedTriple() : { Token t ; Node s,p,o,n; } -{ t = - s = ExprVarOrTerm() - p = Verb() - o = ExprVarOrTerm() - { n = createQuotedTriple(s, p, o, t.beginLine, t.beginColumn); } - +Node ExprTripleTermObject() : { Node n; String iri; } +{ + ( iri = iri() { n = createNode(iri); } + | n = RDFLiteral() + | n = NumericLiteral() + | n = BooleanLiteral() + | n = Var() 
+ | n = ExprTripleTerm() + ) { return n; } } Expr BrackettedExpression() : { Expr expr ; } @@ -1234,6 +1366,8 @@ Expr BuiltInCall() : { Expr expr ; | expr1 = Expression() expr2 = Expression() { return new E_LangMatches(expr1, expr2) ; } + | expr = Expression() + { return new E_LangDir(expr) ; } | expr = Expression() { return new E_Datatype(expr) ; } | gn = Var() @@ -1294,6 +1428,10 @@ Expr BuiltInCall() : { Expr expr ; { return new E_Conditional(expr, expr1, expr2) ; } | expr1 = Expression() expr2 = Expression() { return new E_StrLang(expr1, expr2) ; } + | expr1 = Expression() + expr2 = Expression() + expr3 = Expression() + { return new E_StrLangDir(expr1, expr2, expr3) ; } | expr1 = Expression() expr2 = Expression() { return new E_StrDatatype(expr1, expr2) ; } | expr1 = Expression() expr2 = Expression() @@ -1308,13 +1446,19 @@ Expr BuiltInCall() : { Expr expr ; { return new E_IsLiteral(expr) ; } | expr = Expression() { return new E_IsNumeric(expr) ; } + | expr = Expression() + { return new E_HasLang(expr) ; } + | expr = Expression() + { return new E_HasLangDir(expr) ; } | expr = RegexExpression() { return expr ; } | expr = ExistsFunc() { return expr ; } | expr = NotExistsFunc() { return expr ; } -| expr = Expression() + | expr = Expression() { return new E_IsTriple(expr) ; } - | expr1 = Expression() expr2 = Expression() expr3 = Expression() + | expr1 = Expression() + expr2 = Expression() + expr3 = Expression() { return new E_TripleFn(expr1, expr2, expr3) ; } | expr = Expression() { return new E_TripleSubject(expr) ; } @@ -1421,7 +1565,7 @@ Expr iriOrFunction() : { String iri ; Args a = null ; } return asExpr(createNode(iri)) ; if ( AggregateRegistry.isRegistered(iri) ) { if ( ! 
getAllowAggregatesInExpressions() ) - throwParseException("Aggregate expression not legal at this point : "+iri, -1, -1) ; + throwParseException("Aggregate expression not legal at this point : "+iri, token.beginLine, token.beginColumn) ; Aggregator agg = AggregatorFactory.createCustom(iri, a) ; Expr exprAgg = getQuery().allocAggregate(agg) ; return exprAgg ; @@ -1429,16 +1573,17 @@ Expr iriOrFunction() : { String iri ; Args a = null ; } return new E_Function(iri, a) ; } } -Node RDFLiteral() : { Token t ; String lex = null ; } +Node RDFLiteral() : { Token t ; String lex = null ; String uri = null ; } { lex = String() - { String lang = null ; String uri = null ; } ( - ( t = { lang = stripChars(t.image, 1) ; } ) + t = + { return createLiteralLang(lex, t.image, token.beginLine, token.beginColumn); } | - ( uri = iri() ) + uri = iri() + { return createLiteralDT(lex, uri, token.beginLine, token.beginColumn); } )? - { return createLiteral(lex, lang, uri) ; } + { return createLiteralString(lex, token.beginLine, token.beginColumn) ; } } Node NumericLiteral() : { Node n ; } { @@ -1532,7 +1677,7 @@ TOKEN: | | ["0"-"9"]) ((|".")* )? > | > | > -| ()+("-" ()+)* > +| ()+("-" ()+)* ( "--" ()* )? > | <#A2Z: ["a"-"z","A"-"Z"]> | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> } @@ -1600,15 +1745,19 @@ TOKEN [IGNORE_CASE] : | < URI: "uri" > | < STR: "str" > | < STRLANG: "strlang" > +| < STRLANGDIR: "strlangdir" > | < STRDT: "strdt" > | < DTYPE: "datatype" > | < LANG: "lang" > | < LANGMATCHES: "langmatches" > +| < LANGDIR: "langdir" > | < IS_URI: "isURI" > | < IS_IRI: "isIRI" > | < IS_BLANK: "isBlank" > | < IS_LITERAL: "isLiteral" > | < IS_NUMERIC: "isNumeric" > +| < HAS_LANG: "hasLang" > +| < HAS_LANGDIR: "hasLangDir" > | < REGEX: "regex" > | < SAME_TERM: "sameTerm" > | < RAND: "RAND" > @@ -1729,12 +1878,14 @@ TOKEN : | < LT: "<" > | < LE: "<=" > | < GE: ">=" > -| < GT2: ">>" > +| < L_TRIPLE: "<<(" > +| < R_TRIPLE: ")>>" > | < LT2: "<<" > -| -| -| < BANG: "!" 
> +| < GT2: ">>" > +| < L_ANN: "{|" > +| < R_ANN: "|}" > | < TILDE: "~" > +| < BANG: "!" > | < COLON: ":" > | < SC_OR: "||" > | < SC_AND: "&&" > diff --git a/jena-arq/Grammar/sparql_12.txt b/jena-arq/Grammar/sparql_12.txt deleted file mode 100644 index 98411b5f43c..00000000000 --- a/jena-arq/Grammar/sparql_12.txt +++ /dev/null @@ -1,486 +0,0 @@ - -DOCUMENT START -TOKENS - SKIP : { -" " -| "\t" -| "\n" -| "\r" -| "\f" -} - - SPECIAL : { - -} - - TOKEN : { -<#WS: " " | "\t" | "\n" | "\r" | "\f"> -| <#WSC: | > -} - - TOKEN : { -","<","\"","{","}","^","\\","|","`","\u0000"-" "] | )* ">"> -| )? ":"> -| > -| | ["0"-"9"]) (( | ".")* )?> -| > -| > -| ()+ ("-" ()+)*> -| <#A2Z: ["a"-"z","A"-"Z"]> -| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> -} - - TOKEN : { - -} - - TOKEN [IGNORE_CASE] : { - -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -} - - TOKEN [IGNORE_CASE] : { - -| -| -| ()* > -| ()* > -| ()* > -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -} - - TOKEN : { -<#DIGITS: (["0"-"9"])+> -| > -| )? "." > -| | "." (["0"-"9"])+ | (["0"-"9"])+ > -| > -| > -| > -| > -| > -| > -| <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+> -| <#QUOTE_3D: "\"\"\""> -| <#QUOTE_3S: "\'\'\'"> -| -| <#UCHAR: > -| <#UCHAR4: "\\" "u" > -| <#UCHAR8: "\\" "U" > -| | )* "\'"> -| | )* "\""> -| (("\'" | "\'\'")? (~["\'","\\"] | | ))* > -| (("\"" | "\"\"")? 
(~["\"","\\"] | | ))* > -} - - TOKEN : { - -| -| ()* > -| -| -| -| -| ()* > -| -| -| -| "> -| -| ="> -| >"> -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| "> -| -| -} - - TOKEN : { -<#PN_CHARS_BASE: ["A"-"Z"] | ["a"-"z"] | ["\u00c0"-"\u00d6"] | ["\u00d8"-"\u00f6"] | ["\u00f8"-"\u02ff"] | ["\u0370"-"\u037d"] | ["\u037f"-"\u1fff"] | ["\u200c"-"\u200d"] | ["\u2070"-"\u218f"] | ["\u2c00"-"\u2fef"] | ["\u3001"-"\ud7ff"] | ["\uf900"-"\ufffd"]> -| <#PN_CHARS_U: | "_"> -| <#PN_CHARS: | "-" | ["0"-"9"] | "\u00b7" | ["\u0300"-"\u036f"] | ["\u203f"-"\u2040"]> -| <#PN_PREFIX: (( | ".")* )?> -| <#PN_LOCAL: ( | ":" | ["0"-"9"] | ) (( | "." | ":" | )* ( | ":" | ))?> -| <#VARNAME: ( | ["0"-"9"]) ( | ["0"-"9"] | "\u00b7" | ["\u0300"-"\u036f"] | ["\u203f"-"\u2040"])*> -| <#PN_LOCAL_ESC: "\\" ("_" | "~" | "." | "-" | "!" | "$" | "&" | "\'" | "(" | ")" | "*" | "+" | "," | ";" | "=" | "/" | "?" | "#" | "@" | "%")> -| <#PLX: | > -| <#HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"]> -| <#PERCENT: "%" > -} - - TOKEN : { -<#UNKNOWN: (~[" ","\t","\n","\r","\f"])+> -} - -NON-TERMINALS - QueryUnit := Query - Query := Prologue ( SelectQuery | ConstructQuery | DescribeQuery | AskQuery ) ValuesClause - UpdateUnit := Update - Prologue := ( BaseDecl | PrefixDecl )* - BaseDecl := IRIREF - PrefixDecl := IRIREF - SelectQuery := SelectClause ( DatasetClause )* WhereClause SolutionModifier - SubSelect := SelectClause WhereClause SolutionModifier ValuesClause - SelectClause :=