Skip to content

Commit

Permalink
apache GH-2887: Tolerate non-NFC IRIs (IRIProviderJenaIRI)
Browse files Browse the repository at this point in the history
  • Loading branch information
afs committed Dec 9, 2024
1 parent 3d34ac9 commit 35cacd0
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -249,23 +249,24 @@ private static IRI exceptions(IRI iri, String iriStr) {
// Global settings below; this section is for conditional filtering.
// See also Checker.iriViolations for WARN filtering.
switch(code) {
case Violation.PROHIBITED_COMPONENT_PRESENT:
case Violation.PROHIBITED_COMPONENT_PRESENT->{
// Allow "u:p@" when non-strict.
// Jena3 compatibility.
if ( isHTTP(iri) && ! STRICT_HTTP && v.getComponent() == IRIComponents.USER )
continue;
break;
case Violation.SCHEME_PATTERN_MATCH_FAILED:
}
case Violation.SCHEME_PATTERN_MATCH_FAILED->{
if ( isURN(iri) && ! STRICT_URN )
continue;
if ( isFILE(iri) )
continue;
break;
case Violation.REQUIRED_COMPONENT_MISSING:
}
case Violation.REQUIRED_COMPONENT_MISSING->{
// jena-iri handling of "file:" URIs is only for (an interpretation of) RFC 1738.
// RFC8089 allows relative file URIs and a wider use of characters.
if ( isFILE(iri) )
continue;
}
}
// Signal first error.
String msg = v.getShortMessage();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,11 @@ public static IRIFactory iriCheckerFactory() {
setErrorWarning(iriCheckerFactory, ViolationCodes.NON_INITIAL_DOT_SEGMENT, false, false);

// == Character related.
//setErrorWarning(iriFactoryInst, ViolationCodes.NOT_NFC, false, false);
// Causes confusion! And this is only advice in RDF Concepts.
setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFC, false, false);
// NFKC is not mentioned in RDF 1.1. Switch off.
setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFKC, false, false);
// The MAYBE ViolationCodes are never generated.

// ** Applies to various unicode blocks.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@

/**
* Test of parsing and schema violations.
* <p>s
* This is the test suite that compares result with jena-iri.
* <p>
* This is the test suite that compares results with jena-iri.
* See also {@link TestIRIxSyntaxRFC3986} for RFC 3986 syntax-only parsing.
*/
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
Expand Down Expand Up @@ -90,6 +90,9 @@ public TestIRIxJenaSystem(String name, IRIProvider provider) {

// A ':' appearing in a later path segment; expected to be accepted (checked via good(...)).
@Test
public void parse_18() {
    good("/z/a:b");
}

// GH-2887: this character comes from a report on users@jena; presumably it is not in NFC form and must still be accepted.
@Test public void parse_nfc() { good("http://host/ή"); }

// ---- bad

// Leading ':'
Expand Down

0 comments on commit 35cacd0

Please sign in to comment.