diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java index 222e0e18938..e6c60ceb35a 100644 --- a/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java +++ b/jena-core/src/main/java/org/apache/jena/irix/IRIProviderJenaIRI.java @@ -249,23 +249,24 @@ private static IRI exceptions(IRI iri, String iriStr) { // Global settings below; this section is for conditional filtering. // See also Checker.iriViolations for WARN filtering. switch(code) { - case Violation.PROHIBITED_COMPONENT_PRESENT: + case Violation.PROHIBITED_COMPONENT_PRESENT->{ // Allow "u:p@" when non-strict. // Jena3 compatibility. if ( isHTTP(iri) && ! STRICT_HTTP && v.getComponent() == IRIComponents.USER ) continue; - break; - case Violation.SCHEME_PATTERN_MATCH_FAILED: + } + case Violation.SCHEME_PATTERN_MATCH_FAILED->{ if ( isURN(iri) && ! STRICT_URN ) continue; if ( isFILE(iri) ) continue; - break; - case Violation.REQUIRED_COMPONENT_MISSING: + } + case Violation.REQUIRED_COMPONENT_MISSING->{ // jena-iri handling of "file:" URIs is only for (an interpretation of) RFC 1738. // RFC8089 allows relative file URIs and a wider use of characters. if ( isFILE(iri) ) continue; + } } // Signal first error. String msg = v.getShortMessage(); diff --git a/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java b/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java index 16392369806..968e48a7ee0 100644 --- a/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java +++ b/jena-core/src/main/java/org/apache/jena/irix/SetupJenaIRI.java @@ -99,9 +99,11 @@ public static IRIFactory iriCheckerFactory() { setErrorWarning(iriCheckerFactory, ViolationCodes.NON_INITIAL_DOT_SEGMENT, false, false); // == Character related. - //setErrorWarning(iriFactoryInst, ViolationCodes.NOT_NFC, false, false); + // Causes confusion! And this is only advice in RDF Concepts. + setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFC, false, false); // NFKC is not mentioned in RDF 1.1. Switch off. setErrorWarning(iriCheckerFactory, ViolationCodes.NOT_NFKC, false, false); + // The MAYBE ViolationCodes are never generated. // ** Applies to various unicode blocks. diff --git a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java index cb3543f3f7a..86076feb265 100644 --- a/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java +++ b/jena-core/src/test/java/org/apache/jena/irix/TestIRIxJenaSystem.java @@ -33,8 +33,8 @@ /** * Test of parsing and schema violations. - *

s - * This is the test suite that compares result with jena-iri. + *

+ * This is the test suite that compares results with jena-iri. * See also {@link TestIRIxSyntaxRFC3986} for RDF 3986 syntax only parsing. */ @FixMethodOrder(MethodSorters.NAME_ASCENDING) @@ -90,6 +90,9 @@ public TestIRIxJenaSystem(String name, IRIProvider provider) { @Test public void parse_18() { good("/z/a:b"); } + // This character is from a report on users@jena. + @Test public void parse_nfc() { good("http://host/ή"); } + // ---- bad // Leading ':'