From cdfe7dc41d05e5dbbf3ec4199b318f68f0c8e4df Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Mon, 29 Apr 2024 18:29:19 +0400 Subject: [PATCH] Add email and idn-email format support (#103) Related to #54 --- README.md | 21 +-- .../general/FormatAssertionFactory.kt | 4 + .../formats/AbstractEmailFormatValidator.kt | 127 ++++++++++++++++++ .../internal/formats/EmailFormatValidator.kt | 3 + .../formats/IdnEmailFormatValidator.kt | 13 ++ .../json/schema/internal/formats/UriSpec.kt | 7 +- .../formats/UriTemplateFormatValidator.kt | 27 +--- .../schema/internal/formats/Validation.kt | 29 ++++ .../json/schema/internal/util/UnicodeUtil.kt | 9 ++ .../JsonSchemaEmailFormatValidationTest.kt | 40 ++++++ .../JsonSchemaIdnEmailFormatValidationTest.kt | 21 +++ ...onSchemaUriTemplateFormatValidationTest.kt | 3 +- .../schema/suite/AbstractSchemaTestSuite.kt | 2 - 13 files changed, 258 insertions(+), 48 deletions(-) create mode 100644 src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/AbstractEmailFormatValidator.kt create mode 100644 src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/EmailFormatValidator.kt create mode 100644 src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnEmailFormatValidator.kt create mode 100644 src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt create mode 100644 src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaEmailFormatValidationTest.kt create mode 100644 src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaIdnEmailFormatValidationTest.kt diff --git a/README.md b/README.md index 0f720db9..38a132d7 100644 --- a/README.md +++ b/README.md @@ -332,23 +332,10 @@ val valid = schema.validate(elementToValidate, errors::add) ## Format assertion -The library supports `format` assertion. Not all formats are supported yet. 
The supported formats are: -* date -* time -* date-time -* duration -* json-pointer -* relative-json-pointer -* ipv4 -* ipv6 -* uuid -* hostname -* idn-hostname -* uri -* uri-reference -* uri-template -* iri -* iri-reference +The library supports `format` assertion. +Almost all formats from [JSON schema draft 2020-12](https://json-schema.org/draft/2020-12/draft-bhutton-json-schema-validation-01#section-7.3) are supported. +Unsupported formats: +* regex But there is an API to implement the user's defined format validation. The [FormatValidator](src/commonMain/kotlin/io/github/optimumcode/json/schema/ValidationError.kt) interface can be user for that. diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt index 95b70f89..c38eecf0 100644 --- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/factories/general/FormatAssertionFactory.kt @@ -16,7 +16,9 @@ import io.github.optimumcode.json.schema.internal.factories.AbstractAssertionFac import io.github.optimumcode.json.schema.internal.formats.DateFormatValidator import io.github.optimumcode.json.schema.internal.formats.DateTimeFormatValidator import io.github.optimumcode.json.schema.internal.formats.DurationFormatValidator +import io.github.optimumcode.json.schema.internal.formats.EmailFormatValidator import io.github.optimumcode.json.schema.internal.formats.HostnameFormatValidator +import io.github.optimumcode.json.schema.internal.formats.IdnEmailFormatValidator import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValidator import io.github.optimumcode.json.schema.internal.formats.IpV4FormatValidator import io.github.optimumcode.json.schema.internal.formats.IpV6FormatValidator @@ -82,6 +84,8 @@ 
internal sealed class FormatAssertionFactory( "iri" to IriFormatValidator, "iri-reference" to IriReferenceFormatValidator, "uri-template" to UriTemplateFormatValidator, + "email" to EmailFormatValidator, + "idn-email" to IdnEmailFormatValidator, ) } } diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/AbstractEmailFormatValidator.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/AbstractEmailFormatValidator.kt new file mode 100644 index 00000000..2fddbed1 --- /dev/null +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/AbstractEmailFormatValidator.kt @@ -0,0 +1,127 @@ +package io.github.optimumcode.json.schema.internal.formats + +import de.cketti.codepoints.CodePoints +import de.cketti.codepoints.codePointAt +import io.github.optimumcode.json.schema.FormatValidationResult +import io.github.optimumcode.json.schema.FormatValidator +import io.github.optimumcode.json.schema.internal.util.allCodepoints + +private const val AT_CHAR = '@' +private const val IP_PART_START = '[' +private const val IP_PART_END = ']' +private const val QUOTE = '"' +private const val BACK_SLASH = '\\'.code +private const val IPV6_PREFIX = "IPv6:" +internal const val MAX_ASCII_CODEPOINT = 0x7F + +internal abstract class AbstractEmailFormatValidator( + private val hostnameValidator: AbstractStringFormatValidator, +) : AbstractStringFormatValidator() { + override fun validate(value: String): FormatValidationResult { + if (value.isEmpty()) { + return FormatValidator.Invalid() + } + val delimiterIndex = value.lastIndexOf(AT_CHAR) + if (delimiterIndex <= 0 || delimiterIndex == value.lastIndex) { + // either local-part or domain is empty + return FormatValidator.Invalid() + } + val localPart = value.substring(0, delimiterIndex) + val domainPart = value.substring(delimiterIndex + 1) + return if (isValidLocalPart(localPart) && isValidDomainPart(domainPart)) { + FormatValidator.Valid() + } else { + 
FormatValidator.Invalid() + } + } + + private fun isValidDomainPart(domainPart: String): Boolean { + return if (domainPart.run { startsWith(IP_PART_START) && endsWith(IP_PART_END) }) { + val ipPart = domainPart.substring(1, domainPart.lastIndex) + isValidIpPart(ipPart) + } else { + hostnameValidator.validate(domainPart).isValid() + } + } + + private fun isValidIpPart(ipPart: String): Boolean { + return if (ipPart.startsWith(IPV6_PREFIX)) { + IpV6FormatValidator.validate(ipPart.removePrefix(IPV6_PREFIX)) + } else { + IpV4FormatValidator.validate(ipPart) + }.isValid() + } + + private fun isValidLocalPart(localPart: String): Boolean { + return if (localPart.run { startsWith(QUOTE) || endsWith(QUOTE) }) { + isValidQuotedString(localPart) + } else { + isValidDotString(localPart) + } + } + + private fun isValidDotString(localPart: String): Boolean { + return Validation.eachSeparatedPart(localPart, separator = '.') { + it.isNotEmpty() && it.allCodepoints(::isAText) + } + } + + protected open fun isAText(codepoint: Int): Boolean { + if (codepoint > MAX_ASCII_CODEPOINT) { + return false + } + val asChar = codepoint.toChar() + return Validation.isAlpha(asChar) || Validation.isDigit(asChar) || isSpecialCharacter(asChar) + } + + private fun isSpecialCharacter(codepoint: Char): Boolean = + codepoint == '!' || codepoint == '#' || codepoint == '$' || codepoint == '%' || + codepoint == '&' || codepoint == '\'' || codepoint == '*' || codepoint == '+' || + codepoint == '-' || codepoint == '/' || codepoint == '=' || codepoint == '?' 
|| + codepoint == '^' || codepoint == '_' || codepoint == '`' || codepoint == '{' || + codepoint == '}' || codepoint == '~' || codepoint == '|' + + private fun isValidQuotedString(localPart: String): Boolean { + if (localPart.length <= 2) { + return false + } + if (localPart.run { !startsWith(QUOTE) || !endsWith(QUOTE) }) { + return false + } + val quotedContent = localPart.substring(1, localPart.lastIndex) + return isValidQuotedContent(quotedContent) + } + + private fun isValidQuotedContent(quotedContent: String): Boolean { + // cannot be empty at this point + var index = 0 + val length = quotedContent.length + while (index < length) { + val codePoint = quotedContent.codePointAt(index) + index += CodePoints.charCount(codePoint) + if (codePoint != BACK_SLASH) { + if (isValidQText(codePoint)) { + continue + } + return false + } + if (index >= length) { + // last backslash is not allowed + // E.g.: "\" + return false + } + val nextChar = quotedContent.codePointAt(index) + if (nextChar !in ' '.code..'~'.code) { + // invalid quote pair + return false + } + // always one because of condition above + index += 1 + } + return true + } + + protected open fun isValidQText(codepoint: Int): Boolean = + // \ is checked explicitly + codepoint == ' '.code || codepoint == '!'.code || codepoint in '#'.code..'~'.code +} \ No newline at end of file diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/EmailFormatValidator.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/EmailFormatValidator.kt new file mode 100644 index 00000000..657ba2cc --- /dev/null +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/EmailFormatValidator.kt @@ -0,0 +1,3 @@ +package io.github.optimumcode.json.schema.internal.formats + +internal object EmailFormatValidator : AbstractEmailFormatValidator(HostnameFormatValidator) \ No newline at end of file diff --git 
a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnEmailFormatValidator.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnEmailFormatValidator.kt new file mode 100644 index 00000000..1991572f --- /dev/null +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnEmailFormatValidator.kt @@ -0,0 +1,13 @@ +package io.github.optimumcode.json.schema.internal.formats + +internal object IdnEmailFormatValidator : AbstractEmailFormatValidator(IdnHostnameFormatValidator) { + override fun isAText(codepoint: Int): Boolean = super.isAText(codepoint) || isUtf8NonAscii(codepoint) + + override fun isValidQText(codepoint: Int): Boolean = super.isValidQText(codepoint) || isUtf8NonAscii(codepoint) + + /** + * The spec is quite clear about which codepoints are allowed. + * So, this method allows all codepoints that are greater than 0x7F + */ + private fun isUtf8NonAscii(codepoint: Int): Boolean = codepoint > MAX_ASCII_CODEPOINT +} \ No newline at end of file diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt index 47db8b98..08c629ce 100644 --- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriSpec.kt @@ -1,5 +1,8 @@ package io.github.optimumcode.json.schema.internal.formats +import io.github.optimumcode.json.schema.internal.formats.Validation.isAlpha +import io.github.optimumcode.json.schema.internal.formats.Validation.isDigit + internal object UriSpec { const val SCHEMA_DELIMITER = ':' const val QUERY_DELIMITER = '?' 
@@ -268,10 +271,6 @@ internal object UriSpec { return str[index] == '%' && isHexDigit(str[index + 1]) && isHexDigit(str[index + 2]) } - fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z' - - fun isDigit(c: Char): Boolean = c in '0'..'9' - private fun isPChar(c: Char): Boolean = isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@' private fun isUnreserved(c: Char): Boolean = isAlpha(c) || isDigit(c) || c == '_' || c == '-' || c == '.' || c == '~' diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriTemplateFormatValidator.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriTemplateFormatValidator.kt index 230c3855..5ee92775 100644 --- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriTemplateFormatValidator.kt +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/UriTemplateFormatValidator.kt @@ -4,6 +4,7 @@ import de.cketti.codepoints.CodePoints import de.cketti.codepoints.codePointAt import io.github.optimumcode.json.schema.FormatValidationResult import io.github.optimumcode.json.schema.FormatValidator +import io.github.optimumcode.json.schema.internal.formats.Validation.eachSeparatedPart internal object UriTemplateFormatValidator : AbstractStringFormatValidator() { private const val EXPRESSION_START = '{'.code @@ -121,28 +122,6 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() { return eachSeparatedPart(varList, separator = ',', ::isValidVarSpec) } - private inline fun eachSeparatedPart( - value: String, - separator: Char, - isValid: (String) -> Boolean, - ): Boolean { - var lastSeparator = -1 - do { - val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1) - val part = - if (separatorIndex < 0) { - value.substring(lastSeparator + 1) - } else { - value.substring(lastSeparator + 1, separatorIndex) - } - if (!isValid(part)) { - return false - } - lastSeparator = separatorIndex - 
} while (separatorIndex > 0) - return true - } - private fun isValidVarSpec(varSpec: String): Boolean { if (varSpec.isEmpty()) { return false @@ -172,7 +151,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() { return eachSeparatedPart(varName, separator = '.') { part -> part.isNotEmpty() && UriSpec.hasValidCharsOrPctEncoded(part) { - UriSpec.isAlpha(it) || UriSpec.isDigit(it) || it == '_' + Validation.isAlpha(it) || Validation.isDigit(it) || it == '_' } } } @@ -186,7 +165,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() { // to long value return false } - return maxLength.all(UriSpec::isDigit) + return maxLength.all(Validation::isDigit) } private fun isOperator(char: Char): Boolean = diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt new file mode 100644 index 00000000..f9fa6876 --- /dev/null +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt @@ -0,0 +1,29 @@ +package io.github.optimumcode.json.schema.internal.formats + +internal object Validation { + fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z' + + fun isDigit(c: Char): Boolean = c in '0'..'9' + + inline fun eachSeparatedPart( + value: String, + separator: Char, + isValid: (String) -> Boolean, + ): Boolean { + var lastSeparator = -1 + do { + val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1) + val part = + if (separatorIndex < 0) { + value.substring(lastSeparator + 1) + } else { + value.substring(lastSeparator + 1, separatorIndex) + } + if (!isValid(part)) { + return false + } + lastSeparator = separatorIndex + } while (separatorIndex >= 0) // only -1 (not found) ends the scan; a separator at index 0 is a real match + return true + } +} \ No newline at end of file diff --git a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/util/UnicodeUtil.kt 
b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/util/UnicodeUtil.kt index a28e319d..06e12f90 100644 --- a/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/util/UnicodeUtil.kt +++ b/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/util/UnicodeUtil.kt @@ -43,4 +43,13 @@ internal inline fun CharSequence.forEachCodePointIndexed( } block(startIndex, firstChar.code) } +} + +internal fun CharSequence.allCodepoints(condition: (Int) -> Boolean): Boolean { + forEachCodePointIndexed { _, codePoint -> + if (!condition(codePoint)) { + return false + } + } + return true } \ No newline at end of file diff --git a/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaEmailFormatValidationTest.kt b/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaEmailFormatValidationTest.kt new file mode 100644 index 00000000..122b4b66 --- /dev/null +++ b/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaEmailFormatValidationTest.kt @@ -0,0 +1,40 @@ +package io.github.optimumcode.json.schema.assertions.general.format + +import io.kotest.core.spec.style.FunSpec + +class JsonSchemaEmailFormatValidationTest : FunSpec() { + init { + formatValidationTestSuite( + format = "email", + validTestCases = + listOf( + "a2!#$%&'*+-/=?^_`{}~|@domain.com", + "\"\\\"\\ \\@\\!\\#\\[\\]\\~\"@example.com", + "\" !#[]~a2\"@example.com", + "test@[127.0.0.1]", + "test@[IPv6:FF01::101]", + ), + invalidTestCases = + listOf( + TestCase("", "empty email"), + TestCase("@example.com", "empty local part"), + TestCase("test@", "empty domain part"), + TestCase("\"\"@example.com", "empty quoted string"), + TestCase("\"test@example.com", "only start quote"), + TestCase("test\"@example.com", "only end quote"), + TestCase("\"test\\\"@example.com", "quoted last quote"), + TestCase("\"te\\\nst\"@example.com", "invalid quoted character < space"), + 
TestCase("\"te\\\u007fst\"@example.com", "invalid quoted character > ~"), + TestCase("\"te\"st\"@example.com", "invalid character in quoted string"), + TestCase("test@[127.0.0.300]", "invalid IPv4 in domain part"), + TestCase("test@[IPv6:1:2:3:4:5:6:7:8:9]", "invalid IPv6 in domain part"), + TestCase("test@[FF01::101]", "valid IPv6 in domain part without prefix"), + TestCase("test@hostname.", "valid hostname in domain part"), + TestCase("te\nst@hostname", "invalid character < space"), + TestCase("te\u007fst@hostname", "invalid character > ~"), + TestCase("\"te\nst\"@hostname", "invalid character in quoted local part < space"), + TestCase("\"te\u007fst\"@hostname", "invalid character in quoted local part > ~"), + ), + ) + } +} \ No newline at end of file diff --git a/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaIdnEmailFormatValidationTest.kt b/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaIdnEmailFormatValidationTest.kt new file mode 100644 index 00000000..797bb02f --- /dev/null +++ b/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaIdnEmailFormatValidationTest.kt @@ -0,0 +1,21 @@ +package io.github.optimumcode.json.schema.assertions.general.format + +import io.kotest.core.spec.style.FunSpec + +class JsonSchemaIdnEmailFormatValidationTest : FunSpec() { + init { + formatValidationTestSuite( + format = "idn-email", + validTestCases = + listOf( + "실례@실례.테스트", + "\"실a\\~례\"@실례.테스트", + ), + invalidTestCases = + listOf( + TestCase("실\u007F례@실례.테스트", "invalid codepoint in local part"), + TestCase("\"실\u007F례\"@실례.테스트", "invalid codepoint in quoted local part"), + ), + ) + } +} \ No newline at end of file diff --git a/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaUriTemplateFormatValidationTest.kt 
b/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaUriTemplateFormatValidationTest.kt index 5ebf15f2..396324de 100644 --- a/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaUriTemplateFormatValidationTest.kt +++ b/src/commonTest/kotlin/io/github/optimumcode/json/schema/assertions/general/format/JsonSchemaUriTemplateFormatValidationTest.kt @@ -15,7 +15,7 @@ class JsonSchemaUriTemplateFormatValidationTest : FunSpec() { "https://simple.uri", "https://test%20uri.com", "https://testname/{first%20name}", - "https://testname/{first.name}", + "https://testname/{name_1.name_2}", "https://\u00a0\ud7ff\uf900\ufdcf\ufdf0\uffef\uf8ff", ), invalidTestCases = @@ -23,6 +23,7 @@ class JsonSchemaUriTemplateFormatValidationTest : FunSpec() { TestCase("https://example.com/{}", "empty expression"), TestCase("https://example.com/{,}", "empty expression with var delimiter"), TestCase("https://example.com/{test.}", "empty expression with name delimiter"), + TestCase("https://example.com/{te~st}", "invalid character in var name"), TestCase("https://example.com/}", "end expression without start"), TestCase("https://example.com/{t{e}st}", "expression inside expression"), TestCase("https://example.com/{test:0}", "leading zero"), diff --git a/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt b/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt index 89b90f95..7c70cac9 100644 --- a/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt +++ b/test-suites/src/commonTest/kotlin/io/github/optimumcode/json/schema/suite/AbstractSchemaTestSuite.kt @@ -47,8 +47,6 @@ internal val COMMON_FORMAT_FILTER = TestFilter( excludeSuites = mapOf( - "email" to emptySet(), - "idn-email" to emptySet(), "regex" to emptySet(), ), )