-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add email and idn-email format support (#103)
Related to #54
- Loading branch information
1 parent
ddaf1bd
commit cdfe7dc
Showing
13 changed files
with
258 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
127 changes: 127 additions & 0 deletions
127
...kotlin/io/github/optimumcode/json/schema/internal/formats/AbstractEmailFormatValidator.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
package io.github.optimumcode.json.schema.internal.formats | ||
|
||
import de.cketti.codepoints.CodePoints | ||
import de.cketti.codepoints.codePointAt | ||
import io.github.optimumcode.json.schema.FormatValidationResult | ||
import io.github.optimumcode.json.schema.FormatValidator | ||
import io.github.optimumcode.json.schema.internal.util.allCodepoints | ||
|
||
// Delimiter between the local part and the domain part of an address.
private const val AT_CHAR = '@' | ||
// Opening and closing brackets that wrap an IP address literal in the domain part.
private const val IP_PART_START = '[' | ||
private const val IP_PART_END = ']' | ||
// Quote character that wraps a quoted-string local part.
private const val QUOTE = '"' | ||
// Code point of the backslash; kept as Int because it is compared against code points.
private const val BACK_SLASH = '\\'.code | ||
// Prefix that marks the content of an IP literal as an IPv6 address.
private const val IPV6_PREFIX = "IPv6:" | ||
// Highest ASCII code point; any greater value is treated as non-ASCII.
internal const val MAX_ASCII_CODEPOINT = 0x7F | ||
|
||
/**
 * Shared implementation of e-mail address validation.
 *
 * [validate] splits the value at the last `@` and checks the text before it as a local part
 * (either a dot-separated string or a quoted string) and the text after it as a domain part
 * (either an IP literal in square brackets or a host name checked by [hostnameValidator]).
 *
 * Subclasses may override [isAText] and [isValidQText] to accept additional code points
 * in the local part.
 */
internal abstract class AbstractEmailFormatValidator(
  private val hostnameValidator: AbstractStringFormatValidator,
) : AbstractStringFormatValidator() {
  /**
   * Validates [value] as an e-mail address.
   *
   * @return a valid result when both the local part and the domain part are accepted,
   * an invalid result otherwise (including empty input and an absent, leading or trailing `@`)
   */
  override fun validate(value: String): FormatValidationResult {
    if (value.isEmpty()) {
      return FormatValidator.Invalid()
    }
    // the last '@' is the delimiter, so any '@' inside a quoted local part stays in the local part
    val delimiterIndex = value.lastIndexOf(AT_CHAR)
    if (delimiterIndex <= 0 || delimiterIndex == value.lastIndex) {
      // there is no delimiter or either local-part or domain is empty
      return FormatValidator.Invalid()
    }
    val localPart = value.substring(0, delimiterIndex)
    val domainPart = value.substring(delimiterIndex + 1)
    return if (isValidLocalPart(localPart) && isValidDomainPart(domainPart)) {
      FormatValidator.Valid()
    } else {
      FormatValidator.Invalid()
    }
  }

  /**
   * Checks the domain part: content wrapped in `[` and `]` is validated as an IP literal,
   * everything else is delegated to [hostnameValidator].
   */
  private fun isValidDomainPart(domainPart: String): Boolean {
    return if (domainPart.run { startsWith(IP_PART_START) && endsWith(IP_PART_END) }) {
      // strip the surrounding brackets
      val ipPart = domainPart.substring(1, domainPart.lastIndex)
      isValidIpPart(ipPart)
    } else {
      hostnameValidator.validate(domainPart).isValid()
    }
  }

  /**
   * Checks the content of an IP literal: an address tagged with [IPV6_PREFIX] is validated as IPv6,
   * anything else as IPv4.
   */
  private fun isValidIpPart(ipPart: String): Boolean {
    // ABNF string literals are case-insensitive, so the tag is matched ignoring case ("IPv6:", "ipv6:", ...)
    return if (ipPart.startsWith(IPV6_PREFIX, ignoreCase = true)) {
      // removePrefix is case-sensitive, so the tag is dropped by length instead
      IpV6FormatValidator.validate(ipPart.substring(IPV6_PREFIX.length))
    } else {
      IpV4FormatValidator.validate(ipPart)
    }.isValid()
  }

  /**
   * Checks the local part: if it starts or ends with a quote it must be a well-formed quoted string,
   * otherwise it must be a dot-separated string.
   */
  private fun isValidLocalPart(localPart: String): Boolean {
    return if (localPart.run { startsWith(QUOTE) || endsWith(QUOTE) }) {
      isValidQuotedString(localPart)
    } else {
      isValidDotString(localPart)
    }
  }

  /**
   * Checks that every `.`-separated part of [localPart] is non-empty
   * and consists only of code points accepted by [isAText].
   */
  private fun isValidDotString(localPart: String): Boolean {
    return Validation.eachSeparatedPart(localPart, separator = '.') {
      it.isNotEmpty() && it.allCodepoints(::isAText)
    }
  }

  /**
   * Returns `true` when [codepoint] is allowed in an unquoted local part.
   *
   * The base implementation accepts only ASCII letters, ASCII digits and the characters
   * listed in [isSpecialCharacter]; everything above [MAX_ASCII_CODEPOINT] is rejected.
   */
  protected open fun isAText(codepoint: Int): Boolean {
    if (codepoint > MAX_ASCII_CODEPOINT) {
      return false
    }
    // safe conversion: the value is within the ASCII range at this point
    val asChar = codepoint.toChar()
    return Validation.isAlpha(asChar) || Validation.isDigit(asChar) || isSpecialCharacter(asChar)
  }

  /** Returns `true` for the punctuation characters accepted in an unquoted local part. */
  private fun isSpecialCharacter(codepoint: Char): Boolean =
    codepoint == '!' || codepoint == '#' || codepoint == '$' || codepoint == '%' ||
      codepoint == '&' || codepoint == '\'' || codepoint == '*' || codepoint == '+' ||
      codepoint == '-' || codepoint == '/' || codepoint == '=' || codepoint == '?' ||
      codepoint == '^' || codepoint == '_' || codepoint == '`' || codepoint == '{' ||
      codepoint == '}' || codepoint == '~' || codepoint == '|'

  /**
   * Checks a local part that is expected to be a quoted string:
   * it must be longer than two characters (non-empty content), start and end with a quote
   * and have valid content between the quotes.
   */
  private fun isValidQuotedString(localPart: String): Boolean {
    if (localPart.length <= 2) {
      // too short to hold two quotes and at least one character between them
      return false
    }
    if (localPart.run { !startsWith(QUOTE) || !endsWith(QUOTE) }) {
      return false
    }
    val quotedContent = localPart.substring(1, localPart.lastIndex)
    return isValidQuotedContent(quotedContent)
  }

  /**
   * Walks [quotedContent] code point by code point.
   * A backslash must be followed by a character in the range `' '..'~'` (a quoted pair);
   * every other code point must be accepted by [isValidQText].
   */
  private fun isValidQuotedContent(quotedContent: String): Boolean {
    // cannot be empty at this point
    var index = 0
    val length = quotedContent.length
    while (index < length) {
      val codePoint = quotedContent.codePointAt(index)
      // advance by the number of chars the code point occupies
      index += CodePoints.charCount(codePoint)
      if (codePoint != BACK_SLASH) {
        if (isValidQText(codePoint)) {
          continue
        }
        return false
      }
      if (index >= length) {
        // last backslash is not allowed
        // E.g.: "\"
        return false
      }
      val nextChar = quotedContent.codePointAt(index)
      if (nextChar !in ' '.code..'~'.code) {
        // invalid quote pair
        return false
      }
      // always one because of condition above
      index += 1
    }
    return true
  }

  /**
   * Returns `true` when [codepoint] may appear unescaped inside a quoted local part.
   *
   * The base implementation accepts the space, `!` and the range `'#'..'~'`
   * (that is `' '..'~'` without the quote character).
   */
  protected open fun isValidQText(codepoint: Int): Boolean =
    // \ is checked explicitly
    codepoint == ' '.code || codepoint == '!'.code || codepoint in '#'.code..'~'.code
}
3 changes: 3 additions & 0 deletions
3
...monMain/kotlin/io/github/optimumcode/json/schema/internal/formats/EmailFormatValidator.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
package io.github.optimumcode.json.schema.internal.formats | ||
|
||
/**
 * E-mail validator that uses the checks from [AbstractEmailFormatValidator]
 * and delegates host names in the domain part to [HostnameFormatValidator].
 */
internal object EmailFormatValidator :
  AbstractEmailFormatValidator(hostnameValidator = HostnameFormatValidator)
13 changes: 13 additions & 0 deletions
13
...Main/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnEmailFormatValidator.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package io.github.optimumcode.json.schema.internal.formats | ||
|
||
/**
 * E-mail validator that delegates host names in the domain part to [IdnHostnameFormatValidator]
 * and additionally accepts every code point above [MAX_ASCII_CODEPOINT] in the local part.
 */
internal object IdnEmailFormatValidator :
  AbstractEmailFormatValidator(hostnameValidator = IdnHostnameFormatValidator) {
  /** Accepts any non-ASCII code point; ASCII code points are judged by the base implementation. */
  override fun isAText(codepoint: Int): Boolean =
    if (isBeyondAscii(codepoint)) true else super.isAText(codepoint)

  /** Accepts any non-ASCII code point; ASCII code points are judged by the base implementation. */
  override fun isValidQText(codepoint: Int): Boolean =
    if (isBeyondAscii(codepoint)) true else super.isValidQText(codepoint)

  /**
   * The spec is quite clear about which codepoints are allowed.
   * So, every codepoint greater than 0x7F is accepted here.
   */
  private fun isBeyondAscii(codepoint: Int): Boolean = MAX_ASCII_CODEPOINT < codepoint
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/Validation.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package io.github.optimumcode.json.schema.internal.formats | ||
|
||
/**
 * Character and string helpers shared by the format validators.
 */
internal object Validation {
  /** Returns `true` when [c] is an ASCII letter (`a`-`z` or `A`-`Z`). */
  fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z'

  /** Returns `true` when [c] is an ASCII digit (`0`-`9`). */
  fun isDigit(c: Char): Boolean = c in '0'..'9'

  /**
   * Splits [value] on [separator] and passes every part to [isValid] in order,
   * stopping at the first part that is rejected.
   *
   * Empty parts are passed to [isValid] as well: a leading, trailing or doubled separator
   * produces an empty string, and a [value] without any separator is passed as a single part.
   *
   * @param value the string to split
   * @param separator the character that delimits the parts
   * @param isValid predicate applied to each part
   * @return `true` when [isValid] accepted every part, `false` otherwise
   */
  inline fun eachSeparatedPart(
    value: String,
    separator: Char,
    isValid: (String) -> Boolean,
  ): Boolean {
    var lastSeparator = -1
    do {
      val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1)
      val part =
        if (separatorIndex < 0) {
          // no more separators: the rest of the value is the last part
          value.substring(lastSeparator + 1)
        } else {
          value.substring(lastSeparator + 1, separatorIndex)
        }
      if (!isValid(part)) {
        return false
      }
      lastSeparator = separatorIndex
      // index 0 is a real separator position; only a negative index means "not found"
    } while (separatorIndex >= 0)
    return true
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
.../optimumcode/json/schema/assertions/general/format/JsonSchemaEmailFormatValidationTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
package io.github.optimumcode.json.schema.assertions.general.format | ||
|
||
import io.kotest.core.spec.style.FunSpec | ||
|
||
/**
 * Runs the shared format validation suite for the `email` format
 * with the valid and invalid addresses listed below.
 */
class JsonSchemaEmailFormatValidationTest : FunSpec() {
  init {
    formatValidationTestSuite(
      format = "email",
      validTestCases =
        listOf(
          // every special character allowed in an unquoted local part
          "a2!#$%&'*+-/=?^_`{}~|@domain.com",
          // quoted local part made of escaped (backslash-prefixed) characters
          "\"\\\"\\ \\@\\!\\#\\[\\]\\~\"@example.com",
          // quoted local part with unescaped characters
          "\" !#[]~a2\"@example.com",
          "test@[127.0.0.1]",
          "test@[IPv6:FF01::101]",
        ),
      invalidTestCases =
        listOf(
          TestCase("", "empty email"),
          TestCase("@example.com", "empty local part"),
          TestCase("test@", "empty domain part"),
          TestCase("\"\"@example.com", "empty quoted string"),
          TestCase("\"test@example.com", "only start quote"),
          TestCase("test\"@example.com", "only end quote"),
          TestCase("\"test\\\"@example.com", "quoted last quote"),
          TestCase("\"te\\\nst\"@example.com", "invalid quoted character < space"),
          TestCase("\"te\\\u007fst\"@example.com", "invalid quoted character > ~"),
          TestCase("\"te\"st\"@example.com", "invalid character in quoted string"),
          TestCase("test@[127.0.0.300]", "invalid IPv4 in domain part"),
          TestCase("test@[IPv6:1:2:3:4:5:6:7:8:9]", "invalid IPv6 in domain part"),
          TestCase("test@[FF01::101]", "valid IPv6 in domain part without prefix"),
          // the case is expected to be rejected, so the description says "invalid"
          TestCase("test@hostname.", "invalid hostname in domain part"),
          TestCase("te\nst@hostname", "invalid character < space"),
          TestCase("te\u007fst@hostname", "invalid character > ~"),
          TestCase("\"te\nst\"@hostname", "invalid character in quoted local part < space"),
          TestCase("\"te\u007fst\"@hostname", "invalid character in quoted local part > ~"),
        ),
    )
  }
}
21 changes: 21 additions & 0 deletions
21
...timumcode/json/schema/assertions/general/format/JsonSchemaIdnEmailFormatValidationTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package io.github.optimumcode.json.schema.assertions.general.format | ||
|
||
import io.kotest.core.spec.style.FunSpec | ||
|
||
/**
 * Runs the shared format validation suite for the `idn-email` format
 * with addresses that contain non-ASCII code points.
 */
class JsonSchemaIdnEmailFormatValidationTest : FunSpec() {
  init {
    // addresses the validator is expected to accept
    val acceptedAddresses =
      listOf(
        "실례@실례.테스트",
        "\"실a\\~례\"@실례.테스트",
      )
    // addresses the validator is expected to reject, each with its test description
    val rejectedAddresses =
      listOf(
        TestCase("실\u007F례@실례.테스트", "invalid codepoint in local part"),
        TestCase("\"실\u007F례\"@실례.테스트", "invalid codepoint in quoted local part"),
      )
    formatValidationTestSuite(
      format = "idn-email",
      validTestCases = acceptedAddresses,
      invalidTestCases = rejectedAddresses,
    )
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.