Skip to content

Commit

Permalink
Merge pull request #132 from davishmcclurg/email
Browse files Browse the repository at this point in the history
Better email validation
  • Loading branch information
davishmcclurg authored Jun 14, 2023
2 parents 8ac15d8 + bbb49a4 commit c2b1e1c
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 9 deletions.
1 change: 1 addition & 0 deletions lib/json_schemer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
require 'json_schemer/version'
require 'json_schemer/format/hostname'
require 'json_schemer/format/uri_template'
require 'json_schemer/format/email'
require 'json_schemer/format'
require 'json_schemer/errors'
require 'json_schemer/cached_resolver'
Expand Down
9 changes: 1 addition & 8 deletions lib/json_schemer/format.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# frozen_string_literal: true
module JSONSchemer
module Format
include Email
include Hostname
include URITemplate

# this is no good
EMAIL_REGEX = /\A[^@\s]+@([\p{L}\d-]+\.)+[\p{L}\d\-]{2,}\z/i.freeze
JSON_POINTER_REGEX_STRING = '(\/([^~\/]|~[01])*)*'
JSON_POINTER_REGEX = /\A#{JSON_POINTER_REGEX_STRING}\z/.freeze
RELATIVE_JSON_POINTER_REGEX = /\A(0|[1-9]\d*)(#|#{JSON_POINTER_REGEX_STRING})?\z/.freeze
Expand Down Expand Up @@ -72,12 +71,6 @@ def valid_date_time?(data)
false
end

# Returns true when `data` matches EMAIL_REGEX and the part before the first
# "@" neither starts nor ends with a dot and contains no consecutive dots.
def valid_email?(data)
  if EMAIL_REGEX.match?(data)
    local_part = data.partition('@').first
    # De Morgan form of the three dot checks on the local part.
    !(local_part.start_with?('.') || local_part.end_with?('.') || local_part.include?('..'))
  else
    false
  end
end

def valid_ip?(data, family)
IPAddr.new(data, family)
IP_REGEX.match?(data)
Expand Down
56 changes: 56 additions & 0 deletions lib/json_schemer/format/email.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# frozen_string_literal: true
module JSONSchemer
  module Format
    # Email validation built from the SMTP `Mailbox` grammar (RFC 5321 section
    # 4.1.2) plus the non-ASCII extension referenced below.
    #
    # The grammar is assembled from string fragments that are interpolated into
    # one anchored regex. The regex only checks the overall shape: the domain
    # and any bracketed IP literal are captured by name and handed to
    # `valid_hostname?` / `valid_ip?`, which this module does not define and
    # expects the including context to provide.
    module Email
      # https://datatracker.ietf.org/doc/html/rfc6531#section-3.3
      # I think this is the same as "UTF8-non-ascii"? (https://datatracker.ietf.org/doc/html/rfc6532#section-3.1)
      UTF8_NON_ASCII = '[^[:ascii:]]'

      # https://datatracker.ietf.org/doc/html/rfc5321#section-4.1.2
      #
      # atext = ALPHA / DIGIT / ; Printable US-ASCII
      #         "!" / "#" /     ; characters not including
      #         "$" / "%" /     ; specials. Used for atoms.
      #         "&" / "'" /
      #         "*" / "+" /
      #         "-" / "/" /
      #         "=" / "?" /
      #         "^" / "_" /
      #         "`" / "{" /
      #         "|" / "}" /
      #         "~"
      A_TEXT = "([\\w!#$%&'*+\\-/=?\\^`{|}~]|#{UTF8_NON_ASCII})"

      # qtextSMTP = %d32-33 / %d35-91 / %d93-126
      #           ; i.e., within a quoted string, any
      #           ; ASCII graphic or space is permitted
      #           ; without backslash-quoting except
      #           ; double-quote and the backslash itself.
      Q_TEXT_SMTP = "([\\x20-\\x21\\x23-\\x5B\\x5D-\\x7E]|#{UTF8_NON_ASCII})"

      # quoted-pairSMTP = %d92 %d32-126
      #                 ; i.e., backslash followed by any ASCII
      #                 ; graphic (including itself) or SPace
      QUOTED_PAIR_SMTP = '\x5C[\x20-\x7E]'

      # QcontentSMTP = qtextSMTP / quoted-pairSMTP
      Q_CONTENT_SMTP = "#{Q_TEXT_SMTP}|#{QUOTED_PAIR_SMTP}"

      # Quoted-string = DQUOTE *QcontentSMTP DQUOTE
      QUOTED_STRING = "\"(#{Q_CONTENT_SMTP})*\""

      # Atom = 1*atext
      ATOM = "#{A_TEXT}+"

      # Dot-string = Atom *("." Atom)
      DOT_STRING = "#{ATOM}(\\.#{ATOM})*"

      # Local-part = Dot-string / Quoted-string
      #            ; MAY be case-sensitive
      LOCAL_PART = "#{DOT_STRING}|#{QUOTED_STRING}"

      # address-literal = "[" ( IPv4-address-literal /
      #                         IPv6-address-literal /
      #                         General-address-literal ) "]"
      #                 ; See Section 4.1.3
      # IPv4-address-literal = Snum 3("." Snum)
      # IPv6-address-literal = "IPv6:" IPv6-addr
      #
      # using `valid_ip?` to check ip addresses because it's complicated.
      # The ipv6 capture also admits "." so that the IPv6v4-full / IPv6v4-comp
      # forms of section 4.1.3 (an IPv6 address ending in a dotted IPv4 part,
      # e.g. "::ffff:127.0.0.1") reach `valid_ip?` instead of being treated as
      # a domain name.
      ADDRESS_LITERAL = '\[(IPv6:(?<ipv6>[\h:.]+)|(?<ipv4>[\d.]+))\]'

      # Mailbox = Local-part "@" ( Domain / address-literal )
      #
      # using `valid_hostname?` to check domain because it's complicated
      MAILBOX = "(#{LOCAL_PART})@(#{ADDRESS_LITERAL}|(?<domain>.+))"

      EMAIL_REGEX = /\A#{MAILBOX}\z/.freeze

      # Validates `data` as an email address.
      #
      # Returns false when `data` does not match EMAIL_REGEX. Otherwise returns
      # the result of `valid_ip?` for a bracketed IPv4/IPv6 literal, or of
      # `valid_hostname?` for anything else after the "@".
      def valid_email?(data)
        match = EMAIL_REGEX.match(data)
        return false unless match

        # At most one of ipv4 / ipv6 / domain is non-nil for a given match.
        if (ipv4 = match[:ipv4])
          valid_ip?(ipv4, Socket::AF_INET)
        elsif (ipv6 = match[:ipv6])
          valid_ip?(ipv6, Socket::AF_INET6)
        else
          valid_hostname?(match[:domain])
        end
      end
    end
  end
end
25 changes: 25 additions & 0 deletions test/format_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,29 @@ def test_it_allows_callable_custom_format
assert(schema.valid?('valid'))
refute(schema.valid?('invalid'))
end

# Table-driven check of the `email` format: each key is a candidate address
# and each value is whether the schema is expected to accept it.
def test_email_format
  schema = JSONSchemer.schema({ 'format' => 'email' })

  {
    "joe.bloggs@example.com" => true,
    "2962" => false,
    "te~st@example.com" => true,
    "~test@example.com" => true,
    "test~@example.com" => true,
    "\"joe bloggs\"@example.com" => true,
    "\"joe..bloggs\"@example.com" => true,
    "\"joe@bloggs\"@example.com" => true,
    "joe.bloggs@[127.0.0.1]" => true,
    "joe.bloggs@[IPv6:::1]" => true,
    ".test@example.com" => false,
    "test.@example.com" => false,
    "te.s.t@example.com" => true,
    "te..st@example.com" => false,
    "joe.bloggs@invalid=domain.com" => false,
    "joe.bloggs@[127.0.0.300]" => false
  }.each do |email, valid|
    # Name the address in the failure message; otherwise a failing row only
    # reports "Expected true, Actual false" with no hint of which one broke.
    assert_equal(valid, schema.valid?(email), "unexpected email format result for #{email.inspect}")
  end
end
end
2 changes: 1 addition & 1 deletion test/uri_template_test.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
require 'test_helper'

class PrettyErrorsTest < Minitest::Test
class UriTemplateTest < Minitest::Test
def test_uri_template_format
schema = JSONSchemer.schema({ 'type' => 'string', 'format' => 'uri-template' })

Expand Down

0 comments on commit c2b1e1c

Please sign in to comment.