Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add email validation function to lower bounce rates #1845

Merged
merged 6 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions internal/mailer/validate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package mailer

import (
"context"
"errors"
"net"
"net/mail"
"strings"
"time"
)

// invalidEmailMap is a set of complete email addresses that validateEmail
// rejects outright with ErrInvalidEmailAddress, before any DNS lookup.
var invalidEmailMap = map[string]bool{
	// Placeholder addresses that are typed often enough to deserve a
	// dedicated entry.
	"test@gmail.com": true,
	"test@email.com": true,
}

// invalidHostSuffixes lists reserved domain suffixes. validateEmail rejects
// any address whose host ends in one of them with ErrInvalidEmailAddress.
var invalidHostSuffixes = []string{

	// .test, .example, .invalid and .localhost are taken directly from
	// Section 2 of RFC 2606[1]. .local is not part of RFC 2606; it is
	// reserved for multicast DNS by RFC 6762[2].
	//
	// [1] https://www.rfc-editor.org/rfc/rfc2606.html#section-2
	// [2] https://www.rfc-editor.org/rfc/rfc6762.html#section-3
	".test",
	".example",
	".invalid",
	".local",
	".localhost",
}

// invalidHostMap is a set of exact host names (the part after the "@") that
// validateEmail rejects with ErrInvalidEmailAddress before any DNS lookup.
var invalidHostMap = map[string]bool{
	// Bare reserved names, so that input such as "test@test" is caught
	// even though it carries no dot-prefixed suffix.
	"test":      true,
	"example":   true,
	"invalid":   true,
	"local":     true,
	"localhost": true,

	// Commonly typed hosts that do have DNS records and therefore produce
	// a large enough volume of bounce backs to special case.
	"test.com":    true,
	"example.com": true,
	"example.net": true,
	"example.org": true,

	// Typo of gmail.com; seen hundreds of times per day.
	"gamil.com": true,

	// Not email providers, but people often enter them anyway.
	"anonymous.com": true,
	"email.com":     true,
}

// validateEmailTimeout is the deadline ValidateEmail applies to the whole
// validation, including its DNS lookups.
const validateEmailTimeout = 500 * time.Millisecond

// validateEmailResolver performs the MX and host lookups in validateHost.
// It is left as the zero value, i.e. the default resolver behaviour.
var validateEmailResolver net.Resolver

var (
	// ErrInvalidEmailFormat is returned when the input cannot be parsed
	// as an email address.
	ErrInvalidEmailFormat = errors.New("invalid email format")

	// ErrInvalidEmailAddress is returned when the input parses but is
	// known, or determined via DNS, to be undeliverable.
	ErrInvalidEmailAddress = errors.New("invalid email address")
)

// ValidateEmail checks whether email looks deliverable, bounding the whole
// check (including DNS lookups) by validateEmailTimeout.
//
// It returns nil unless one of the following holds:
//   - email cannot be parsed by mail.ParseAddress (ErrInvalidEmailFormat)
//   - email or its host is on one of the static deny lists
//     (ErrInvalidEmailAddress)
//   - email fails a provider-specific rule, see validateProviders
//     (ErrInvalidEmailAddress)
//   - DNS positively reports that the host does not exist
//     (ErrInvalidEmailAddress)
func ValidateEmail(ctx context.Context, email string) error {
	timeoutCtx, stop := context.WithTimeout(ctx, validateEmailTimeout)
	defer stop()

	return validateEmail(timeoutCtx, email)
}

// validateEmail parses email and runs it through, in order: the static
// address deny list, the static host deny lists, the provider-specific rules
// and finally a DNS lookup of the host.
//
// It returns ErrInvalidEmailFormat when email cannot be parsed, and
// ErrInvalidEmailAddress (or whatever validateProviders / validateHost
// return) when a later check rejects it. The caller is responsible for
// putting a deadline on ctx.
func validateEmail(ctx context.Context, email string) error {
	ea, err := mail.ParseAddress(email)
	if err != nil {
		return ErrInvalidEmailFormat
	}

	// Work on the parsed addr-spec rather than the raw input. ParseAddress
	// accepts forms such as "Name <user@host>" where the two differ, so an
	// index computed on one must never be used to slice the other.
	addr := ea.Address
	at := strings.LastIndex(addr, "@")
	if at == -1 {
		return ErrInvalidEmailFormat
	}
	name := addr[:at]

	// Domain names are case-insensitive; normalise so that the deny lists
	// and provider rules also match "Example.COM".
	host := strings.ToLower(addr[at+1:])

	// few static lookups that are typed constantly and known to be invalid.
	if invalidEmailMap[name+"@"+host] {
		return ErrInvalidEmailAddress
	}
	if invalidHostMap[host] {
		return ErrInvalidEmailAddress
	}
	for _, suffix := range invalidHostSuffixes {
		if strings.HasSuffix(host, suffix) {
			return ErrInvalidEmailAddress
		}
	}

	if err := validateProviders(name, host); err != nil {
		return err
	}
	return validateHost(ctx, host)
}

func validateProviders(name, host string) error {
switch host {
case "gmail.com":
// Based on a sample of internal data, this reduces the number of
// bounced emails by 23%. Gmail documentation specifies that the
cstockton marked this conversation as resolved.
Show resolved Hide resolved
// min user name length is 6 characters. There may be some accounts
// from early gmail beta with shorter email addresses, but I think
// this reduces bounce rates enough to be worth adding for now.
if len(name) < 6 {
return ErrInvalidEmailAddress
}
}
return nil
}

// validateHost asks DNS whether host could receive mail. It first looks up
// MX records and, only if that lookup is a definitive "not found", falls back
// to a plain host lookup. Any outcome other than a definitive "not found"
// (success, timeout, temporary failure, non-DNS error) is treated as valid,
// so ErrInvalidEmailAddress is returned only when both lookups positively
// report that nothing exists.
func validateHost(ctx context.Context, host string) error {
	if _, mxErr := validateEmailResolver.LookupMX(ctx, host); !isHostNotFound(mxErr) {
		return nil
	}

	if _, addrErr := validateEmailResolver.LookupHost(ctx, host); !isHostNotFound(addrErr) {
		return nil
	}

	// Neither MX records nor addresses exist for this host.
	return ErrInvalidEmailAddress
}

// isHostNotFound reports whether err is a definitive DNS "not found" answer.
// It returns true only when err is (or wraps) a *net.DNSError whose
// IsNotFound flag is set and which is neither temporary nor a timeout. Every
// other value, including nil, yields false.
func isHostNotFound(err error) bool {
	if err == nil {
		// We had no err, so we treat it as valid. We don't check the mx records
		// because RFC 5321 specifies that if an empty list of MX's are returned
		// the host should be treated as the MX[1].
		//
		// [1] https://www.rfc-editor.org/rfc/rfc5321.html#section-5.1
		return false
	}

	// No names present, we will try to get a positive assertion that the
	// domain is not configured to receive email.
	var dnsError *net.DNSError
	if !errors.As(err, &dnsError) {
		// We are unable to determine with absolute certainty that the email
		// was invalid, so we err on the side of caution and report false.
		return false
	}

	// The error is a DNS error; IsNotFound is trusted only when the failure
	// was neither temporary nor a timeout.
	return dnsError.IsNotFound && !dnsError.IsTemporary && !dnsError.IsTimeout
}
99 changes: 99 additions & 0 deletions internal/mailer/validate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package mailer

import (
"context"
"testing"
"time"

"github.com/stretchr/testify/require"
)

// TestValidateEmail runs validateEmail against a table of addresses. Each
// case runs as its own subtest so that one failure does not hide the rest.
// A case with a non-empty err expects an error containing that text; an
// empty err expects success. A zero timeout means validateEmailTimeout.
//
// NOTE(review): the "valid" and "no mx records" cases perform real DNS
// lookups, so this test appears to need network access.
func TestValidateEmail(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	cases := []struct {
		email   string
		timeout time.Duration
		err     string
	}{
		// valid (has mx record)
		{email: "a@supabase.io"},
		{email: "support@supabase.io"},
		{email: "chris.stockton@supabase.io"},

		// bad format
		{email: "", err: "invalid email format"},
		{email: "io", err: "invalid email format"},
		{email: "supabase.io", err: "invalid email format"},
		{email: "@supabase.io", err: "invalid email format"},
		{email: "test@.supabase.io", err: "invalid email format"},

		// invalid: valid mx records, but invalid and often typed
		// (invalidEmailMap / invalidHostMap)
		{email: "test@test.com", err: "invalid email address"},
		{email: "test@gmail.com", err: "invalid email address"},
		{email: "test@email.com", err: "invalid email address"},

		// very common typo
		{email: "test@gamil.com", err: "invalid email address"},

		// invalid: valid mx records, but invalid and often typed
		// (invalidHostMap)
		{email: "a@example.com", err: "invalid email address"},
		{email: "a@example.net", err: "invalid email address"},
		{email: "a@example.org", err: "invalid email address"},

		// invalid: no mx records
		{email: "a@test", err: "invalid email address"},
		{email: "test@local", err: "invalid email address"},
		{email: "test@test.local", err: "invalid email address"},
		{email: "test@example", err: "invalid email address"},
		{email: "test@invalid", err: "invalid email address"},

		// valid but not actually valid and typed a lot
		{email: "a@invalid", err: "invalid email address"},
		{email: "a@a.invalid", err: "invalid email address"},
		{email: "test@invalid", err: "invalid email address"},

		// various invalid emails
		{email: "test@test.localhost", err: "invalid email address"},
		{email: "test@invalid.example.com", err: "invalid email address"},
		{email: "test@no.such.email.host.supabase.io", err: "invalid email address"},

		// this low timeout should simulate a dns timeout, which should
		// not be treated as an invalid email.
		{email: "validemail@probablyaaaaaaaanotarealdomain.com",
			timeout: time.Millisecond},

		// likewise for a valid email
		{email: "support@supabase.io", timeout: time.Millisecond},
	}

	for idx, tc := range cases {
		// Subtests run synchronously here (no t.Parallel), so capturing
		// idx and tc in the closure is safe on every Go version.
		t.Run(tc.email, func(t *testing.T) {
			// Log first so that every failure below identifies its case.
			t.Logf("tc #%v - email %q", idx, tc.email)

			timeout := tc.timeout
			if timeout == 0 {
				timeout = validateEmailTimeout
			}

			ctx, cancel := context.WithTimeout(ctx, timeout)
			defer cancel()

			start := time.Now()
			err := validateEmail(ctx, tc.email)
			elapsed := time.Since(start)

			// Allow 50ms of slack on top of the deadline.
			if limit := timeout + 50*time.Millisecond; elapsed > limit {
				t.Fatalf("timeout was not respected: took %v, limit %v", elapsed, limit)
			}

			if tc.err != "" {
				require.Error(t, err)
				require.Contains(t, err.Error(), tc.err)
				return
			}
			require.NoError(t, err)
		})
	}
}
Loading