-
Notifications
You must be signed in to change notification settings - Fork 5
/
emailaddress.go
250 lines (209 loc) · 8.23 KB
/
emailaddress.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
// Copyright 2018 The go-emailaddress AUTHORS. All rights reserved.
//
// Use of this source code is governed by a MIT
// license that can be found in the LICENSE file.
/*
Package emailaddress provides a tiny library for finding, parsing and validation of email
addresses. This library is tested for Go v1.9 and above.
go get -u github.com/mcnijman/go-emailaddress
# Local validation
Parse and validate the email locally using RFC 5322 regex, note that when err == nil it doesn't
necessarily mean the email address actually exists.
import "github.com/mcnijman/go-emailaddress"
email, err := emailaddress.Parse("foo@bar.com")
if err != nil {
fmt.Println("invalid email")
}
fmt.Println(email.LocalPart) // foo
fmt.Println(email.Domain) // bar.com
fmt.Println(email) // foo@bar.com
fmt.Println(email.String()) // foo@bar.com
# Host validation
Host validation will first attempt to resolve the domain and then verify if we can start a mail
transaction with the host. This is relatively slow as it will contact the host several times.
Note that when err == nil it doesn't necessarily mean the email address actually exists.
import "github.com/mcnijman/go-emailaddress"
email, err := emailaddress.Parse("foo@bar.com")
if err != nil {
fmt.Println("invalid email")
}
err := email.ValidateHost()
if err != nil {
fmt.Println("invalid host")
}
# Finding emails
This will look for emails in a byte array (ie text or an html response).
import "github.com/mcnijman/go-emailaddress"
text := []byte(`Send me an email at foo@bar.com.`)
validateHost := false
emails := emailaddress.Find(text, validateHost)
for _, e := range emails {
fmt.Println(e)
}
// foo@bar.com
Although Find uses a stricter pattern than RFC 5322, it will likely still match images and urls that
contain the '@' character (ie. !--logo@2x.png). For more reliable results, you can use the following method.
import "github.com/mcnijman/go-emailaddress"
text := []byte(`Send me an email at foo@bar.com or fake@domain.foobar.`)
validateHost := false
emails := emailaddress.FindWithIcannSuffix(text, validateHost)
for _, e := range emails {
fmt.Println(e)
}
// foo@bar.com
*/
package emailaddress
import (
"fmt"
"net"
"net/smtp"
"regexp"
"strings"
"golang.org/x/net/publicsuffix"
)
var (
	// rfc5322 is a RFC 5322 regex, as per: https://stackoverflow.com/a/201378/5405453.
	// The pattern is case-insensitive, unanchored and has the shape
	// <local-part>@<domain or [address-literal]>.
	// Note that this can't verify that the address is an actual working email address.
	// Use ValidateHost as a starter and/or send them one :-).
	rfc5322 = "(?i)(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])"

	// validRfc5322Regexp matches input that consists of exactly one rfc5322 address.
	// The pattern is wrapped in a non-capturing group before it is anchored: a quantifier
	// appended directly after the pattern (as in "^%s*$") binds only to its trailing domain
	// group and therefore accepts an empty or repeated domain, e.g. "foo@" or
	// "foo@[1.2.3.4][1.2.3.4]".
	validRfc5322Regexp = regexp.MustCompile(fmt.Sprintf("^(?:%s)$", rfc5322))

	// findRfc5322Regexp locates rfc5322 addresses anywhere inside a larger text.
	findRfc5322Regexp = regexp.MustCompile(rfc5322)

	// findCommonRegexp is a stricter regex than the RFC 5322 and matches emails that
	// are more likely to be real: an unquoted local part, a dotted domain and a final
	// label of 2 to 24 letters.
	findCommonRegexp = regexp.MustCompile("(?i)([A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,24})")
)
// EmailAddress is a structure that stores the address local-part@domain parts.
type EmailAddress struct {
// LocalPart usually the username of an email address.
LocalPart string
// Domain is the part of the email address after the last @.
// This should be DNS resolvable to an email server.
Domain string
}
func (e EmailAddress) String() string {
if e.LocalPart == "" || e.Domain == "" {
return ""
}
return fmt.Sprintf("%s@%s", e.LocalPart, e.Domain)
}
// ValidateHost checks whether the address looks reachable. It resolves a mail host for
// e.Domain with LookupHost and, if that succeeds, attempts a mail transaction against that
// host with TryHost. The first error encountered is returned.
func (e EmailAddress) ValidateHost() error {
	mailHost, lookupErr := LookupHost(e.Domain)
	if lookupErr == nil {
		return TryHost(mailHost, e)
	}
	return lookupErr
}
// ValidateIcanSuffix reports whether the public suffix of the lower-cased domain is managed
// by ICANN according to the golang.org/x/net/publicsuffix package, returning an error that
// names the suffix when it is not. Note that an error does not necessarily mean that the
// email address is invalid. Also the suffix list in that package is embedded and thereby
// not up to date.
func (e EmailAddress) ValidateIcanSuffix() error {
	suffix, managed := publicsuffix.PublicSuffix(strings.ToLower(e.Domain))
	if managed {
		return nil
	}
	return fmt.Errorf("public suffix is not managed by ICANN, got %s", suffix)
}
// Find scans haystack with a regex that is stricter than RFC 5322 and therefore matches
// emails that are more likely to be real. Because it is stricter it can miss addresses
// that are valid, but the results are usually better. Every match is passed through Parse
// and dropped when parsing fails. If validateHost is true, matches whose ValidateHost call
// fails are dropped as well. See examples in the tests.
func Find(haystack []byte, validateHost bool) (emails []*EmailAddress) {
	for _, candidate := range findCommonRegexp.FindAll(haystack, -1) {
		addr, err := Parse(string(candidate))
		if err != nil {
			continue
		}
		if validateHost && addr.ValidateHost() != nil {
			continue
		}
		emails = append(emails, addr)
	}
	return emails
}
// FindWithRFC5322 matches haystack against the RFC 5322 regex and keeps every match that
// Parse accepts. If the validateHost boolean is true, ValidateHost is called for each parsed
// address and addresses that fail it are left out. As RFC 5322 is really broad this method
// will likely match images and urls that contain the '@' character.
func FindWithRFC5322(haystack []byte, validateHost bool) (emails []*EmailAddress) {
	for _, match := range findRfc5322Regexp.FindAll(haystack, -1) {
		parsed, parseErr := Parse(string(match))
		if parseErr != nil {
			continue
		}
		if validateHost && parsed.ValidateHost() != nil {
			continue
		}
		emails = append(emails, parsed)
	}
	return emails
}
// FindWithIcannSuffix collects the addresses that Find locates in haystack (without host
// validation) and keeps only those for which ValidateIcanSuffix succeeds, i.e. whose public
// suffix is managed by ICANN. If the validateHost boolean is true, ValidateHost is then
// called for every remaining address and addresses that fail it are left out.
func FindWithIcannSuffix(haystack []byte, validateHost bool) (emails []*EmailAddress) {
	for _, addr := range Find(haystack, false) {
		switch {
		case addr.ValidateIcanSuffix() != nil:
			// Suffix check failed: skip this address.
		case validateHost && addr.ValidateHost() != nil:
			// Host validation was requested and failed: skip this address.
		default:
			emails = append(emails, addr)
		}
	}
	return emails
}
// Parse validates email locally against the anchored RFC 5322 regex and splits it at its
// last '@' into a local part and a domain. An error is returned when the input does not
// match or when the resulting domain is empty. To validate the host of the address
// remotely, call the ValidateHost method on the result.
func Parse(email string) (*EmailAddress, error) {
	if validRfc5322Regexp.MatchString(email) {
		at := strings.LastIndexByte(email, '@')
		if domain := email[at+1:]; domain != "" {
			return &EmailAddress{LocalPart: email[:at], Domain: domain}, nil
		}
	}
	return nil, fmt.Errorf("format is incorrect for %s", email)
}
// LookupHost resolves the host that mail for domain could be delivered to. It first asks
// for MX records and returns the host of the first record; net.LookupMX returns records
// sorted by preference. If no MX record is usable it falls back to the domain's IP
// addresses, as A records can resolve email server hosts, and returns the first address in
// textual form. An error indicates that none of the A or MX records are available.
func LookupHost(domain string) (string, error) {
	// The length checks guard the index expressions: a resolver that reports success
	// without returning any record must fall through instead of panicking.
	if mx, err := net.LookupMX(domain); err == nil && len(mx) > 0 {
		return mx[0].Host, nil
	}
	if ips, err := net.LookupIP(domain); err == nil && len(ips) > 0 {
		return ips[0].String(), nil // may be an IPv4 or an IPv6 address (when available)
	}
	return "", fmt.Errorf("failed finding MX and A records for domain %s", domain)
}
// TryHost will verify if we can start a mail transaction with the host. It connects to
// host on port 587, says hello using the address' domain, announces hello@<domain> as the
// sender and e as the recipient. The first error encountered is returned; nil means the
// host accepted the recipient. A lot of hosts block this method so don't expect much
// from it.
//
// NOTE(review): 587 is conventionally the mail submission port, while hosts found through
// MX records usually accept mail on port 25 - confirm that 587 is intended.
func TryHost(host string, e EmailAddress) error {
	// JoinHostPort brackets IPv6 literals ("[::1]:587"); plain "%s:%d" formatting yields
	// an address that cannot be dialed when host is an IPv6 address.
	client, err := smtp.Dial(net.JoinHostPort(host, "587"))
	if err != nil {
		return err
	}
	defer client.Close()

	if err := client.Hello(e.Domain); err != nil {
		return err
	}
	if err := client.Mail(fmt.Sprintf("hello@%s", e.Domain)); err != nil {
		return err
	}
	if err := client.Rcpt(e.String()); err != nil {
		return err
	}

	// The recipient was accepted; abort the transaction and end the session. Failures
	// while cleaning up are deliberately ignored.
	client.Reset() // #nosec
	client.Quit()  // #nosec
	return nil
}