forked from aawilson/linklink
-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
172 lines (143 loc) · 7 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
module.exports = {
// Static values used in creating the URL regexes
RE_EMAIL_PATTERN: "[a-z0-9!#$%&'*+/=?^_`{|}~\\-]+(?:[.][a-z0-9!#$%&'*+/=?^_`{|}~\\-]+)*@(?:[a-z0-9](?:[a-z0-9\\-]*[a-z0-9])?[.])+[a-z0-9](?:[a-z0-9\\-]*[a-z0-9])?",
// adapted from: http://daringfireball.net/2010/07/improved_regex_for_matching_urls
RE_URL:
'\\b' +
'(' +
'(?:' +
'((?!javascript)[a-z][\\w\\-]+:' + // URL protocol and colon (not javascript:) -- captured so we can add http if needed
'(?:' +
'/{1,3}' + // 1-3 slashes
'))' +
'|' +
'(?:[a-z0-9.\\-]+[.]' +
'(?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)' +
'[/#?])' +
')' +
'[\\w#?]' + // force a word char or #? char after the url scheme
'(?:' +
'(?:%[0-9a-f]{2})' + // allow url encoded values
'|' +
'(?::\\d+)' + // :port, insert no colons allowed joke here
'|' +
'[a-z0-9\\-._~/?#[\\]@!$&\'*+,;=]' + // allowable chars per http://tools.ietf.org/html/rfc3986#section-2
'|' +
'(?:\\([^\\s\\()]*?\\([^\\s\\()]+\\)[^\\s\\()]*?\\))' + // balanced parens, one level deep: (…(…)…)
'|' +
'(?:\\([^\\s]+?\\))' + // balanced parens, non-recursive: (…)
')+' +
'(?:' +
'(?:\\([^\\s()]*?\\([^\\s()]+\\)[^\\s()]*?\\))' + //balanced parens, one level deep: (…(…)…)
'|' +
'(?:\\([^\\s]+?\\))' + // balanced parens, non-recursive: (…)
'|' +
'[^\\s`!()\\[\\]:{};\'".,<>?«»“”‘’]' + // not a space or one of these punct chars
')' +
'|' + // OR, the following to match naked domains:
'(?:' +
'[a-z0-9]+' +
'(?:[.\\-][a-z0-9]+)*' +
'[.]' +
// removed py and sh to avoid linking script filenames
'(?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|si|sj|ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)' +
'\\b/?' +
'(?!@)' + // not succeeded by a @, avoid matching "foo.na" in "foo.na@example.com"
')' +
')',
// Add <wbr> tags to displayed text to allow wrapping?
add_wbrs: true,
// Truncate displayed links to this length
truncate_length: null,
// Target for links
link_target: "_blank",
// Add titles for links?
link_titles: true,
/**
* Linkify a string, replacing text urls with <a href="url">url</a>
* Note: init must be called before this function can be used
*
* @param text - String to be linkified
* @param matched_links - Return param (pass by ref) - Array of links matched during linkification
**/
linkify: function(text, matched_links) {
text = this.match_and_replace(this.RE_EMAIL_PATTERN, text, true, false, matched_links);
text = this.match_and_replace(this.RE_URL, text, false, true, matched_links);
return text;
},
/**
* Internal helper function for linkification
**/
match_and_replace: function(pattern, input, is_email, add_http, matched_links) {
var start = 0;
var offset = 0;
var match_length = 0;
var end_tag_pos = 0;
var close_anchor_re = /<\/[aA]>/;
var re = new RegExp(pattern, "gi");
var match = {};
var max_iter = 20;
var cur_iter = 0;
while (match = re.exec(input)) {
cur_iter++;
if (cur_iter > max_iter)
break;
start = match.index; // start of match
match_length = match[0].length;
// If we find an opening a tag, advance to its end and continue looking
var substr = input.substring(offset, start);
if (substr.search(/<[aA]/) >= 0) {
close_anchor_re.lastIndex = offset;
var close_anchor_pos = input.substring(offset, input.length).search(close_anchor_re);
// If we find an opening tag without a matching closing tag, just return the input we have
if (close_anchor_pos < 0)
return input;
end_tag_pos = close_anchor_pos + offset;
// RegExp.lastIndex is used to tell the regexp where to start matching
re.lastIndex = end_tag_pos + 4;
offset = end_tag_pos + 4;
continue;
}
// Do the actual replecement of text with anchor tag
var address = input.substr(start, match_length);
var actual = address;
if (add_http && !match[2]) {
actual = 'http://'+actual;
}
var replacement = '<a';
// link target?
if (this.link_target) {
replacement += ' target="'+this.link_target+'"';
}
replacement += ' href="';
if (is_email) {
replacement += 'mailto:';
}
actual = actual.replace(/"/g, '%22');
replacement += actual+'"';
// add title
if (this.link_titles) {
var title = is_email ? 'Email ' + actual : actual;
replacement += ' title="'+title+'"';
}
// Truncate displayed text if requested
if (this.truncate_length && address.length > this.truncate_length) {
address = address.substr(0, this.truncate_length) + '...';
}
// Add word break tags to allow wrapping where appropriate
if (this.add_wbrs) {
address = address.replace(new RegExp("([/=])", 'g'), "<wbr>$1");
}
replacement += '>'+address+'</a>';
// Record what was matched
if (matched_links) {
matched_links.push(actual);
}
// Do the replacement
input = input.slice(0, start) + replacement + input.slice(start+match_length, input.length);
re.lastIndex = start + replacement.length;
offset = start + replacement.length;
}
return input;
}
};