Skip to content

Commit

Permalink
Improved parsing of URLs with symbols (#432)
Browse files Browse the repository at this point in the history
* Improved parsing of URLs with symbols

* Remove NUM from LocalPartAccepting token group

Interferes with domain tokens when parsing numbers such as www.drive1.com

* Complex email HTML test
  • Loading branch information
Nick Frasser authored Mar 25, 2023
1 parent faab3df commit afc51f1
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 8 deletions.
8 changes: 5 additions & 3 deletions packages/linkifyjs/src/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ export function init({ groups }) {
tk.DOLLAR,
tk.EQUALS,
tk.HYPHEN,
tk.NUM,
tk.OPENBRACE,
tk.PERCENT,
tk.PIPE,
Expand Down Expand Up @@ -184,11 +183,11 @@ export function init({ groups }) {
tt(DomainDotTldColonPort, tk.SLASH, Url);

// Note that domains that begin with schemes are treated slightly differently
const UriPrefix = tt(Scheme, tk.COLON); // e.g., 'mailto:' or 'http://'
const SchemeColon = tt(Scheme, tk.COLON); // e.g., 'mailto:'
const SlashSchemeColon = tt(SlashScheme, tk.COLON); // e.g., 'http:'
const SlashSchemeColonSlash = tt(SlashSchemeColon, tk.SLASH); // e.g., 'http:/'

tt(SlashSchemeColonSlash, tk.SLASH, UriPrefix);
const UriPrefix = tt(SlashSchemeColonSlash, tk.SLASH); // e.g., 'http://'

// Scheme states can transition to domain states
ta(Scheme, groups.domain, Domain);
Expand All @@ -199,7 +198,10 @@ export function init({ groups }) {
tt(SlashScheme, tk.HYPHEN, DomainHyphen);

// Force URL with scheme prefix followed by anything sane
ta(SchemeColon, groups.domain, Url);
tt(SchemeColon, tk.SLASH, Url);
ta(UriPrefix, groups.domain, Url);
ta(UriPrefix, qsAccepting, Url);
tt(UriPrefix, tk.SLASH, Url);

// URL, followed by an opening bracket
Expand Down
36 changes: 36 additions & 0 deletions test/spec/html/email.html

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/spec/html/options.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export default {
.split('\n'),

extra: fs.readFileSync(__dirname + '/extra.html', 'utf8').trim(), // for jQuery plugin tests
email: fs.readFileSync(__dirname + '/email.html', 'utf8').trim(), // for linkify-html performance tests
altOptions: {
className: 'linkified',
rel: 'nofollow',
Expand Down
4 changes: 4 additions & 0 deletions test/spec/linkify-html.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -199,4 +199,8 @@ describe('linkify-html', () => {
const input = '這禮拜是我們新的循環 (3/23-4/19), 我將於這週日給 Jeffrey 補課,並且我們會在這期間選另外一個可以上課的日期。';
expect(linkifyHtml(input)).to.be.ok;
});

it('Handles complex email page', () => {
expect(linkifyHtml(htmlOptions.email)).to.be.ok;
});
});
18 changes: 13 additions & 5 deletions test/spec/linkifyjs/parser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ const tests = [
'~@example.org',
[Email],
['~@example.org']
], [
'~emersion/soju-dev@lists.sr.ht',
[Email],
['~emersion/soju-dev@lists.sr.ht']
], [
'test@example2.com',
[Email],
Expand All @@ -228,10 +232,6 @@ const tests = [
'noreply@500px.so',
[Email],
['noreply@500px.so']
], [
'~emersion/soju-dev@lists.sr.ht',
[Email],
['~emersion/soju-dev@lists.sr.ht']
], [
'http@example.com',
[Email],
Expand Down Expand Up @@ -264,7 +264,15 @@ const tests = [
'Hello\nWorld',
[Text, Nl, Text],
['Hello', '\n', 'World'],
]
], [
'And http://↑↑↓↓←→←→ba.tk/ is also a URL',
[Text, Url, Text],
['And ', 'http://↑↑↓↓←→←→ba.tk/', ' is also a URL']
], [
'This Url www.drive1.com with www and digits also www.500px.com',
[Text, Url, Text, Url],
['This Url ', 'www.drive1.com', ' with www and digits also ', 'www.500px.com']
],
];


Expand Down

0 comments on commit afc51f1

Please sign in to comment.