-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,12 +7,13 @@ | |
// | ||
|
||
import { buildRegExp } from '../builders'; | ||
import { endOfString, startOfString, wordBoundary } from '../constructs/anchors'; | ||
import { endOfString, startOfString } from '../constructs/anchors'; | ||
import { anyOf, charClass, charRange, digit } from '../constructs/character-class'; | ||
import { choiceOf } from '../constructs/choice-of'; | ||
import { repeat } from '../constructs/repeat'; | ||
import { capture } from '../constructs/capture'; | ||
import { oneOrMore, optional } from '../constructs/quantifiers'; | ||
import type { RegexSequence } from '../types'; | ||
// import type { RegexElement, RegexSequence } from '../types'; | ||
// import { lookahead } from '../constructs/lookahead'; | ||
|
||
|
@@ -29,7 +30,7 @@ const alphabetical = charClass(lowercase, uppercase); | |
const specialChars = anyOf('._%+-'); | ||
const portSeperator = ':'; | ||
const schemeSeperator = ':'; | ||
//const doubleSlash = '//'; | ||
const doubleSlash = '//'; | ||
|
||
const pathSeparator = '/'; | ||
const querySeparator = '?'; | ||
|
@@ -71,104 +72,39 @@ export const UrlSchemeValidator = buildRegExp([startOfString, capture(urlScheme) | |
// 3. An optional port number, preceded by a colon (:) | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
PaulJPhilp
via email
Author
Owner
|
||
// Authority = [userinfo "@"] host [":" port] | ||
|
||
/*** | ||
// Host: No Repeat, Eager Version | ||
const hostnameEager = capture(repeat(hostnameChars, { min: 1, max: 255, greedy: false })); | ||
const hostEagerNoRepeat = capture([hostnameEager, period, hostnameEager]); | ||
export const hostEagerNoRepeatFinder = buildRegExp(hostEagerNoRepeat, { | ||
ignoreCase: true, | ||
global: true, | ||
}); | ||
export const hostEagerNoRepeatValidator = buildRegExp( | ||
[startOfString, hostEagerNoRepeat, endOfString], | ||
{ ignoreCase: true }, | ||
); | ||
// Host: No Repeat, Greedy Version | ||
const hostnameGreedy = capture(repeat(hostnameChars, { min: 1, max: 255, greedy: true })); | ||
const hostGreedyNoRepeat = capture([hostnameGreedy, period, hostnameGreedy]); | ||
export const hostGreedyNoRepeatFinder = buildRegExp(hostEagerNoRepeat, { | ||
ignoreCase: true, | ||
global: true, | ||
}); | ||
export const hostGreedyNoRepeatValidator = buildRegExp( | ||
[startOfString, hostGreedyNoRepeat, endOfString], | ||
{ ignoreCase: true }, | ||
); | ||
// Host: ZeroOrMore, Eager Version | ||
const hostEagerZeroOrMore = capture([hostnameEager, zeroOrMore([period, hostnameEager])]); | ||
export const hostEagerZeroOrMoreFinder = buildRegExp(hostEagerZeroOrMore, { | ||
ignoreCase: true, | ||
global: true, | ||
}); | ||
export const hostEagerZeroOrMoreValidator = buildRegExp( | ||
[startOfString, hostEagerZeroOrMore, endOfString], | ||
{ ignoreCase: true }, | ||
); | ||
// Host: with Repeat, Greedy Version | ||
const hostGreedyWithRepeat = capture([hostnameGreedy, repeat([period, hostnameGreedy], { min: 1, max: 255 })]); | ||
export const hostGreedyWithRepeatFinder = buildRegExp(hostGreedyWithRepeat, { | ||
ignoreCase: true, | ||
global: true, | ||
}); | ||
export const hostGreedyWithRepeatValidator = buildRegExp( | ||
[startOfString, hostGreedyWithRepeat, endOfString], | ||
{ ignoreCase: true }, | ||
); | ||
// Host: ZeroOrMore, Greedy Version | ||
const hostGreedyZeroOrMore = capture([hostnameGreedy, zeroOrMore([period, hostnameGreedy])]); | ||
export const hostGreedyZeroOrMoreFinder = buildRegExp(hostGreedyZeroOrMore, { | ||
ignoreCase: true, | ||
global: true, | ||
}); | ||
export const hostGreedyZeroOrMoreValidator = buildRegExp( | ||
[startOfString, hostGreedyZeroOrMore, endOfString], | ||
{ ignoreCase: true }, | ||
); | ||
***/ | ||
|
||
const userInfo = oneOrMore(usernameChars); | ||
const portNumber = capture(repeat(digit, { min: 1, max: 5, greedy: false })); | ||
const port = capture([portSeperator, portNumber]); | ||
const host = capture(repeat(hostnameChars, { min: 1, max: 255, greedy: false })); | ||
const hostname = capture([host, optional(repeat([period, host], { min: 1, max: 255 }))]); | ||
const urlAuthority = capture([optional([userInfo, at]), hostname, optional(port)]); | ||
const portNumber = repeat(digit, { min: 1, max: 5, greedy: false }); | ||
const port = [portSeperator, portNumber]; | ||
const host = repeat(hostnameChars, { min: 1, max: 255, greedy: false }); | ||
const hostname = [host, optional(repeat([period, host], { min: 1, max: 255 }))]; | ||
|
||
const urlAuthority: RegexSequence = [optional([userInfo, at]), choiceOf(hostname), optional(port)]; | ||
This comment has been minimized.
Sorry, something went wrong.
mdjastrzebski
|
||
|
||
export const UrlAuthorityFinder = buildRegExp(urlAuthority, { | ||
ignoreCase: true, | ||
global: true, | ||
}); | ||
|
||
export const UrlAuthorityValidator = buildRegExp([startOfString, urlAuthority, endOfString], { | ||
ignoreCase: true, | ||
}); | ||
export const UrlAuthorityValidator = buildRegExp( | ||
[startOfString, choiceOf(urlAuthority), endOfString], | ||
{ | ||
ignoreCase: true, | ||
}, | ||
); | ||
|
||
// | ||
// Convenience Pattern - Host: | ||
// A hostname (e.g. www.google.com) | ||
// | ||
|
||
const urlHost = [host, choiceOf([pathSeparator, wordBoundary, endOfString])]; | ||
const urlHost = choiceOf(hostname); | ||
|
||
export const UrlHostFinder = buildRegExp(capture(urlHost), { | ||
export const UrlHostFinder = buildRegExp(urlHost, { | ||
ignoreCase: true, | ||
global: true, | ||
}); | ||
|
||
export const UrlHostValidator = buildRegExp(capture(urlHost), { | ||
ignoreCase: true, | ||
}); | ||
export const UrlHostValidator = buildRegExp(urlHost, { ignoreCase: true }); | ||
|
||
// Path: | ||
// The path is the part of the URL that comes after the authority and before the query. | ||
|
@@ -228,16 +164,14 @@ export const UrlFragmentValidator = buildRegExp(urlFragment, { | |
ignoreCase: true, | ||
}); | ||
|
||
const url = capture([ | ||
startOfString, | ||
const url = [ | ||
This comment has been minimized.
Sorry, something went wrong.
mdjastrzebski
|
||
optional(urlScheme), | ||
schemeSeperator, | ||
optional(urlAuthority), | ||
optional([doubleSlash, choiceOf(urlAuthority)]), | ||
urlPath, | ||
optional(urlQuery), | ||
optional(urlFragment), | ||
endOfString, | ||
]); | ||
]; | ||
|
||
/*** | ||
*** Find URL strings in a text. | ||
|
@@ -252,4 +186,6 @@ export const urlFinder = buildRegExp(url, { | |
*** Check that given text is a valid URL. | ||
***/ | ||
|
||
export const urlValidator = buildRegExp([startOfString, url, endOfString], { ignoreCase: true }); | ||
export const urlValidator = buildRegExp([startOfString, choiceOf(url), endOfString], { | ||
ignoreCase: true, | ||
}); |
2 comments
on commit 0fdbf6c
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@PaulJPhilp looks like this is going in the right direction. URL is a complex pattern which should be decomposed into smaller building blocks. These building blocks might also be useful for forming other patterns, e.g. email would benefit from hostname, userInfo, etc.
When possible, let's stick with the official RFC naming for these parts.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be handy to include links to the URL specs, so that it's easy to cross-reference the implementation.