forked from highlightjs/highlight.js
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregex.js
131 lines (118 loc) · 3.15 KB
/
regex.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/**
* @param {string} value
* @returns {RegExp}
* */
export function escape(value) {
return new RegExp(value.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&'), 'm');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
export function source(re) {
if (!re) return null;
if (typeof re === "string") return re;
return re.source;
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
export function lookahead(re) {
return concat('(?=', re, ')');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
export function anyNumberOfTimes(re) {
return concat('(', re, ')*');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
export function optional(re) {
return concat('(', re, ')?');
}
/**
* @param {...(RegExp | string) } args
* @returns {string}
*/
export function concat(...args) {
const joined = args.map((x) => source(x)).join("");
return joined;
}
/**
* Any of the passed expresssions may match
*
* Creates a huge this | this | that | that match
* @param {(RegExp | string)[] } args
* @returns {string}
*/
export function either(...args) {
const joined = '(' + args.map((x) => source(x)).join("|") + ")";
return joined;
}
/**
* @param {RegExp} re
* @returns {number}
*/
export function countMatchGroups(re) {
return (new RegExp(re.toString() + '|')).exec('').length - 1;
}
/**
* Does lexeme start with a regular expression match at the beginning
* @param {RegExp} re
* @param {string} lexeme
*/
export function startsWith(re, lexeme) {
const match = re && re.exec(lexeme);
return match && match.index === 0;
}
// BACKREF_RE matches an open parenthesis or backreference. To avoid
// an incorrect parse, it additionally matches the following:
// - [...] elements, where the meaning of parentheses and escapes change
// - other escape sequences, so we do not misparse escape sequences as
// interesting elements
// - non-matching or lookahead parentheses, which do not capture. These
// follow the '(' with a '?'.
const BACKREF_RE = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./;
// join logically computes regexps.join(separator), but fixes the
// backreferences so they continue to match.
// it also places each individual regular expression into it's own
// match group, keeping track of the sequencing of those match groups
// is currently an exercise for the caller. :-)
/**
* @param {(string | RegExp)[]} regexps
* @param {string} separator
* @returns {string}
*/
export function join(regexps, separator = "|") {
let numCaptures = 0;
return regexps.map((regex) => {
numCaptures += 1;
const offset = numCaptures;
let re = source(regex);
let out = '';
while (re.length > 0) {
const match = BACKREF_RE.exec(re);
if (!match) {
out += re;
break;
}
out += re.substring(0, match.index);
re = re.substring(match.index + match[0].length);
if (match[0][0] === '\\' && match[1]) {
// Adjust the backreference.
out += '\\' + String(Number(match[1]) + offset);
} else {
out += match[0];
if (match[0] === '(') {
numCaptures++;
}
}
}
return out;
}).map(re => `(${re})`).join(separator);
}