-
Notifications
You must be signed in to change notification settings - Fork 72
/
transforms.js
266 lines (247 loc) · 10.1 KB
/
transforms.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
// @ts-check
import {
FERAL_REG_EXP,
SyntaxError,
stringReplace,
stringSearch,
stringSlice,
stringSplit,
freeze,
} from './commons.js';
import { getSourceURL } from './get-source-url.js';
/**
* Find the first occurence of the given pattern and return
* the location as the approximate line number.
*
* @param {string} src
* @param {RegExp} pattern
* @returns {number}
*/
function getLineNumber(src, pattern) {
const index = stringSearch(src, pattern);
if (index < 0) {
return -1;
}
// The importPattern incidentally captures an initial \n in
// an attempt to reject a . prefix, so we need to offset
// the line number in that case.
const adjustment = src[index] === '\n' ? 1 : 0;
return stringSplit(stringSlice(src, 0, index), '\n').length + adjustment;
}
// /////////////////////////////////////////////////////////////////////////////
const htmlCommentPattern = new FERAL_REG_EXP(`(?:${'<'}!--|--${'>'})`, 'g');
/**
* Conservatively reject the source text if it may contain text that some
* JavaScript parsers may treat as an html-like comment. To reject without
* parsing, `rejectHtmlComments` will also reject some other text as well.
*
* https://www.ecma-international.org/ecma-262/9.0/index.html#sec-html-like-comments
* explains that JavaScript parsers may or may not recognize html
* comment tokens "<" immediately followed by "!--" and "--"
* immediately followed by ">" in non-module source text, and treat
* them as a kind of line comment. Since otherwise both of these can
* appear in normal JavaScript source code as a sequence of operators,
* we have the terrifying possibility of the same source code parsing
* one way on one correct JavaScript implementation, and another way
* on another.
*
* This shim takes the conservative strategy of just rejecting source
* text that contains these strings anywhere. Note that this very
* source file is written strangely to avoid mentioning these
* character strings explicitly.
*
* We do not write the regexp in a straightforward way, so that an
* apparennt html comment does not appear in this file. Thus, we avoid
* rejection by the overly eager rejectDangerousSources.
*
* @param {string} src
* @returns {string}
*/
export const rejectHtmlComments = src => {
const lineNumber = getLineNumber(src, htmlCommentPattern);
if (lineNumber < 0) {
return src;
}
const name = getSourceURL(src);
// See https://github.com/endojs/endo/blob/master/packages/ses/error-codes/SES_HTML_COMMENT_REJECTED.md
throw SyntaxError(
`Possible HTML comment rejected at ${name}:${lineNumber}. (SES_HTML_COMMENT_REJECTED)`,
);
};
/**
* An optional transform to place ahead of `rejectHtmlComments` to evade *that*
* rejection. However, it may change the meaning of the program.
*
* This evasion replaces each alleged html comment with the space-separated
* JavaScript operator sequence that it may mean, assuming that it appears
* outside of a comment or literal string, in source code where the JS
* parser makes no special case for html comments (like module source code).
* In that case, this evasion preserves the meaning of the program, though it
* does change the souce column numbers on each effected line.
*
* If the html comment appeared in a literal (a string literal, regexp literal,
* or a template literal), then this evasion will change the meaning of the
* program by changing the text of that literal.
*
* If the html comment appeared in a JavaScript comment, then this evasion does
* not change the meaning of the program because it only changes the contents of
* those comments.
*
* @param {string} src
* @returns {string}
*/
export const evadeHtmlCommentTest = src => {
const replaceFn = match => (match[0] === '<' ? '< ! --' : '-- >');
return stringReplace(src, htmlCommentPattern, replaceFn);
};
// /////////////////////////////////////////////////////////////////////////////
const importPattern = new FERAL_REG_EXP(
'(^|[^.])\\bimport(\\s*(?:\\(|/[/*]))',
'g',
);
/**
* Conservatively reject the source text if it may contain a dynamic
* import expression. To reject without parsing, `rejectImportExpressions` will
* also reject some other text as well.
*
* The proposed dynamic import expression is the only syntax currently
* proposed, that can appear in non-module JavaScript code, that
* enables direct access to the outside world that cannot be
* suppressed or intercepted without parsing and rewriting. Instead,
* this shim conservatively rejects any source text that seems to
* contain such an expression. To do this safely without parsing, we
* must also reject some valid programs, i.e., those containing
* apparent import expressions in literal strings or comments.
*
* The current conservative rule looks for the identifier "import"
* followed by either an open paren or something that looks like the
* beginning of a comment. We assume that we do not need to worry
* about html comment syntax because that was already rejected by
* rejectHtmlComments.
*
* this \s *must* match all kinds of syntax-defined whitespace. If e.g.
* U+2028 (LINE SEPARATOR) or U+2029 (PARAGRAPH SEPARATOR) is treated as
* whitespace by the parser, but not matched by /\s/, then this would admit
* an attack like: import\u2028('power.js') . We're trying to distinguish
* something like that from something like importnotreally('power.js') which
* is perfectly safe.
*
* @param {string} src
* @returns {string}
*/
export const rejectImportExpressions = src => {
const lineNumber = getLineNumber(src, importPattern);
if (lineNumber < 0) {
return src;
}
const name = getSourceURL(src);
// See https://github.com/endojs/endo/blob/master/packages/ses/error-codes/SES_IMPORT_REJECTED.md
throw SyntaxError(
`Possible import expression rejected at ${name}:${lineNumber}. (SES_IMPORT_REJECTED)`,
);
};
/**
* An optional transform to place ahead of `rejectImportExpressions` to evade
* *that* rejection. However, it may change the meaning of the program.
*
* This evasion replaces each suspicious `import` identifier with `__import__`.
* If the alleged import expression appears in a JavaScript comment, this
* evasion will not change the meaning of the program. If it appears in a
* literal (string literal, regexp literal, or a template literal), then this
* evasion will change the contents of that literal. If it appears as code
* where it would be parsed as an expression, then it might or might not change
* the meaning of the program, depending on the binding, if any, of the lexical
* variable `__import__`.
*
* @param {string} src
* @returns {string}
*/
export const evadeImportExpressionTest = src => {
const replaceFn = (_, p1, p2) => `${p1}__import__${p2}`;
return stringReplace(src, importPattern, replaceFn);
};
// /////////////////////////////////////////////////////////////////////////////
const someDirectEvalPattern = new FERAL_REG_EXP(
'(^|[^.])\\beval(\\s*\\()',
'g',
);
/**
* Heuristically reject some text that seems to contain a direct eval
* expression, with both false positives and false negavives. To reject without
* parsing, `rejectSomeDirectEvalExpressions` may will also reject some other
* text as well. It may also accept source text that contains a direct eval
* written oddly, such as `(eval)(src)`. This false negative is not a security
* vulnerability. Rather it is a compat hazard because it will execute as
* an indirect eval under the SES-shim but as a direct eval on platforms that
* support SES directly (like XS).
*
* The shim cannot correctly emulate a direct eval as explained at
* https://github.com/Agoric/realms-shim/issues/12
* If we did not reject direct eval syntax, we would
* accidentally evaluate these with an emulation of indirect eval. To
* prevent future compatibility problems, in shifting from use of the
* shim to genuine platform support for the proposal, we should
* instead statically reject code that seems to contain a direct eval
* expression.
*
* As with the dynamic import expression, to avoid a full parse, we do
* this approximately with a regexp, that will also reject strings
* that appear safely in comments or strings. Unlike dynamic import,
* if we miss some, this only creates future compat problems, not
* security problems. Thus, we are only trying to catch innocent
* occurrences, not malicious one. In particular, `(eval)(...)` is
* direct eval syntax that would not be caught by the following regexp.
*
* Exported for unit tests.
*
* @param {string} src
* @returns {string}
*/
export const rejectSomeDirectEvalExpressions = src => {
const lineNumber = getLineNumber(src, someDirectEvalPattern);
if (lineNumber < 0) {
return src;
}
const name = getSourceURL(src);
// See https://github.com/endojs/endo/blob/master/packages/ses/error-codes/SES_EVAL_REJECTED.md
throw SyntaxError(
`Possible direct eval expression rejected at ${name}:${lineNumber}. (SES_EVAL_REJECTED)`,
);
};
// /////////////////////////////////////////////////////////////////////////////
/**
* A transform that bundles together the transforms that must unconditionally
* happen last in order to ensure safe evaluation without parsing.
*
* @param {string} source
* @returns {string}
*/
export const mandatoryTransforms = source => {
source = rejectHtmlComments(source);
source = rejectImportExpressions(source);
return source;
};
/**
* Starting with `source`, apply each transform to the result of the
* previous one, returning the result of the last transformation.
*
* @param {string} source
* @param {((str: string) => string)[]} transforms
* @returns {string}
*/
export const applyTransforms = (source, transforms) => {
for (const transform of transforms) {
source = transform(source);
}
return source;
};
// export all as a frozen object
export const transforms = freeze({
rejectHtmlComments: freeze(rejectHtmlComments),
evadeHtmlCommentTest: freeze(evadeHtmlCommentTest),
rejectImportExpressions: freeze(rejectImportExpressions),
evadeImportExpressionTest: freeze(evadeImportExpressionTest),
rejectSomeDirectEvalExpressions: freeze(rejectSomeDirectEvalExpressions),
mandatoryTransforms: freeze(mandatoryTransforms),
applyTransforms: freeze(applyTransforms),
});