Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Individually track unsafe getters #29

Merged
merged 1 commit into from
Nov 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ lib/lexer.wasm: include-wasm/cjs-module-lexer.h src/lexer.c
@mkdir -p lib
../wasi-sdk-11.0/bin/clang src/lexer.c -I include-wasm --sysroot=../wasi-sdk-11.0/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \
-Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,--export=__heap_base,\
--export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ree \
--export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ru,--export=us,--export=ue \
-Wno-logical-op-parentheses -Wno-parentheses \
-Oz

Expand Down
68 changes: 46 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ EXPORTS_SPREAD: `...` (IDENTIFIER | REQUIRE)

EXPORTS_MEMBER: EXPORTS_DOT_ASSIGN | EXPORTS_LITERAL_COMPUTED_ASSIGN

EXPORTS_DEFINE: `Object` `.` `defineProperty `(` IDENTIFIER_STRING `, {`
EXPORTS_DEFINE: `Object` `.` `defineProperty `(` EXPORTS_IDENFITIER `,` IDENTIFIER_STRING

EXPORTS_DEFINE_VALUE: EXPORTS_DEFINE `, {`
(`enumerable: true,`)?
(
`value:` |
Expand Down Expand Up @@ -121,7 +123,9 @@ EXPORT_STAR_LIB: `Object.keys(` IDENTIFIER$1 `).forEach(function (` IDENTIFIER$2

Spacing between tokens is taken to be any ECMA-262 whitespace, ECMA-262 block comment or ECMA-262 line comment.

* The returned export names are taken to be the combination of the `IDENTIFIER` and `IDENTIFIER_STRING` slots for all `EXPORTS_MEMBER`, `EXPORTS_LITERAL` and `EXPORTS_DEFINE` matches.
* The returned export names are taken to be the combination of:
1. All `IDENTIFIER` and `IDENTIFIER_STRING` slots for `EXPORTS_MEMBER` and `EXPORTS_LITERAL` matches.
2. The first `IDENTIFIER_STRING` slot for all `EXPORTS_DEFINE_VALUE` matches where that same string is not an `EXPORTS_DEFINE` match that is not also an `EXPORTS_DEFINE_VALUE` match.
* The reexport specifiers are taken to be the the combination of:
1. The `REQUIRE` matches of the last matched of either `MODULE_EXPORTS_ASSIGN` or `EXPORTS_LITERAL`.
2. All _top-level_ `EXPORT_STAR` `REQUIRE` matches and `EXPORTS_ASSIGN` matches whose `IDENTIFIER` also matches the first `IDENTIFIER` in `EXPORT_STAR_LIB`.
Expand Down Expand Up @@ -162,6 +166,8 @@ It will in turn underclassify in cases where the identifiers are renamed:
})(exports);
```

#### Getter Exports Parsing

`Object.defineProperty` is detected for specifically value and getter forms returning an identifier or member expression:

```js
Expand All @@ -188,6 +194,24 @@ Object.defineProperty(exports, 'd', { value: 'd' });
Object.defineProperty(exports, '__esModule', { value: true });
```

To avoid matching getters that have side effects, any getter for an export name that does not support the forms above will
opt-out of the getter matching:

```js
// DETECTS: NO EXPORTS
Object.defineProperty(exports, 'a', {
value: 'no problem'
});

if (false) {
Object.defineProperty(module.exports, 'a', {
get () {
return dynamic();
}
})
}
```

Alternative object definition structures or getter function bodies are not detected:

```js
Expand Down Expand Up @@ -337,63 +361,63 @@ JS Build:

```
Module load time
> 5ms
> 4ms
Cold Run, All Samples
test/samples/*.js (3635 KiB)
> 323ms
> 299ms

Warm Runs (average of 25 runs)
test/samples/angular.js (1410 KiB)
> 14.84ms
> 13.96ms
test/samples/angular.min.js (303 KiB)
> 4.8ms
> 4.72ms
test/samples/d3.js (553 KiB)
> 7.84ms
> 6.76ms
test/samples/d3.min.js (250 KiB)
> 4ms
test/samples/magic-string.js (34 KiB)
> 0.72ms
> 0.64ms
test/samples/magic-string.min.js (20 KiB)
> 0.4ms
> 0ms
test/samples/rollup.js (698 KiB)
> 9.32ms
> 8.48ms
test/samples/rollup.min.js (367 KiB)
> 6.52ms
> 5.36ms

Warm Runs, All Samples (average of 25 runs)
test/samples/*.js (3635 KiB)
> 44ms
> 40.28ms
```

Wasm Build:
```
Module load time
> 11ms
> 10ms
Cold Run, All Samples
test/samples/*.js (3635 KiB)
> 42ms
> 43ms

Warm Runs (average of 25 runs)
test/samples/angular.js (1410 KiB)
> 9.92ms
> 9.32ms
test/samples/angular.min.js (303 KiB)
> 3.2ms
> 3.16ms
test/samples/d3.js (553 KiB)
> 5.2ms
> 5ms
test/samples/d3.min.js (250 KiB)
> 2.52ms
> 2.32ms
test/samples/magic-string.js (34 KiB)
> 0.16ms
test/samples/magic-string.min.js (20 KiB)
> 0.04ms
> 0ms
test/samples/rollup.js (698 KiB)
> 6.44ms
> 6.28ms
test/samples/rollup.min.js (367 KiB)
> 3.96ms
> 3.6ms

Warm Runs, All Samples (average of 25 runs)
test/samples/*.js (3635 KiB)
> 30.48ms
> 27.76ms
```

### Wasm Build Steps
Expand Down
2 changes: 1 addition & 1 deletion bench/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Promise.resolve().then(async () => {
console.log('Module load time');
{
const start = process.hrtime.bigint();
var { default: parse } = await import('../lexer.js');
var { parse } = await import('../lexer.js');
console.log(`> ${c.bold.green(Math.round(Number(process.hrtime.bigint() - start) / 1e6) + 'ms')}`);
}

Expand Down
39 changes: 38 additions & 1 deletion include-wasm/cjs-module-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ Slice* export_write_head = NULL;
Slice* first_reexport = NULL;
Slice* reexport_read_head = NULL;
Slice* reexport_write_head = NULL;
Slice* first_unsafe_getter = NULL;
Slice* unsafe_getter_read_head = NULL;
Slice* unsafe_getter_write_head = NULL;
void* analysis_base;
void* analysis_head;

Expand All @@ -48,6 +51,9 @@ const uint16_t* sa (uint32_t utf16Len) {
first_reexport = NULL;
reexport_write_head = NULL;
reexport_read_head = NULL;
first_unsafe_getter = NULL;
unsafe_getter_write_head = NULL;
unsafe_getter_read_head = NULL;
return source;
}

Expand All @@ -72,6 +78,14 @@ uint32_t res () {
uint32_t ree () {
return reexport_read_head->end - source;
}
// getUnsafeGetterStart
uint32_t us () {
return unsafe_getter_read_head->start - source;
}
// getUnsafeGetterEnd
uint32_t ue () {
return unsafe_getter_read_head->end - source;
}
// readExport
bool re () {
if (export_read_head == NULL)
Expand All @@ -92,6 +106,16 @@ bool rre () {
return false;
return true;
}
// readUnsafeGetter
bool ru () {
if (unsafe_getter_read_head == NULL)
unsafe_getter_read_head = first_unsafe_getter;
else
unsafe_getter_read_head = unsafe_getter_read_head->next;
if (unsafe_getter_read_head == NULL)
return false;
return true;
}

bool parse (uint32_t point);

Expand Down Expand Up @@ -119,14 +143,27 @@ void _addReexport (const uint16_t* start, const uint16_t* end) {
reexport->end = end;
reexport->next = NULL;
}
void _addUnsafeGetter (const uint16_t* start, const uint16_t* end) {
Slice* unsafe_getter = (Slice*)(analysis_head);
analysis_head = analysis_head + sizeof(Slice);
if (unsafe_getter_write_head == NULL)
first_unsafe_getter = unsafe_getter;
else
unsafe_getter_write_head->next = unsafe_getter;
unsafe_getter_write_head = unsafe_getter;
unsafe_getter->start = start;
unsafe_getter->end = end;
unsafe_getter->next = NULL;
}
void _clearReexports () {
reexport_write_head = NULL;
first_reexport = NULL;
}
void (*addExport)(const uint16_t*, const uint16_t*) = &_addExport;
void (*addReexport)(const uint16_t*, const uint16_t*) = &_addReexport;
void (*addUnsafeGetter)(const uint16_t*, const uint16_t*) = &_addUnsafeGetter;
void (*clearReexports)() = &_clearReexports;
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*clearReexports)());
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)());

enum RequireType {
Import,
Expand Down
2 changes: 1 addition & 1 deletion include/cjs-module-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ typedef struct StarExportBinding StarExportBinding;

void bail (uint32_t err);

bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*clearReexports)());
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)());

enum RequireType {
Import,
Expand Down
14 changes: 10 additions & 4 deletions lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ let openTokenDepth,
starExportMap,
lastStarExportSpecifier,
_exports,
unsafeGetters,
reexports;

function resetState () {
Expand All @@ -27,6 +28,7 @@ function resetState () {
lastStarExportSpecifier = null;

_exports = new Set();
unsafeGetters = new Set();
reexports = new Set();
}

Expand All @@ -47,7 +49,7 @@ function parseCJS (source, name = '@') {
e.loc = pos;
throw e;
}
const result = { exports: [..._exports], reexports: [...reexports] };
const result = { exports: [..._exports].filter(expt => !unsafeGetters.has(expt)), reexports: [...reexports] };
resetState();
return result;
}
Expand Down Expand Up @@ -260,6 +262,7 @@ function tryParseObjectDefineOrKeys (keys) {
pos++;
ch = commentWhitespace();
if (ch === 100/*d*/ && source.startsWith('efineProperty', pos + 1)) {
let expt;
while (true) {
pos += 14;
revertPos = pos - 1;
Expand All @@ -276,7 +279,7 @@ function tryParseObjectDefineOrKeys (keys) {
let quot = ch;
const exportPos = ++pos;
if (!identifier() || source.charCodeAt(pos) !== quot) break;
const expt = source.slice(exportPos, pos);
expt = source.slice(exportPos, pos);
pos++;
ch = commentWhitespace();
if (ch !== 44/*,*/) break;
Expand Down Expand Up @@ -304,9 +307,9 @@ function tryParseObjectDefineOrKeys (keys) {
pos += 5;
ch = commentWhitespace();
if (ch !== 58/*:*/) break;
pos++;
addExport(expt);
break;
pos = revertPos;
return;
}
else if (ch === 103/*g*/) {
if (!source.startsWith('et', pos + 1)) break;
Expand Down Expand Up @@ -372,6 +375,9 @@ function tryParseObjectDefineOrKeys (keys) {
}
break;
}
if (expt) {
unsafeGetters.add(expt);
}
}
else if (keys && ch === 107/*k*/ && source.startsWith('eys', pos + 1)) {
while (true) {
Expand Down
18 changes: 13 additions & 5 deletions src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,19 @@ const StarExportBinding* STAR_EXPORT_STACK_END = &starExportStack_[MAX_STAR_EXPO

void (*addExport)(const uint16_t*, const uint16_t*);
void (*addReexport)(const uint16_t*, const uint16_t*);
void (*addUnsafeGetter)(const uint16_t*, const uint16_t*);
void (*clearReexports)();

// Note: parsing is based on the _assumption_ that the source is already valid
bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) {
bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) {
source = _source;
sourceLen = _sourceLen;
if (_addExport)
addExport = _addExport;
if (_addReexport)
addReexport = _addReexport;
if (_addUnsafeGetter)
addUnsafeGetter = _addUnsafeGetter;

templateStackDepth = 0;
openTokenDepth = 0;
Expand Down Expand Up @@ -272,6 +275,8 @@ void tryParseObjectDefineOrKeys (bool keys) {
pos++;
ch = commentWhitespace();
if (ch == 'd' && str_eq13(pos + 1, 'e', 'f', 'i', 'n', 'e', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'y')) {
uint16_t* exportStart = 0;
uint16_t* exportEnd = 0;
while (true) {
pos += 14;
revertPos = pos - 1;
Expand All @@ -286,9 +291,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
ch = commentWhitespace();
if (ch != '\'' && ch != '"') break;
uint16_t quot = ch;
uint16_t* exportStart = ++pos;
exportStart = ++pos;
if (!identifier(*pos) || *pos != quot) break;
uint16_t* exportEnd = pos;
exportEnd = pos;
pos++;
ch = commentWhitespace();
if (ch != ',') break;
Expand Down Expand Up @@ -316,9 +321,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
pos += 5;
ch = commentWhitespace();
if (ch != ':') break;
pos++;
addExport(exportStart, exportEnd);
break;
pos = revertPos;
return;
}
else if (ch == 'g') {
if (!str_eq2(pos + 1, 'e', 't')) break;
Expand Down Expand Up @@ -388,6 +393,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
}
break;
}
if (exportEnd > 0) {
addUnsafeGetter(exportStart, exportEnd);
}
}
else if (keys && ch == 'k' && str_eq3(pos + 1, 'e', 'y', 's')) {
while (true) {
Expand Down
9 changes: 6 additions & 3 deletions src/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,18 @@ export function parse (source, name = '@') {
const addr = wasm.sa(len);
(isLE ? copyLE : copyBE)(source, new Uint16Array(wasm.memory.buffer, addr, len));

if (!wasm.parseCJS(addr, source.length, 0, 0))
if (!wasm.parseCJS(addr, source.length, 0, 0, 0))
throw Object.assign(new Error(`Parse error ${name}${wasm.e()}:${source.slice(0, wasm.e()).split('\n').length}:${wasm.e() - source.lastIndexOf('\n', wasm.e() - 1)}`), { idx: wasm.e() });

let exports = new Set(), reexports = new Set();
let exports = new Set(), reexports = new Set(), unsafeGetters = new Set();

while (wasm.rre())
reexports.add(source.slice(wasm.res(), wasm.ree()));
while (wasm.ru())
unsafeGetters.add(source.slice(wasm.us(), wasm.ue()));
while (wasm.re()) {
let exptStr = source.slice(wasm.es(), wasm.ee());
if (!strictReserved.has(exptStr))
if (!strictReserved.has(exptStr) && !unsafeGetters.has(exptStr))
exports.add(exptStr);
}

Expand Down
Loading