Skip to content

Commit

Permalink
Fix JS Scanner (#57)
Browse files Browse the repository at this point in the history
* fix: refactor js_scanner to split at first non-exported token

* chore: add changeset

* refactor: add missing utils

* refactor: cleanup printer tests to match new render body behavior

* fix: lint

* Gets tests passing

Co-authored-by: Matthew Phillips <matthew@skypack.dev>
  • Loading branch information
natemoo-re and matthewp authored Oct 14, 2021
1 parent d11c8c2 commit 7ab9148
Show file tree
Hide file tree
Showing 8 changed files with 293 additions and 103 deletions.
5 changes: 5 additions & 0 deletions .changeset/wise-onions-hang.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@astrojs/compiler": patch
---

Improve JS scanning algorithm to be more fault tolerant, less error prone
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/lithammer/dedent v1.1.0
github.com/norunners/vert v0.0.0-20210320050952-39b24b3cdf94
github.com/tdewolff/parse v2.3.4+incompatible
github.com/tdewolff/test v1.0.6 // indirect
github.com/tdewolff/parse/v2 v2.5.22
golang.org/x/net v0.0.0-20210716203947-853a461950ff
)

Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ github.com/natemoo-re/vert v0.0.0-natemoo-re.7 h1:nhfKslS16o2Uruqt8Bwv8ZFYUuf+PW
github.com/natemoo-re/vert v0.0.0-natemoo-re.7/go.mod h1:67MuD9cDWe6pmhyQrElFlSNMMzL0CMUdFURKxJSvxUM=
github.com/tdewolff/parse v2.3.4+incompatible h1:x05/cnGwIMf4ceLuDMBOdQ1qGniMoxpP46ghf0Qzh38=
github.com/tdewolff/parse v2.3.4+incompatible/go.mod h1:8oBwCsVmUkgHO8M5iCzSIDtpzXOT0WXX9cWhz+bIzJQ=
github.com/tdewolff/parse/v2 v2.5.22 h1:KXMHTyx4VTL6Zu9a94SULQalDMvtP5FQq10mnSfaoGs=
github.com/tdewolff/parse/v2 v2.5.22/go.mod h1:WzaJpRSbwq++EIQHYIRTpbYKNA3gn9it1Ik++q4zyho=
github.com/tdewolff/test v1.0.6 h1:76mzYJQ83Op284kMT+63iCNCI7NEERsIN8dLM+RiKr4=
github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
golang.org/x/net v0.0.0-20210716203947-853a461950ff h1:j2EK/QoxYNBsXI4R7fQkkRUk8y6wnOBI+6hgPdP/6Ds=
Expand Down
210 changes: 136 additions & 74 deletions internal/js_scanner/js_scanner.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
package js_scanner

import (
"io"

"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/js"
)

// An ImportType is the type of import.
type ImportType uint32

Expand All @@ -19,64 +26,145 @@ type ImportStatement struct {
var source []byte
var pos int

// TODO: ignore `await` inside of function bodies
func FindRenderBody(_source []byte) int {
source = _source
pos = 0
lastBr := 0
for ; pos < len(source)-1; pos++ {
c := readCommentWhitespace(false)
switch true {
case isBr(c) || c == ';':
// Track the last position of a linebreak of ;
// This is a rough proxy for "end of previous statement"
lastBr = pos
case c == 'A':
// If we access the Astro global, we're in the function body
if isKeywordStart() && str_eq5('A', 's', 't', 'r', 'o') {
return lastBr + 1
// This function returns the index at which we should split the frontmatter.
// The first slice contains any top-level imports/exports, which are global.
// The second slice contains any non-exported declarations, which are scoped to the render body
//
// Why use a lexical scan here?
// 1. We can stop lexing as soon as we hit a non-exported token
// 2. Lexing supports malformed modules, they'll throw at runtime instead of compilation
// 3. `tdewolff/parse/v2` doesn't support TypeScript parsing yet, but we can lex it fine
func FindRenderBody(source []byte) int {
l := js.NewLexer(parse.NewInputBytes(source))
i := 0
pairs := make(map[byte]int)

// Let's lex the script until we find what we need!
for {
token, value := l.Next()
openPairs := pairs['{'] > 0 || pairs['('] > 0 || pairs['['] > 0

if token == js.ErrorToken {
if l.Err() != io.EOF {
return -1
}
case c == 'a':
// If we have to await something, we're in the function body
if isKeywordStart() && str_eq5('a', 'w', 'a', 'i', 't') {
return lastBr + 1
break
}

// Common delimeters. Track their length, then skip.
if token == js.WhitespaceToken || token == js.LineTerminatorToken || token == js.SemicolonToken {
i += len(value)
continue
}

// Imports should be consumed up until we find a specifier,
// then we can exit after the following line terminator or semicolon
if token == js.ImportToken {
i += len(value)
foundSpecifier := false
for {
next, nextValue := l.Next()
i += len(nextValue)
if next == js.StringToken {
foundSpecifier = true
}
if foundSpecifier && (next == js.LineTerminatorToken || next == js.SemicolonToken) {
break
}
}
case c == '/':
if str_eq2('/', '/') {
readLineComment()
continue
} else if str_eq2('/', '*') {
readBlockComment(true)
continue
}

// Exports should be consumed until all opening braces are closed,
// a specifier is found, and a line terminator has been found
if token == js.ExportToken {
foundIdentifier := false
foundSemicolonOrLineTerminator := false
i += len(value)
for {
next, nextValue := l.Next()
i += len(nextValue)
if js.IsIdentifier(next) {
foundIdentifier = true
} else if next == js.LineTerminatorToken || next == js.SemicolonToken {
foundSemicolonOrLineTerminator = true
} else if js.IsPunctuator(next) {
if nextValue[0] == '{' || nextValue[0] == '(' || nextValue[0] == '[' {
pairs[nextValue[0]]++
} else if nextValue[0] == '}' {
pairs['{']--
} else if nextValue[0] == ')' {
pairs['(']--
} else if nextValue[0] == ']' {
pairs['[']--
}
}

if foundIdentifier && foundSemicolonOrLineTerminator && pairs['{'] == 0 && pairs['('] == 0 && pairs['['] == 0 {
break
}
}
continue
}

// Track opening and closing braces
if js.IsPunctuator(token) {
if value[0] == '{' || value[0] == '(' || value[0] == '[' {
pairs[value[0]]++
i += len(value)
continue
} else if value[0] == '}' {
pairs['{']--
} else if value[0] == ')' {
pairs['(']--
} else if value[0] == ']' {
pairs['[']--
}
}

// If there are no open pairs and we hit a reserved word (var/let/const/async/function)
// return our index! This is the first non-exported declaration
if !openPairs && js.IsReservedWord(token) {
return i
}

// Track our current position
i += len(value)
}
return -1

// If we haven't found anything... there's nothing to find! Split at the start.
return i
}

func HasExports(_source []byte) bool {
source = _source
pos = 0
for ; pos < len(source)-1; pos++ {
c := readCommentWhitespace(true)
switch true {
case c == 'e':
if isKeywordStart() && str_eq6('e', 'x', 'p', 'o', 'r', 't') {
return true
}
case c == '/':
if str_eq2('/', '/') {
readLineComment()
continue
} else if str_eq2('/', '*') {
readBlockComment(true)
continue
}
func HasExports(source []byte) bool {
l := js.NewLexer(parse.NewInputBytes(source))
for {
token, _ := l.Next()
if token == js.ErrorToken {
// EOF or other error
return false
}
if token == js.ExportToken {
return true
}
}
}

// AccessesPrivateVars reports whether the source references an identifier
// prefixed with "$$" — the prefix reserved for compiler-internal variables.
func AccessesPrivateVars(source []byte) bool {
	l := js.NewLexer(parse.NewInputBytes(source))
	for {
		token, value := l.Next()
		if token == js.ErrorToken {
			// EOF or other error
			return false
		}
		if js.IsIdentifier(token) && len(value) > 1 && value[0] == '$' && value[1] == '$' {
			return true
		}
	}
	// NOTE: the loop above always returns; the old trailing `return false`
	// was unreachable (flagged by go vet) and has been removed.
}

// TODO: refactor to use lexer!
func NextImportSpecifier(_source []byte, _pos int) (int, string) {
source = _source
pos = _pos
Expand Down Expand Up @@ -128,30 +216,8 @@ MainLoop:
}
}

// TODO: check for access to $$vars
// NOTE(review): this is the pre-refactor, hand-rolled scanner version of
// AccessesPrivateVars. Its detection logic is entirely commented out, so it
// only skips comments and always returns false. It is superseded by the
// lexer-based implementation and duplicates its name — remove once the
// refactor lands.
func AccessesPrivateVars(_source []byte) bool {
	source = _source
	pos = 0
	for ; pos < len(source)-1; pos++ {
		c := readCommentWhitespace(true)
		switch true {
		// case c == '$':
		// 	fmt.Println(str_eq2('$', '$'))
		// 	if isKeywordStart() && str_eq2('$', '$') {
		// 		return true
		// 	}
		case c == '/':
			if str_eq2('/', '/') {
				readLineComment()
				continue
			} else if str_eq2('/', '*') {
				readBlockComment(true)
				continue
			}
		}
	}
	return false
}
// The following utilities are adapted from https://github.com/guybedford/es-module-lexer
// Released under the MIT License (C) 2018-2021 Guy Bedford

// Note: non-ASCII BR and whitespace checks omitted for perf / footprint
// if there is a significant user need this can be reconsidered
Expand Down Expand Up @@ -183,10 +249,6 @@ func str_eq2(c1 byte, c2 byte) bool {
return len(source[pos:]) >= 2 && source[pos+1] == c2 && source[pos] == c1
}

func str_eq5(c1 byte, c2 byte, c3 byte, c4 byte, c5 byte) bool {
return len(source[pos:]) >= 5 && source[pos+4] == c5 && source[pos+3] == c4 && source[pos+2] == c3 && source[pos+1] == c2 && source[pos] == c1
}

func str_eq6(c1 byte, c2 byte, c3 byte, c4 byte, c5 byte, c6 byte) bool {
return len(source[pos:]) >= 6 && source[pos+5] == c6 && source[pos+4] == c5 && source[pos+3] == c4 && source[pos+2] == c3 && source[pos+1] == c2 && source[pos] == c1
}
Expand Down
117 changes: 117 additions & 0 deletions internal/js_scanner/js_scanner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package js_scanner

import (
"fmt"
"testing"
)

// TestFindRenderBody checks the split index between hoisted top-level
// imports/exports and the render body for a range of frontmatter shapes.
func TestFindRenderBody(t *testing.T) {
	// `want` is the expected byte offset at which the frontmatter is split.
	tests := []struct {
		name   string
		source string
		want   int
	}{
		{
			name:   "basic",
			source: `const value = "test"`,
			want:   0,
		},
		{
			name: "import",
			source: `import { fn } from "package";
const b = await fetch();`,
			want: 30,
		},
		{
			name: "big import",
			source: `import {
  a,
  b,
  c,
  d,
} from "package"
const b = await fetch();`,
			want: 44,
		},
		{
			name: "import with comment",
			source: `// comment
import { fn } from "package";
const b = await fetch();`,
			want: 41,
		},
		{
			name: "import assertion",
			source: `// comment
import { fn } from "package" assert { it: 'works' };
const b = await fetch();`,
			want: 64,
		},
		{
			name: "import assertion",
			source: `// comment
import {
  fn
} from
  "package"
  assert {
    it: 'works'
  };
const b = await fetch();`,
			want: 74,
		},
		{
			name: "import/export",
			source: `import { fn } from "package";
export async fn() {}
const b = await fetch()`,
			want: 51,
		},
		{
			name: "getStaticPaths",
			source: `import { fn } from "package";
export async function getStaticPaths() {
  const content = Astro.fetchContent('**/*.md');
}
const b = await fetch()`,
			want: 121,
		},
		{
			name: "getStaticPaths with comments",
			source: `import { fn } from "package";
export async function getStaticPaths() {
  const content = Astro.fetchContent('**/*.md');
}
const b = await fetch()`,
			want: 121,
		},
		{
			name: "getStaticPaths with semicolon",
			source: `import { fn } from "package";
export async function getStaticPaths() {
  const content = Astro.fetchContent('**/*.md');
}; const b = await fetch()`,
			want: 122,
		},
		{
			name: "multiple imports",
			source: `import { a } from "a";
import { b } from "b";
import { c } from "c";
const d = await fetch()`,
			want: 69,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := FindRenderBody([]byte(tt.source))
			if tt.want != got {
				// t.Errorf instead of t.Error(fmt.Sprintf(...)) — same output,
				// idiomatic and vet-clean.
				t.Errorf("\nFAIL: %s\n  want: %v\n  got: %v", tt.name, tt.want, got)
				// Guard the debug print: got may be -1 on failure, and slicing
				// tt.source[-1:] would panic and mask the test failure.
				if got >= 0 && got <= len(tt.source) {
					fmt.Println(tt.source[got:])
				}
			}
		})
	}
}
Loading

0 comments on commit 7ab9148

Please sign in to comment.