Skip to content

Fix lexer bug involving template literals #106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions src/Language/JavaScript/Parser/Grammar7.y
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ module Language.JavaScript.Parser.Grammar7
) where

import Data.Char
import Data.Functor (($>))
import Language.JavaScript.Parser.Lexer
import Language.JavaScript.Parser.ParserMonad
import Language.JavaScript.Parser.SrcLocation
Expand Down Expand Up @@ -505,8 +506,16 @@ TemplateLiteral : 'tmplnosub' { JSUntaggedTemplate (mkJSAnnot $1) (
| 'tmplhead' TemplateParts { JSUntaggedTemplate (mkJSAnnot $1) (tokenLiteral $1) $2 }

TemplateParts :: { [AST.JSTemplatePart] }
TemplateParts : Expression 'tmplmiddle' TemplateParts { AST.JSTemplatePart $1 (mkJSAnnot $2) (tokenLiteral $2) : $3 }
| Expression 'tmpltail' { AST.JSTemplatePart $1 (mkJSAnnot $2) (tokenLiteral $2) : [] }
TemplateParts : TemplateExpression RBrace 'tmplmiddle' TemplateParts { AST.JSTemplatePart $1 $2 ('}' : tokenLiteral $3) : $4 }
| TemplateExpression RBrace 'tmpltail' { AST.JSTemplatePart $1 $2 ('}' : tokenLiteral $3) : [] }

-- TemplateExpression wraps Expression purely for its side effect: its monadic
-- action calls setInTemplate True and then yields the Expression's value
-- unchanged.
--
-- This production only exists to ensure that inTemplate is set to True before
-- a tmplmiddle or tmpltail token is lexed. Since the lexer is always one token
-- ahead of the parser, setInTemplate needs to be called during a reduction
-- that is *two* tokens behind tmplmiddle/tmpltail. Accordingly,
-- TemplateExpression is always followed by an RBrace, which is lexed normally;
-- the token after that RBrace is the one scanned in the template state.
TemplateExpression :: { AST.JSExpression }
TemplateExpression : Expression {% setInTemplate True \$> $1 }

-- ArrayLiteral : See 11.1.4
-- [ Elisionopt ]
Expand Down
21 changes: 16 additions & 5 deletions src/Language/JavaScript/Parser/Lexer.x
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ module Language.JavaScript.Parser.Lexer
, alexError
, runAlex
, alexTestTokeniser
, setInTemplate
) where

import Language.JavaScript.Parser.LexerUtils
Expand Down Expand Up @@ -199,8 +200,9 @@ $ZWJ = [\x200d]
tokens :-

-- State: 0 is regex allowed, 1 is / or /= allowed
-- 2 is a special state for parsing characters inside templates

<0> () ; -- { registerStates lexToken reg divide }
<0> () ; -- { registerStates lexToken reg divide template }

-- Skip Whitespace
<reg,divide> $white_char+ { adapt (mkString wsToken) }
Expand Down Expand Up @@ -256,8 +258,8 @@ tokens :-

<reg,divide> "`" @TemplateCharacters "`" { adapt (mkString' NoSubstitutionTemplateToken) }
<reg,divide> "`" @TemplateCharacters "${" { adapt (mkString' TemplateHeadToken) }
<reg,divide> "}" @TemplateCharacters "${" { adapt (mkString' TemplateMiddleToken) }
<reg,divide> "}" @TemplateCharacters "`" { adapt (mkString' TemplateTailToken) }
<template> @TemplateCharacters "${" { adapt (mkString' TemplateMiddleToken) }
<template> @TemplateCharacters "`" { adapt (mkString' TemplateTailToken) }



Expand Down Expand Up @@ -387,8 +389,11 @@ lexToken = do
lt <- getLastToken
case lt of
TailToken {} -> alexEOF
_other ->
case alexScan inp (classifyToken lt) of
_other -> do
isInTmpl <- getInTemplate
let state = if isInTmpl then template else classifyToken lt
setInTemplate False -- the inTemplate condition only needs to last for one token
case alexScan inp state of
AlexEOF -> do
tok <- tailToken
setLastToken tok
Expand Down Expand Up @@ -491,6 +496,12 @@ addComment c = Alex $ \s -> Right (s{alex_ust=(alex_ust s){comment=c:( comment
setComment :: [Token] -> Alex ()
setComment cs = Alex $ \s -> Right (s{alex_ust=(alex_ust s){comment=cs }}, ())

-- | Read the 'inTemplate' flag from the lexer's user state.
--
-- The lexer state itself is passed through unchanged; only the flag's current
-- value is returned.
getInTemplate :: Alex Bool
getInTemplate = Alex readFlag
  where
    -- Match on the state constructor to reach the user-state record.
    readFlag st@AlexState{alex_ust = userState} = Right (st, inTemplate userState)

-- | Overwrite the 'inTemplate' flag in the lexer's user state.
--
-- All other fields of the lexer state and user state are left as they were.
setInTemplate :: Bool -> Alex ()
setInTemplate it = Alex update
  where
    -- Rebuild the state with only the user-state flag replaced.
    update st = Right (st { alex_ust = (alex_ust st) { inTemplate = it } }, ())

alexEOF :: Alex Token
alexEOF = return (EOFToken tokenPosnEmpty [])

Expand Down
2 changes: 2 additions & 0 deletions src/Language/JavaScript/Parser/ParserMonad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ import Language.JavaScript.Parser.SrcLocation
-- | Extra state carried by the lexer alongside Alex's own state.
data AlexUserState = AlexUserState
{ previousToken :: !Token -- ^ The previous token.
, comment :: [Token] -- ^ The previous comment, if any.
, inTemplate :: Bool -- ^ Whether the parser is expecting template characters, i.e. whether the next token should be scanned in the lexer's template state.
}

-- | The user state the lexer starts with: 'initToken' as the previous token,
-- no pending comments, and not inside a template.
alexInitUserState :: AlexUserState
alexInitUserState =
    AlexUserState { previousToken = initToken, comment = [], inTemplate = False }

initToken :: Token
Expand Down
2 changes: 2 additions & 0 deletions test/Test/Language/Javascript/RoundTrip.hs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ testRoundTrip = describe "Roundtrip:" $ do
testRT "/*a*/`<${/*b*/x/*c*/}>`/*d*/"
testRT "`\\${}`"
testRT "`\n\n`"
testRT "{}+``"
-- ^ https://github.com/erikd/language-javascript/issues/104


it "statement" $ do
Expand Down