Skip to content

Commit 200df34

Browse files
committed
Fix lexer bug involving template literals
In any program where a `}` character occurred anywhere before a template literal, the lexer incorrectly tokenized the `}` and all characters up through the opening backquote as a template tail. An additional piece of state in the parser monad now ensures that the lexer recognizes a TemplateMiddleToken or a TemplateTailToken only when the parser is expecting one.
1 parent b2f4cfa commit 200df34

File tree

4 files changed

+31
-7
lines changed

4 files changed

+31
-7
lines changed

src/Language/JavaScript/Parser/Grammar7.y

+11-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ module Language.JavaScript.Parser.Grammar7
99
) where
1010

1111
import Data.Char
12+
import Data.Functor (($>))
1213
import Language.JavaScript.Parser.Lexer
1314
import Language.JavaScript.Parser.ParserMonad
1415
import Language.JavaScript.Parser.SrcLocation
@@ -505,8 +506,16 @@ TemplateLiteral : 'tmplnosub' { JSUntaggedTemplate (mkJSAnnot $1) (
505506
| 'tmplhead' TemplateParts { JSUntaggedTemplate (mkJSAnnot $1) (tokenLiteral $1) $2 }
506507

507508
TemplateParts :: { [AST.JSTemplatePart] }
508-
TemplateParts : Expression 'tmplmiddle' TemplateParts { AST.JSTemplatePart $1 (mkJSAnnot $2) (tokenLiteral $2) : $3 }
509-
| Expression 'tmpltail' { AST.JSTemplatePart $1 (mkJSAnnot $2) (tokenLiteral $2) : [] }
509+
TemplateParts : TemplateExpression RBrace 'tmplmiddle' TemplateParts { AST.JSTemplatePart $1 $2 ("}" <> tokenLiteral $3) : $4 }
510+
| TemplateExpression RBrace 'tmpltail' { AST.JSTemplatePart $1 $2 ("}" <> tokenLiteral $3) : [] }
511+
512+
-- This production only exists to ensure that inTemplate is set to True before
513+
-- a tmplmiddle or tmpltail token is lexed. Since the lexer is always one token
514+
-- ahead of the parser, setInTemplate needs to be called during a reduction
515+
-- that is *two* tokens behind tmplmiddle/tmpltail. Accordingly,
516+
-- TemplateExpression is always followed by an RBrace, which is lexed normally.
517+
TemplateExpression :: { AST.JSExpression }
518+
TemplateExpression : Expression {% setInTemplate True \$> $1 }
510519

511520
-- ArrayLiteral : See 11.1.4
512521
-- [ Elisionopt ]

src/Language/JavaScript/Parser/Lexer.x

+16-5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ module Language.JavaScript.Parser.Lexer
1515
, alexError
1616
, runAlex
1717
, alexTestTokeniser
18+
, setInTemplate
1819
) where
1920

2021
import Language.JavaScript.Parser.LexerUtils
@@ -199,8 +200,9 @@ $ZWJ = [\x200d]
199200
tokens :-
200201
201202
-- State: 0 is regex allowed, 1 is / or /= allowed
203+
-- 2 is a special state for parsing characters inside templates
202204
203-
<0> () ; -- { registerStates lexToken reg divide }
205+
<0> () ; -- { registerStates lexToken reg divide template }
204206
205207
-- Skip Whitespace
206208
<reg,divide> $white_char+ { adapt (mkString wsToken) }
@@ -256,8 +258,8 @@ tokens :-
256258
257259
<reg,divide> "`" @TemplateCharacters "`" { adapt (mkString' NoSubstitutionTemplateToken) }
258260
<reg,divide> "`" @TemplateCharacters "${" { adapt (mkString' TemplateHeadToken) }
259-
<reg,divide> "}" @TemplateCharacters "${" { adapt (mkString' TemplateMiddleToken) }
260-
<reg,divide> "}" @TemplateCharacters "`" { adapt (mkString' TemplateTailToken) }
261+
<template> @TemplateCharacters "${" { adapt (mkString' TemplateMiddleToken) }
262+
<template> @TemplateCharacters "`" { adapt (mkString' TemplateTailToken) }
261263
262264
263265
@@ -387,8 +389,11 @@ lexToken = do
387389
lt <- getLastToken
388390
case lt of
389391
TailToken {} -> alexEOF
390-
_other ->
391-
case alexScan inp (classifyToken lt) of
392+
_other -> do
393+
isInTmpl <- getInTemplate
394+
let state = if isInTmpl then template else classifyToken lt
395+
setInTemplate False -- the inTemplate condition only needs to last for one token
396+
case alexScan inp state of
392397
AlexEOF -> do
393398
tok <- tailToken
394399
setLastToken tok
@@ -491,6 +496,12 @@ addComment c = Alex $ \s -> Right (s{alex_ust=(alex_ust s){comment=c:( comment
491496
setComment :: [Token] -> Alex ()
492497
setComment cs = Alex $ \s -> Right (s{alex_ust=(alex_ust s){comment=cs }}, ())
493498
499+
getInTemplate :: Alex Bool
500+
getInTemplate = Alex $ \s@AlexState{alex_ust=ust} -> Right (s, inTemplate ust)
501+
502+
setInTemplate :: Bool -> Alex ()
503+
setInTemplate it = Alex $ \s -> Right (s{alex_ust=(alex_ust s){inTemplate=it}}, ())
504+
494505
alexEOF :: Alex Token
495506
alexEOF = return (EOFToken tokenPosnEmpty [])
496507

src/Language/JavaScript/Parser/ParserMonad.hs

+2
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,14 @@ import Language.JavaScript.Parser.SrcLocation
2121
data AlexUserState = AlexUserState
2222
{ previousToken :: !Token -- ^the previous token
2323
, comment :: [Token] -- ^the previous comment, if any
24+
, inTemplate :: Bool -- ^whether the parser is expecting template characters
2425
}
2526

2627
alexInitUserState :: AlexUserState
2728
alexInitUserState = AlexUserState
2829
{ previousToken = initToken
2930
, comment = []
31+
, inTemplate = False
3032
}
3133

3234
initToken :: Token

test/Test/Language/Javascript/RoundTrip.hs

+2
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ testRoundTrip = describe "Roundtrip:" $ do
8585
testRT "/*a*/`<${/*b*/x/*c*/}>`/*d*/"
8686
testRT "`\\${}`"
8787
testRT "`\n\n`"
88+
testRT "{}+``"
89+
-- ^ https://github.com/erikd/language-javascript/issues/104
8890

8991

9092
it "statement" $ do

0 commit comments

Comments
 (0)