Skip to content

Commit

Permalink
Allow multiline strings to work properly with \r\n
Browse files Browse the repository at this point in the history
They are always encoded in the end result as just \n
  • Loading branch information
mbartlett21 committed Aug 9, 2024
1 parent 6f261f1 commit 4961a54
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 21 deletions.
58 changes: 39 additions & 19 deletions compiler/src/Parse/String.hs
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,18 @@ finalize start end revChunks =
then revChunks
else ES.Slice start (minusPtr end start) : revChunks

dropMultiStringEndingNewline :: [ES.Chunk] -> [ES.Chunk]
dropMultiStringEndingNewline revChunks =
case revChunks of
(ES.Escape 0x6E) : rest ->
rest
_ ->
revChunks
finalizeMultiString :: Ptr Word8 -> Ptr Word8 -> [ES.Chunk] -> ES.String
finalizeMultiString start end revChunks =
ES.fromChunks $
reverse $
if start == end
then {- Get rid of ending newline before """ -}
case revChunks of
(ES.Escape 0x6E) : rest ->
rest
_ ->
revChunks
else ES.Slice start (minusPtr end start) : revChunks

addEscape :: ES.Chunk -> Ptr Word8 -> Ptr Word8 -> [ES.Chunk] -> [ES.Chunk]
addEscape chunk start end revChunks =
Expand Down Expand Up @@ -179,7 +184,21 @@ multiString pos end row _ _ sr sc =
then
let !pos1 = plusPtr pos 1
in countLeadingWhiteSpaceThenMultiString 0 pos1 end (row + 1) 1 pos1 sr sc
else Err sr sc E.StringMultilineWithoutLeadingNewline
else
if word == 0x0D {- \r -}
then
if plusPtr pos 1 >= end
then Err sr sc E.StringEndless_Multi
else
let !word1 = P.unsafeIndex (plusPtr pos 1)
in if word1 == 0x0A {- \n -}
then
let !pos2 = plusPtr pos 2
in countLeadingWhiteSpaceThenMultiString 0 pos2 end (row + 1) 1 pos2 sr sc
else
Err sr sc E.StringInvalidNewline
else
Err sr sc E.StringMultilineWithoutLeadingNewline

countLeadingWhiteSpaceThenMultiString :: Int -> Ptr Word8 -> Ptr Word8 -> Row -> Col -> Ptr Word8 -> Row -> Col -> StringResult
countLeadingWhiteSpaceThenMultiString count pos end row col initialPos sr sc =
Expand Down Expand Up @@ -214,10 +233,7 @@ multiStringBody leadingWhitespace pos end row col initialPos sr sc revChunks =
in if word == 0x22 {- " -} && isDoubleQuote (plusPtr pos 1) end && isDoubleQuote (plusPtr pos 2) end
then
Ok (plusPtr pos 3) row (col + 3) ES.MultilineString $
finalize initialPos pos $
if initialPos == pos
then dropMultiStringEndingNewline revChunks
else revChunks {- only trim the last newline -}
finalizeMultiString initialPos pos revChunks
else
if word == 0x27 {- ' -}
then
Expand All @@ -233,9 +249,17 @@ multiStringBody leadingWhitespace pos end row col initialPos sr sc revChunks =
else
if word == 0x0D {- \r -}
then
let !pos1 = plusPtr pos 1
in dropLeadingWhiteSpaceThenMultiString 0 leadingWhitespace pos1 end row col pos1 sr sc $
addEscape carriageReturn initialPos pos revChunks
if plusPtr pos 1 >= end
then Err sr sc E.StringEndless_Multi
else
let !word1 = P.unsafeIndex (plusPtr pos 1)
in if word1 == 0x0A {- \n -}
then
let !pos2 = plusPtr pos 2
in dropLeadingWhiteSpaceThenMultiString 0 leadingWhitespace pos2 end (row + 1) 1 pos2 sr sc $
addEscape newline initialPos pos revChunks
else
Err row col E.StringInvalidNewline
else
if word == 0x5C {- \ -}
then case eatEscape (plusPtr pos 1) end row col of
Expand Down Expand Up @@ -310,10 +334,6 @@ newline :: ES.Chunk
newline =
ES.Escape 0x6E {-n-}

carriageReturn :: ES.Chunk
carriageReturn =
ES.Escape 0x72 {-r-}

placeholder :: ES.Chunk
placeholder =
ES.CodePoint 0xFFFD {-replacement character-}
17 changes: 15 additions & 2 deletions compiler/src/Reporting/Error/Syntax.hs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ data String
| StringEndless_Multi
| StringEscape Escape
| StringMultilineWithoutLeadingNewline
| StringInvalidNewline
deriving (Show)

data Escape
Expand Down Expand Up @@ -2980,13 +2981,25 @@ toStringReport source string row col =
source
region
Nothing
( D.reflow "The contents of a multiline sting must start on a new line",
( D.reflow "The contents of a multiline string must start on a new line",
D.stack
[ D.reflow "Add a \"\"\" a new line right after the opening quotes.",
[ D.reflow "Add a new line right after the opening quotes.",
D.toSimpleNote "Here is a valid multi-line string for reference:",
D.dullyellow $ D.indent 4 validMultilineStringExample
]
)
StringInvalidNewline ->
let region = toRegion row col
in Report.Report "MULTILINE STRING WITH INVALID NEWLINE" region [] $
Code.toSnippet
source
region
Nothing
( D.reflow "New lines in multiline strings must be either \\n or \\r\\n",
D.stack
[ D.reflow "Make sure newlines are set to either Windows (\\r\\n) or Unix (\\n)."
]
)

validMultilineStringExample :: D.Doc
validMultilineStringExample =
Expand Down
30 changes: 30 additions & 0 deletions tests/Parse/MultilineStringSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,21 @@ spec = do
"normal string"
"\"\"\"\nnormal string\"\"\""

it "crlf regression test" $ do
parse
"normal string"
"\"\"\"\r\nnormal string\"\"\""

it "no ending newline works" $ do
parse
"this is \\na test \\nfor newlines"
"\"\"\"\nthis is \na test \nfor newlines\"\"\""

it "crlfs work" $ do
parse
"this is\\na test"
"\"\"\"\r\n this is\r\n a test\r\n\"\"\""

it "mixing quotes work" $ do
parse
"string with \" in it"
Expand All @@ -41,6 +51,16 @@ spec = do
"this is\\n a test"
"\"\"\"\n this is\n a test\n\"\"\""

it "First proper line decides how many spaces to drop for crlf" $ do
parse
"this is\\n a test"
"\"\"\"\r\n this is\r\n a test\r\n\"\"\""

it "Works with differing lines" $ do
parse
"this is\\n a test"
"\"\"\"\n this is\r\n a test\n\"\"\""

it "Only leading spaces are dropped" $ do
parse
"this is\\na test"
Expand All @@ -51,6 +71,16 @@ spec = do
isCorrectError _ = False
Helpers.checkParseError Expression.expression ExpressionBadEnd isCorrectError "\"\"\"this is not allowed\"\"\""

it "does not allow CR without LF on the first line" $ do
let isCorrectError ((Error.Syntax.String Error.Syntax.StringInvalidNewline _ _)) = True
isCorrectError _ = False
Helpers.checkParseError Expression.expression ExpressionBadEnd isCorrectError "\"\"\"\rthis is not allowed\"\"\""

it "does not allow CR without LF on the other lines" $ do
let isCorrectError ((Error.Syntax.String Error.Syntax.StringInvalidNewline _ _)) = True
isCorrectError _ = False
Helpers.checkParseError Expression.expression ExpressionBadEnd isCorrectError "\"\"\"\nthis\ris not allowed\"\"\""

parse :: String -> BS.ByteString -> IO ()
parse expectedStr =
let isExpectedString :: Src.Pattern_ -> Bool
Expand Down

0 comments on commit 4961a54

Please sign in to comment.