From d5f1dcae064d47c4aa75281248c8021a2ed03654 Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Thu, 3 Jun 2021 16:55:19 +0200 Subject: [PATCH] allow escaping newlines with `\` inside strings (#40753) This allows the use of `\` in front of newlines inside non-raw/non-custom string or command literals as a line continuation character, so the following newline is ignored. This way, long strings without any newlines in them don't have to be written in a single line or be broken up. I think we might also want to use this to improve the printing of long strings in the REPL by printing them as multiline strings, making use of `\` for long lines if necessary, but that can be discussed separately. The command literal part is technically breaking, but the current behavior is probably unintuitive enough that this can be considered a minor change. For string literals, this should be entirely non-breaking since a single `\` before a newline currently throws a parsing error. closes #37728 --- NEWS.md | 5 ++ base/shell.jl | 9 ++-- doc/src/manual/strings.md | 17 +++++++ src/julia-parser.scm | 42 +++++++++++++---- test/syntax.jl | 97 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index ab7192af1fb87..2e10b875e9af5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,9 @@ New language features in `[A; B]` has always described concatenating along the first dimension (vertically), now two semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the third, and so on. ([#33697]) +* A backslash (`\`) before a newline inside a string literal now removes the newline while also + respecting indentation. This can be used to split up long strings without newlines into multiple + lines of code. ([#40753]) Language changes ---------------- @@ -114,6 +117,8 @@ Standard library changes * `@lock` is now exported from Base ([#39588]). * The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported from Base and given a more specific return type ([#29901]) * Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`, `acotd`, `atand` now accept an square matrix ([#39758]). +* A backslash before a newline in command literals now always removes the newline, similar to standard string + literals, whereas the result was not well-defined before. ([#40753]) #### Package Manager diff --git a/base/shell.jl b/base/shell.jl index 99866c8010b0f..e94ec466e2c05 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -87,15 +87,18 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; elseif !in_single_quotes && c == '"' in_double_quotes = !in_double_quotes i = consume_upto!(arg, s, i, j) - elseif c == '\\' - if in_double_quotes + elseif !in_single_quotes && c == '\\' + if !isempty(st) && peek(st)[2] == '\n' + i = consume_upto!(arg, s, i, j) + 1 + _ = popfirst!(st) + elseif in_double_quotes isempty(st) && error("unterminated double quote") k, c′ = peek(st) if c′ == '"' || c′ == '$' || c′ == '\\' i = consume_upto!(arg, s, i, j) _ = popfirst!(st) end - elseif !in_single_quotes + else isempty(st) && error("dangling backslash") i = consume_upto!(arg, s, i, j) _ = popfirst!(st) diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index 4b3c35d5b45f6..56a5a20c1cef4 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -166,6 +166,14 @@ julia> """Contains "quote" characters""" "Contains \"quote\" characters" ``` +Long lines in strings can be broken up by preceding the newline with a backslash (`\`): + +```jldoctest +julia> "This is a long \ + line" +"This is a long line" +``` + If you want to extract a character from a string, you index into it: ```jldoctest helloworldstring @@ -639,6 +647,15 @@ julia> """ "Hello,\nworld." ``` +If the newline is removed using a backslash, dedentation will be respected as well: + +```jldoctest +julia> """ + Averylong\ + word""" +"Averylongword" +``` + Trailing whitespace is left unaltered. Triple-quoted string literals can contain `"` characters without escaping. diff --git a/src/julia-parser.scm b/src/julia-parser.scm index f68e0d2c4ebb5..70912c4272c8c 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -311,6 +311,9 @@ (define (numchk n s) (or n (error (string "invalid numeric constant \"" s "\"")))) +(define (string-lastchar s) + (string.char s (string.dec s (length s)))) + (define (read-number port leadingdot neg) (let ((str (open-output-string)) (pred char-numeric?) @@ -412,7 +415,7 @@ (string.sub s 1) s) r is-float32-literal))) - (if (and (eqv? #\. (string.char s (string.dec s (length s)))) + (if (and (eqv? #\. (string-lastchar s)) (let ((nxt (peek-char port))) (and (not (eof-object? nxt)) (or (identifier-start-char? nxt) @@ -2182,16 +2185,35 @@ (define (unescape-parsed-string-literal strs) (map-at even? unescape-string strs)) +;; remove `\` followed by a newline +(define (strip-escaped-newline s) + (let ((in (open-input-string s)) + (out (open-output-string))) + (define (loop preceding-backslash?) + (let ((c (read-char in))) + (cond ((eof-object? c)) + (preceding-backslash? + (if (not (eqv? c #\newline)) + (begin (write-char #\\ out) (write-char c out))) + (loop #f)) + ((eqv? c #\\) (loop #t)) + (else (write-char c out) (loop #f))))) + (loop #f) + (io.tostring! out))) + (define (parse-string-literal s delim raw) - (let ((p (ts:port s))) - ((if raw identity unescape-parsed-string-literal) - (if (eqv? (peek-char p) delim) - (if (eqv? (peek-char (take-char p)) delim) - (map-first strip-leading-newline - (dedent-triplequoted-string - (parse-string-literal- 2 (take-char p) s delim raw))) - (list "")) - (parse-string-literal- 0 p s delim raw))))) + (let* ((p (ts:port s)) + (str (if (eqv? (peek-char p) delim) + (if (eqv? (peek-char (take-char p)) delim) + (map-first strip-leading-newline + (dedent-triplequoted-string + (parse-string-literal- 2 (take-char p) s delim raw))) + (list "")) + (parse-string-literal- 0 p s delim raw)))) + (if raw str (unescape-parsed-string-literal + (map (lambda (s) + (if (string? s) (strip-escaped-newline s) s)) + str))))) (define (strip-leading-newline s) (let ((n (sizeof s))) diff --git a/test/syntax.jl b/test/syntax.jl index 5a3af3b1863cb..7cee3c0755f65 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -2830,3 +2830,100 @@ end x[3], x[1:2]... = x @test x == [2, 3, 1] end + +@testset "escaping newlines inside strings" begin + c = "c" + + @test "a\ +b" == "ab" + @test "a\ + b" == "a b" + @test raw"a\ +b" == "a\\\nb" + @test "a$c\ +b" == "acb" + @test "\\ +" == "\\\n" + + + @test """ + a\ + b""" == "ab" + @test """ + a\ + b""" == "a b" + @test """ + a\ + b""" == " ab" + @test raw""" + a\ + b""" == "a\\\nb" + @test """ + a$c\ + b""" == "acb" + + @test """ + \ + """ == "" + @test """ + \\ + """ == "\\\n" + @test """ + \\\ + """ == "\\" + @test """ + \\\\ + """ == "\\\\\n" + @test """ + \\\\\ + """ == "\\\\" + @test """ + \ + \ + """ == "" + @test """ + \\ + \ + """ == "\\\n" + @test """ + \\\ + \ + """ == "\\" + + + @test `a\ +b` == `ab` + @test `a\ + b` == `a b` + @test `a$c\ +b` == `acb` + @test `"a\ +b"` == `ab` + @test `'a\ +b'` == `$("a\\\nb")` + @test `\\ +` == `'\'` + + + @test ``` + a\ + b``` == `ab` + @test ``` + a\ + b``` == `a b` + @test ``` + a\ + b``` == ` ab` + @test ``` + a$c\ + b``` == `acb` + @test ``` + "a\ + b"``` == `ab` + @test ``` + 'a\ + b'``` == `$("a\\\nb")` + @test ``` + \\ + ``` == `'\'` +end