From aa050767dd7daa37b1c904d56572ce62a4a943aa Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Sat, 8 May 2021 22:10:55 +0200 Subject: [PATCH 1/6] allow escaping newlines with `\` inside strings This allows the use of `\` in front of newlines inside non-raw/non-custom string or command literals as a line continuation character, so the following newline is ignored. This way, long strings without any newlines in them don't have to be written in a single line or be broken up. I think we might also want to use this to improve the printing of long strings in the REPL by printing them as multiline strings, making use of `\` for long lines if necessary, but that can be discussed separately. The command literal part is technically breaking, but the current behavior is probably unintuitive enough that this can be considered a minor change. For string literals, this should be entirely non-breaking since a single `\` before a newline currently throws a parsing error. closes #37728 --- base/shell.jl | 5 ++- src/julia-parser.scm | 38 +++++++++++++---- test/syntax.jl | 97 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 9 deletions(-) diff --git a/base/shell.jl b/base/shell.jl index 99866c8010b0f..9640029b03151 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -88,7 +88,10 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; in_double_quotes = !in_double_quotes i = consume_upto!(arg, s, i, j) elseif c == '\\' - if in_double_quotes + if !isempty(st) && peek(st)[2] == '\n' + i = consume_upto!(arg, s, i, j) + 1 + _ = popfirst!(st) + elseif in_double_quotes isempty(st) && error("unterminated double quote") k, c′ = peek(st) if c′ == '"' || c′ == '$' || c′ == '\\' diff --git a/src/julia-parser.scm b/src/julia-parser.scm index f68e0d2c4ebb5..f1b392e143dcf 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -311,6 +311,9 @@ (define (numchk n s) (or n (error (string "invalid numeric constant \"" s "\"")))) +(define (string-lastchar s) + (string.char s (string.dec s (length s)))) + (define (read-number port leadingdot neg) (let ((str (open-output-string)) (pred char-numeric?) @@ -412,7 +415,7 @@ (string.sub s 1) s) r is-float32-literal))) - (if (and (eqv? #\. (string.char s (string.dec s (length s)))) + (if (and (eqv? #\. (string-lastchar s)) (let ((nxt (peek-char port))) (and (not (eof-object? nxt)) (or (identifier-start-char? nxt) @@ -2185,13 +2188,32 @@ (define (parse-string-literal s delim raw) (let ((p (ts:port s))) ((if raw identity unescape-parsed-string-literal) - (if (eqv? (peek-char p) delim) - (if (eqv? (peek-char (take-char p)) delim) - (map-first strip-leading-newline - (dedent-triplequoted-string - (parse-string-literal- 2 (take-char p) s delim raw))) - (list "")) - (parse-string-literal- 0 p s delim raw))))) + (map (lambda (s) + (if (and (not raw) (string? s)) + ;; remove `\` followed by a newline + (let ((spl (string-split s "\\\n"))) + (foldl (lambda (line s) + ;; if there is an odd number of backslashes before the backslash + ;; preceding the newline, keep the backslash and newline since + ;; the backslash is actually escaped + (define (odd-backslashes? (i (length s))) + (and (> i 0) + (let ((i (string.dec s i))) + (and (eqv? (string.char s i) #\\) + (not (odd-backslashes? i)))))) + (if (odd-backslashes?) + (string s "\\\n" line) + (string s line))) + "" + spl)) + s)) + (if (eqv? (peek-char p) delim) + (if (eqv? (peek-char (take-char p)) delim) + (map-first strip-leading-newline + (dedent-triplequoted-string + (parse-string-literal- 2 (take-char p) s delim raw))) + (list "")) + (parse-string-literal- 0 p s delim raw)))))) (define (strip-leading-newline s) (let ((n (sizeof s))) diff --git a/test/syntax.jl b/test/syntax.jl index 5a3af3b1863cb..913dae9e7dd13 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -2830,3 +2830,100 @@ end x[3], x[1:2]... = x @test x == [2, 3, 1] end + +@testset "escaping newlines inside strings" begin + c = "c" + + @test "a\ +b" == "ab" + @test "a\ + b" == "a b" + @test raw"a\ +b" == "a\\\nb" + @test "a$c\ +b" == "acb" + @test "\\ +" == "\\\n" + + + @test """ + a\ + b""" == "ab" + @test """ + a\ + b""" == "a b" + @test """ + a\ + b""" == " ab" + @test raw""" + a\ + b""" == "a\\\nb" + @test """ + a$c\ + b""" == "acb" + + @test """ + \ + """ == "" + @test """ + \\ + """ == "\\\n" + @test """ + \\\ + """ == "\\" + @test """ + \\\\ + """ == "\\\\\n" + @test """ + \\\\\ + """ == "\\\\" + @test """ + \ + \ + """ == "" + @test """ + \\ + \ + """ == "\\\n" + @test """ + \\\ + \ + """ == "\\" + + + @test `a\ +b` == `ab` + @test `a\ + b` == `a b` + @test `a$c\ +b` == `acb` + @test `"a\ +b"` == `ab` + @test `'a\ +b'` == `ab` + @test `\\ +` == `'\'` + + + @test ``` + a\ + b``` == `ab` + @test ``` + a\ + b``` == `a b` + @test ``` + a\ + b``` == ` ab` + @test ``` + a$c\ + b``` == `acb` + @test ``` + "a\ + b"``` == `ab` + @test ``` + 'a\ + b'``` == `ab` + @test ``` + \\ + ``` == `'\'` +end From 36f9a09c5bc7082c756eb29eee8bdeb25bab446d Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Wed, 12 May 2021 22:40:59 +0200 Subject: [PATCH 2/6] avoid being O(n^2) in worst case, improve style --- src/julia-parser.scm | 56 ++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/julia-parser.scm b/src/julia-parser.scm index f1b392e143dcf..70912c4272c8c 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -2185,35 +2185,35 @@ (define (unescape-parsed-string-literal strs) (map-at even? unescape-string strs)) +;; remove `\` followed by a newline +(define (strip-escaped-newline s) + (let ((in (open-input-string s)) + (out (open-output-string))) + (define (loop preceding-backslash?) + (let ((c (read-char in))) + (cond ((eof-object? c)) + (preceding-backslash? + (if (not (eqv? c #\newline)) + (begin (write-char #\\ out) (write-char c out))) + (loop #f)) + ((eqv? c #\\) (loop #t)) + (else (write-char c out) (loop #f))))) + (loop #f) + (io.tostring! out))) + (define (parse-string-literal s delim raw) - (let ((p (ts:port s))) - ((if raw identity unescape-parsed-string-literal) - (map (lambda (s) - (if (and (not raw) (string? s)) - ;; remove `\` followed by a newline - (let ((spl (string-split s "\\\n"))) - (foldl (lambda (line s) - ;; if there is an odd number of backslashes before the backslash - ;; preceding the newline, keep the backslash and newline since - ;; the backslash is actually escaped - (define (odd-backslashes? (i (length s))) - (and (> i 0) - (let ((i (string.dec s i))) - (and (eqv? (string.char s i) #\\) - (not (odd-backslashes? i)))))) - (if (odd-backslashes?) - (string s "\\\n" line) - (string s line))) - "" - spl)) - s)) - (if (eqv? (peek-char p) delim) - (if (eqv? (peek-char (take-char p)) delim) - (map-first strip-leading-newline - (dedent-triplequoted-string - (parse-string-literal- 2 (take-char p) s delim raw))) - (list "")) - (parse-string-literal- 0 p s delim raw)))))) + (let* ((p (ts:port s)) + (str (if (eqv? (peek-char p) delim) + (if (eqv? (peek-char (take-char p)) delim) + (map-first strip-leading-newline + (dedent-triplequoted-string + (parse-string-literal- 2 (take-char p) s delim raw))) + (list "")) + (parse-string-literal- 0 p s delim raw)))) + (if raw str (unescape-parsed-string-literal + (map (lambda (s) + (if (string? s) (strip-escaped-newline s) s)) + str))))) (define (strip-leading-newline s) (let ((n (sizeof s))) From 9d6c79179eef2173c9bed8c10ac51ba9fd24383b Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Sat, 29 May 2021 14:32:13 +0200 Subject: [PATCH 3/6] add NEWS entry --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index fcd5168dd9514..1e15374629682 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,9 @@ New language features in `[A; B]` has always described concatenating along the first dimension (vertically), now two semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the third, and so on. ([#33697]) +* A backslash (`\`) before a newline inside a string literal now escapes the newline while also + respecting indentation. This can be used to split up long strings without newlines into multiple + lines of code. ([#40753]) Language changes ---------------- @@ -102,6 +105,8 @@ Standard library changes * `@lock` is now exported from Base ([#39588]). * The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported from Base and given a more specific return type ([#29901]) * Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`, `acotd`, `atand` now accept an square matrix ([#39758]). +* A backslash before a newline in command literals now always escapes the newline, similar to standard string + literals, whereas the result was not well-defined before. ([#40753]) #### Package Manager From e6ebe848935db60324986083595ff8dd906b10c1 Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Tue, 1 Jun 2021 09:58:56 +0200 Subject: [PATCH 4/6] Update base/shell.jl Co-authored-by: Jameson Nash --- base/shell.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/shell.jl b/base/shell.jl index 9640029b03151..903ca7166d6bb 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -87,7 +87,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; elseif !in_single_quotes && c == '"' in_double_quotes = !in_double_quotes i = consume_upto!(arg, s, i, j) - elseif c == '\\' + elseif !in_single_quotes && c == '\\' if !isempty(st) && peek(st)[2] == '\n' i = consume_upto!(arg, s, i, j) + 1 _ = popfirst!(st) From 0ffe98a39279169c124fa7edc5ae49bacd2c0b6d Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Tue, 1 Jun 2021 12:19:34 +0200 Subject: [PATCH 5/6] fix tests --- base/shell.jl | 2 +- test/syntax.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/shell.jl b/base/shell.jl index 903ca7166d6bb..e94ec466e2c05 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -98,7 +98,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; i = consume_upto!(arg, s, i, j) _ = popfirst!(st) end - elseif !in_single_quotes + else isempty(st) && error("dangling backslash") i = consume_upto!(arg, s, i, j) _ = popfirst!(st) diff --git a/test/syntax.jl b/test/syntax.jl index 913dae9e7dd13..7cee3c0755f65 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -2900,7 +2900,7 @@ b` == `acb` @test `"a\ b"` == `ab` @test `'a\ -b'` == `ab` +b'` == `$("a\\\nb")` @test `\\ ` == `'\'` @@ -2922,7 +2922,7 @@ b'` == `ab` b"``` == `ab` @test ``` 'a\ - b'``` == `ab` + b'``` == `$("a\\\nb")` @test ``` \\ ``` == `'\'` From 2e143c9c5891f3af75ce522d542bf0c54fc9bef6 Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Wed, 2 Jun 2021 20:59:39 +0200 Subject: [PATCH 6/6] address review comments --- NEWS.md | 4 ++-- doc/src/manual/strings.md | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1e15374629682..40b0b09507cca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,7 +16,7 @@ New language features in `[A; B]` has always described concatenating along the first dimension (vertically), now two semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the third, and so on. ([#33697]) -* A backslash (`\`) before a newline inside a string literal now escapes the newline while also +* A backslash (`\`) before a newline inside a string literal now removes the newline while also respecting indentation. This can be used to split up long strings without newlines into multiple lines of code. ([#40753]) @@ -105,7 +105,7 @@ Standard library changes * `@lock` is now exported from Base ([#39588]). * The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported from Base and given a more specific return type ([#29901]) * Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`, `acotd`, `atand` now accept an square matrix ([#39758]). -* A backslash before a newline in command literals now always escapes the newline, similar to standard string +* A backslash before a newline in command literals now always removes the newline, similar to standard string literals, whereas the result was not well-defined before. ([#40753]) #### Package Manager diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index 4b3c35d5b45f6..56a5a20c1cef4 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -166,6 +166,14 @@ julia> """Contains "quote" characters""" "Contains \"quote\" characters" ``` +Long lines in strings can be broken up by preceding the newline with a backslash (`\`): + +```jldoctest +julia> "This is a long \ + line" +"This is a long line" +``` + If you want to extract a character from a string, you index into it: ```jldoctest helloworldstring @@ -639,6 +647,15 @@ julia> """ "Hello,\nworld." ``` +If the newline is removed using a backslash, dedentation will be respected as well: + +```jldoctest +julia> """ + Averylong\ + word""" +"Averylongword" +``` + Trailing whitespace is left unaltered. Triple-quoted string literals can contain `"` characters without escaping.