Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

move triple-quoted string processing into parser #11815

Merged
merged 1 commit into from
Jun 23, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ Language changes
* The keyword `local` is no longer allowed in global scope. Use `let` instead of
`begin` to create a new scope from the top level ([#7234], [#10472]).

* Triple-quoted strings no longer treat tabs as 8 spaces. Instead, the
longest common prefix of spaces and tabs is removed.

Command line option changes
---------------------------

Expand Down
2 changes: 0 additions & 2 deletions base/docs/Docs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,6 @@ namify(sy::Symbol) = sy
function mdify(ex)
if isa(ex, AbstractString)
:(@doc_str $ex)
elseif isexpr(ex, :macrocall) && namify(ex) == symbol("@mstr")
:(@doc_str $(Expr(:triple_quoted_string, ex.args[2])))
else
esc(ex)
end
Expand Down
1 change: 0 additions & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1329,7 +1329,6 @@ export
@int128_str,
@uint128_str,
@big_str,
@mstr, # triple-quoted strings
@cmd, # `commands`

# notation for certain types
Expand Down
41 changes: 0 additions & 41 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1050,51 +1050,10 @@ function unindent(s::AbstractString, indent::Int)
takebuf_string(buf)
end

# Build the value of a triple-quoted string literal from its pieces.
#
# `args` are the parts of the literal: `ByteString` pieces are kept as text and
# anything else (presumably interpolated expressions -- confirm against the
# parser) is wrapped in `esc`. Common indentation is removed from the text
# pieces, a leading blank line is dropped, and the result is either the single
# remaining piece or a `string(...)` call expression over all pieces.
function triplequoted(args...)
sx = Any[ isa(arg,ByteString) ? arg : esc(arg) for arg in args ]

indent = 0
# Split the reversed final piece at its first newline, so `rlines[1]` is the
# last line of the literal (in reversed character order).
# NOTE(review): assumes the final piece is a string; confirm the parser always
# ends the argument list with a text piece.
rlines = split(RevString(sx[end]), '\n'; limit=2)
last_line = rlines[1]
if length(rlines) > 1 && lstrip(last_line) == ""
# The last line is whitespace-only: its indentation decides how much to strip.
indent,_ = indentation(last_line)
else
# Otherwise use the smallest indentation found among the non-blank lines.
indent = typemax(Int)
for s in sx
if isa(s,ByteString)
lines = split(s,'\n')
# The first line of each text piece is skipped (presumably because it does
# not begin at a line boundary -- confirm).
for line in lines[2:end]
# `indentation` is used here as returning (width, is_blank).
n,blank = indentation(line)
if !blank
indent = min(indent, n)
end
end
end
end
end

# Remove the computed indentation from every text piece; escaped expressions
# are left untouched.
for i in 1:length(sx)
if isa(sx[i],ByteString)
sx[i] = unindent(sx[i], indent)
end
end

# Strip a leading blank line: if everything up to and including the first
# newline of the first piece is whitespace, drop it.
# NOTE(review): assumes sx[1] is a string (it is searched and indexed) -- confirm.
s = sx[1]
j = search(s,'\n')
if j != 0 && lstrip(s[1:j]) == ""
sx[1] = s[j+1:end]
end

# A lone piece is returned as-is; several pieces become a `string(...)` call.
length(sx) == 1 ? sx[1] : Expr(:call, :string, sx...)
end

## core string macros ##

# b"..." literal: unescape the literal text at expansion time and produce an
# expression that reads the `data` field of the resulting string.
macro b_str(s)
    unescaped = unescape_string(s)
    :($(unescaped).data)
end

# Triple-quoted string literal: forward every piece of the literal to
# `triplequoted`, which produces the final string or `string(...)` expression.
macro mstr(s...)
    triplequoted(s...)
end

## shell-like command parsing ##

function shell_parse(raw::AbstractString, interp::Bool)
Expand Down
2 changes: 0 additions & 2 deletions contrib/BBEditTextWrangler-julia.plist
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,12 @@
<string>@less</string>
<string>@linux</string>
<string>@linux_only</string>
<string>@mstr</string>
<string>@non_windowsxp_only</string>
<string>@osx</string>
<string>@osx_only</string>
<string>@parallel</string>
<string>@printf</string>
<string>@profile</string>
<string>@r_mstr</string>
<string>@r_str</string>
<string>@schedule</string>
<string>@show</string>
Expand Down
109 changes: 92 additions & 17 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,7 @@
(parse-string-literal s #t)))
(nxt (peek-token s))
(macname (symbol (string #\@ ex '_str)))
(macstr (if (triplequote-string-literal? str) str (cadr str))))
(macstr (car str)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a typo? This changes it from picking up the second element of `str` (`cadr str`) to picking up the first (`car str`).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excessive terseness can be an impediment to communication. Inline comments explaining the structure being processed might have made the change clear (not just for me, but for anybody else who needs to maintain this in the future).

(if (and (symbol? nxt) (not (operator? nxt))
(not (ts:space? s)))
;; string literal suffix, "s"x
Expand Down Expand Up @@ -1634,9 +1634,89 @@
(let ((p (ts:port s)))
(if (eqv? (peek-char p) #\")
(if (eqv? (peek-char (take-char p)) #\")
(parse-string-literal- 'triple_quoted_string 2 (take-char p) s custom)
'(single_quoted_string ""))
(parse-string-literal- 'single_quoted_string 0 p s custom))))
(strip-first-newline
(dedent-triplequoted-string
(parse-string-literal- 2 (take-char p) s custom)))
(list ""))
(parse-string-literal- 0 p s custom))))

;; If the first element of lst starts with a newline character, return a copy
;; of lst with that one character removed from the first element; otherwise
;; return lst unchanged. The first element is expected to be a string.
(define (strip-first-newline lst)
  (let ((head (car lst)))
    (if (and (> (sizeof head) 0)
             (eqv? (string.char head 0) #\newline))
        (cons (string.sub head 1 (sizeof head))
              (cdr lst))
        lst)))

;; Remove the common indentation from every string element of lst.
;; The indentation is the list of characters computed by
;; triplequoted-string-indentation; each occurrence of "newline + indentation"
;; in a string is replaced by a bare newline. Non-string elements pass through
;; unchanged, and lst is returned as-is when there is no common indentation.
(define (dedent-triplequoted-string lst)
  (let ((indent (triplequoted-string-indentation lst)))
    (if (not (length> indent 0))
        lst
        ;; the text to search for is the same for every piece, so build it once
        (let ((indented-newline (list->string (cons #\newline indent))))
          (map (lambda (piece)
                 (if (string? piece)
                     (string-swap piece indented-newline #\newline)
                     piece))
               lst)))))

;; Compute the indentation shared by all lines of a triple-quoted string.
;; Every string element of lst contributes the per-line indentation prefixes
;; found by triplequoted-string-indentation-; non-string elements contribute
;; nothing. The result is the longest common prefix of all collected prefixes.
(define (triplequoted-string-indentation lst)
  (let ((per-piece (map triplequoted-string-indentation-
                        (filter string? lst))))
    (longest-common-prefix (apply append per-piece))))

;; Scan the string s and return a list of indentation prefixes (each a list of
;; space/tab characters), one for every line after the first that contains
;; something other than whitespace before its end. Reaching the end of the
;; string while still inside indentation also records that prefix.
;; Prefixes are returned most recent first.
;;
;; mode: 0 = inside the text of a line (nothing to record)
;;       1 = immediately after a newline
;;       2 = inside the leading whitespace of a line
(define (triplequoted-string-indentation- s)
  (let ((port (open-input-string s)))
    (let scan ((ch     (read-char port))
               (mode   0)
               (indent ())
               (found  ()))
      (cond
       ;; a newline always starts a fresh line; whitespace-only lines are dropped
       ((eqv? ch #\newline)
        (scan (read-char port) 1 () found))
       ;; end of input while in line text: done
       ((and (eqv? mode 0) (eof-object? ch))
        found)
       ;; ordinary character in line text: keep skipping
       ((eqv? mode 0)
        (scan (read-char port) 0 () found))
       ;; still in the leading whitespace: extend the current prefix
       ((or (eqv? ch #\space) (eqv? ch #\tab))
        (scan (read-char port) 2 (cons ch indent) found))
       ;; first non-whitespace (or end of input) after a newline: record prefix
       (else
        (scan (read-char port) 0 () (cons (reverse indent) found)))))))

; return the longest common prefix shared by all the lists in l
; e.g., (longest-common-prefix '((1 2) (1 4))) -> (1)
; an empty l gives (), and a single-element l gives that element
(define (longest-common-prefix l)
  (if (null? l)
      ()
      ;; fold the pairwise prefix over the remaining lists, left to right
      (let merge ((acc  (car l))
                  (rest (cdr l)))
        (if (null? rest)
            acc
            (merge (longest-common-prefix2 acc (car rest))
                   (cdr rest))))))

; return the longest common prefix of the two lists a and b
(define (longest-common-prefix2 a b)
  (longest-common-prefix2- a b ()))

; helper: walk a and b together while their heads are eqv?, collecting the
; matched elements in p (in reverse); return them in original order as soon
; as either list runs out or the heads differ
(define (longest-common-prefix2- a b p)
  (cond ((not (pair? a)) (reverse p))
        ((not (pair? b)) (reverse p))
        ((eqv? (car a) (car b))
         (longest-common-prefix2- (cdr a) (cdr b) (cons (car a) p)))
        (else (reverse p))))

; split the string s at every occurrence of the string sep and return the
; pieces, in order, as a list (a string without sep gives a one-element list)
(define (string-split s sep)
  (string-split- s sep 0 ()))

; helper: start is the offset where the current piece begins and splits holds
; the pieces found so far, most recent first
(define (string-split- s sep start splits)
  (let ((hit (string.find s sep start)))
    (if (not hit)
        ;; no further separator: the remainder of s is the final piece
        (reverse (cons (string.sub s start (sizeof s)) splits))
        (string-split- s
                       sep
                       (+ hit (sizeof sep))
                       (cons (string.sub s start hit) splits)))))

; replace every occurrence of a in s with b:
; split s at a, then join the pieces back together using b as the separator
(define (string-swap s a b)
  (let ((pieces (string-split s a)))
    (string.join pieces b)))

(define (parse-interpolate s)
(let* ((p (ts:port s))
Expand Down Expand Up @@ -1664,10 +1744,10 @@
;; custom = custom string literal
;; when custom is #t, unescape only \\ and \"
;; otherwise do full unescaping, and parse interpolations too
(define (parse-string-literal- head n p s custom)
(define (parse-string-literal- n p s custom)
(let loop ((c (read-char p))
(b (open-output-string))
(e (list head))
(e ())
(quotes 0))
(cond
((eqv? c #\")
Expand Down Expand Up @@ -1706,9 +1786,6 @@
(write-char (not-eof-3 c) b)
(loop (read-char p) b e 0)))))

(define (interpolate-string-literal? s) (length> s 2))
(define (triplequote-string-literal? s) (eqv? (car s) 'triple_quoted_string))

(define (not-eof-1 c)
(if (eof-object? c)
(error "incomplete: invalid character literal") ; NOTE: changing this may affect code in base/client.jl
Expand Down Expand Up @@ -1897,14 +1974,12 @@
((eqv? t #\")
(take-token s)
(let ((ps (parse-string-literal s #f)))
(if (triplequote-string-literal? ps)
`(macrocall @mstr ,@(cdr ps))
(if (interpolate-string-literal? ps)
`(string ,@(filter (lambda (s)
(not (and (string? s)
(= (length s) 0))))
(cdr ps)))
(cadr ps)))))
(if (length> ps 1)
`(string ,@(filter (lambda (s)
(not (and (string? s)
(= (length s) 0))))
ps))
(car ps))))

;; macro call
((eqv? t #\@)
Expand Down
9 changes: 1 addition & 8 deletions src/julia-syntax.scm
Original file line number Diff line number Diff line change
Expand Up @@ -3307,14 +3307,7 @@ So far only the second case can actually occur.
e)
((eq? (car e) 'macrocall)
;; expand macro
(let ((form
(if (and (length> e 2) (pair? (caddr e)) (eq? (caaddr e) 'triple_quoted_string))
;; for a custom triple-quoted string literal, first invoke mstr
;; to handle unindenting
(apply invoke-julia-macro (cadr e)
(julia-expand-macros `(macrocall @mstr ,(cadr (caddr e))))
(cdddr e))
(apply invoke-julia-macro (cadr e) (cddr e)))))
(let ((form (apply invoke-julia-macro (cadr e) (cddr e))))
(if (not form)
(error (string "macro \"" (cadr e) "\" not defined")))
(if (and (pair? form) (eq? (car form) 'error))
Expand Down
14 changes: 5 additions & 9 deletions test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -910,14 +910,14 @@ nl = "
a
b
c""" == " a$(nl)b$(nl) c"
# note tab/space mixing
# tabs + spaces
@test """
a
b
""" == " a$(nl)b$(nl)"
a
b
""" == " a$(nl) b$(nl)"
@test """
a
""" == "a$(nl)"
""" == "a$(nl) "
s = " p"
@test """
$s""" == "$s"
Expand All @@ -937,10 +937,6 @@ s = " p"
@test """
foo
bar\t""" == "foo$(nl)bar\t"
@test """
foo
\tbar
""" == "foo$(nl) bar$(nl)"

# bytes2hex and hex2bytes
hex_str = "d7a8fbb307d7809469ca9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592"
Expand Down