Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

disallow unbalanced bidirectional formatting in strings and comments #42918

Merged
merged 1 commit into from
Nov 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Language changes
* New `@showtime` macro to show both the line being evaluated and the `@time` report ([#42431])
* `last(collection)` will now work on any collection that supports `Iterators.reverse` and `first`, rather than being
restricted to indexable collections.
* Unbalanced Unicode bidirectional formatting directives are now disallowed within strings and comments,
to mitigate the ["trojan source"](https://www.trojansource.codes) vulnerability ([#42918]).

Compiler/Runtime improvements
-----------------------------
Expand Down
104 changes: 62 additions & 42 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,6 @@

(define (newline? c) (eqv? c #\newline))

(define (skip-to-eol port)
(let ((c (peek-char port)))
(cond ((eof-object? c) c)
((eqv? c #\newline) c)
(else (read-char port)
(skip-to-eol port)))))

(define (op-or-sufchar? c) (or (op-suffix-char? c) (opchar? c)))

(define (read-operator port c0 (postfix? #f))
Expand Down Expand Up @@ -495,33 +488,56 @@
(pair? (cadr t)) (eq? (car (cadr t)) 'core)
(memq (cadadr t) '(@int128_str @uint128_str @big_str))))

(define (make-bidi-state) '(0 . 0))

(define (update-bidi-state st c)
(case c
((#\U202A #\U202B #\U202D #\U202E) (cons (+ (car st) 1) (cdr st))) ;; LRE RLE LRO RLO
((#\U2066 #\U2067 #\U2068) (cons (car st) (+ (cdr st) 1))) ;; LRI RLI FSI
((#\U202C) (cons (- (car st) 1) (cdr st))) ;; PDF
((#\U2069) (cons (car st) (- (cdr st) 1))) ;; PDI
((#\newline) '(0 . 0))
(else st)))

(define (bidi-state-terminated? st) (equal? st '(0 . 0)))

(define (skip-line-comment port)
(let ((c (peek-char port)))
(cond ((eof-object? c) c)
((eqv? c #\newline) c)
(else (read-char port)
(skip-line-comment port)))))

(define (skip-multiline-comment port count bds)
(let ((c (read-char port)))
(if (eof-object? c)
(error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
(if (eqv? c #\=)
(let ((c (peek-char port)))
(if (eqv? c #\#)
(begin
(read-char port)
(if (> count 1)
(skip-multiline-comment port (- count 1) bds)
(if (not (bidi-state-terminated? bds))
(error "unbalanced bidirectional formatting in comment"))))
(skip-multiline-comment port count (update-bidi-state bds c))))
(if (eqv? c #\#)
(skip-multiline-comment port
(if (eqv? (peek-char port) #\=)
(begin (read-char port)
(+ count 1))
count)
bds)
(skip-multiline-comment port count (update-bidi-state bds c)))))))

;; skip to end of comment, starting at #: either #...<eol> or #= .... =#.
(define (skip-comment port)
(define (skip-multiline-comment port count)
(let ((c (read-char port)))
(if (eof-object? c)
(error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
(begin (if (eqv? c #\=)
(let ((c (peek-char port)))
(if (eqv? c #\#)
(begin
(read-char port)
(if (> count 1)
(skip-multiline-comment port (- count 1))))
(skip-multiline-comment port count)))
(if (eqv? c #\#)
(skip-multiline-comment port
(if (eqv? (peek-char port) #\=)
(begin (read-char port)
(+ count 1))
count))
(skip-multiline-comment port count)))))))

(read-char port) ; read # that was already peeked
(if (eqv? (peek-char port) #\=)
(begin (read-char port) ; read initial =
(skip-multiline-comment port 1))
(skip-to-eol port)))
(skip-multiline-comment port 1 (make-bidi-state)))
(skip-line-comment port)))

(define (skip-ws-and-comments port)
(skip-ws port #t)
Expand Down Expand Up @@ -2354,24 +2370,28 @@
(let loop ((c (read-char p))
(b (open-output-string))
(e ())
(quotes 0))
(quotes 0)
(bds (make-bidi-state)))
(cond
((eqv? c delim)
(if (< quotes n)
(loop (read-char p) b e (+ quotes 1))
(reverse (cons (io.tostring! b) e))))
(loop (read-char p) b e (+ quotes 1) bds)
(begin
(if (not (bidi-state-terminated? bds))
(error "unbalanced bidirectional formatting in string literal"))
(reverse (cons (io.tostring! b) e)))))

((= quotes 1)
(if (not raw) (write-char #\\ b))
(write-char delim b)
(loop c b e 0))
(loop c b e 0 (update-bidi-state bds c)))

((= quotes 2)
(if (not raw) (write-char #\\ b))
(write-char delim b)
(if (not raw) (write-char #\\ b))
(write-char delim b)
(loop c b e 0))
(loop c b e 0 (update-bidi-state bds c)))

((eqv? c #\\)
(if raw
Expand All @@ -2384,19 +2404,19 @@
(io.write b (string.rep "\\" (div count 2)))
(if (odd? count)
(begin (write-char delim b)
(loop (read-char p) b e 0))
(loop nxch b e 0)))
(loop (read-char p) b e 0 bds))
(loop nxch b e 0 bds)))
(else
(io.write b (string.rep "\\" count))
(write-char nxch b)
(loop (read-char p) b e 0))))
(loop (read-char p) b e 0 (update-bidi-state bds nxch)))))
(let ((nxch (not-eof-for delim (read-char p))))
(write-char #\\ b)
(if (eqv? nxch #\return)
(loop nxch b e 0)
(loop nxch b e 0 bds)
(begin
(write-char nxch b)
(loop (read-char p) b e 0))))))
(loop (read-char p) b e 0 (update-bidi-state bds nxch)))))))

((and (eqv? c #\$) (not raw))
(let* ((ex (parse-interpolate s))
Expand All @@ -2406,19 +2426,19 @@
(loop (read-char p)
(open-output-string)
(list* ex (io.tostring! b) e)
0)))
0 bds)))

; convert literal \r and \r\n in strings to \n (issue #11988)
((eqv? c #\return) ; \r
(begin
(if (eqv? (peek-char p) #\linefeed) ; \r\n
(read-char p))
(write-char #\newline b)
(loop (read-char p) b e 0)))
(loop (read-char p) b e 0 bds)))

(else
(write-char (not-eof-for delim c) b)
(loop (read-char p) b e 0)))))
(loop (read-char p) b e 0 (update-bidi-state bds c))))))

(define (not-eof-1 c)
(if (eof-object? c)
Expand Down
18 changes: 18 additions & 0 deletions test/syntax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3060,3 +3060,21 @@ end
@test err == 5 + 6
@test x == 1
end

@test_throws ParseError Meta.parse("""
function checkUserAccess(u::User)
if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason this was indented with tabs?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That may be from the initial report/gist I sent - I just checked my local version and it seems to have a tab character there as well, though that wasn't intentional. Not sure how it got there. It certainly isn't a functional part of the vulnerability though.

return true
end
return false
end
""")

@test_throws ParseError Meta.parse("""
function checkUserAccess(u::User)
#=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =#
return true
#= end admin only \u202e \u2066end\u2069 \u2066=#
return false
end
""")