Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix non-ascii string/binary literals; Add multi-line binary literals #22

Merged
merged 19 commits into from
Jul 7, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
5cf5941
Allow using string blocks to create binary literals
michallepicki Jun 9, 2021
8354950
multi line binary, not multi line string
michallepicki Jun 9, 2021
40e809e
mark binary literals as utf8 in Erlang; do not escape binary or strin…
michallepicki Jun 13, 2021
5db9f40
output string blocks as raw bytes in erlang source to avoid escaping
michallepicki Jun 13, 2021
aac70de
no need to escape single quotes
michallepicki Jun 13, 2021
9de662d
if double quote in string literal is already escaped, don't try to ex…
michallepicki Jun 13, 2021
97acd17
Move comment about multi-codepoint characters in strings to correct p…
michallepicki Jun 14, 2021
6766774
acknowledge new_line when lexing string block with ignored starting b…
michallepicki Jun 14, 2021
33bc920
Trim lexeme from whitespace instead of subtracting 1 to avoid potenti…
michallepicki Jun 14, 2021
5aa9907
add test_binary.sest
michallepicki Jun 14, 2021
3fdf5aa
show in test what binary literals compile to
michallepicki Jun 14, 2021
7ab034d
simplify test
michallepicki Jun 14, 2021
429ad78
Fix bug where escaped backslash pairs were not grouped so next charac…
michallepicki Jun 15, 2021
b54d983
Reduce down the test to planned escape sequences only
michallepicki Jun 16, 2021
4bf9261
Only allow selected escape sequences; Parse literal into intended run…
michallepicki Jun 19, 2021
8fbf9c2
WIP escape_string in outputErlangCode
michallepicki Jun 19, 2021
5b6e955
fix mistake: don't switch to parsing binary literal from within strin…
michallepicki Jun 19, 2021
bd6acb5
throw a buffer at it
michallepicki Jun 29, 2021
01c8918
Correctly escape carriage return and tab escape sequences
michallepicki Jul 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions src/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ rule token = parse
let (rng, s) = string_literal posL strbuf lexbuf in
match MyUtil.Utf.uchar_of_utf8 s with
| [ uchar ] -> CHAR(rng, uchar)
(* TODO: (?) allow escape sequences in char literal,
e.g. $'\n' (erlang $\n) or $'\x{10ffff}' (erlang $\x{10ffff}) *)
| _ -> raise_error (NotASingleCodePoint(rng))
}

Expand Down Expand Up @@ -204,10 +206,22 @@ rule token = parse
let posL = Range.from_lexbuf lexbuf in
let strbuf = Buffer.create 128 in
let (rng, s) = string_literal posL strbuf lexbuf in
(* TODO: handle multi-codepoint unicode characters e.g. 🤷🏽‍♀️
(raise_error NotASingleCodePoint or convert somehow) *)
STRING(rng, s)
}

| ("`" +) {
| ("`"+ break) {
(* When first character in a string block is a line break,
ignore this line break *)
Lexing.new_line lexbuf;
let posL = Range.from_lexbuf lexbuf in
let num_start = String.length (String.trim (Lexing.lexeme lexbuf)) in
let strbuf = Buffer.create 128 in
string_block num_start posL strbuf lexbuf
}

| ("`"+) {
let posL = Range.from_lexbuf lexbuf in
let num_start = String.length (Lexing.lexeme lexbuf) in
let strbuf = Buffer.create 128 in
Expand All @@ -219,15 +233,17 @@ rule token = parse
(* Scans characters into [strbuf] until an unescaped double quote is seen.
   On that closing quote it returns a pair: the range obtained by uniting
   [posL] (supplied by the caller) with the range of the closing quote, and
   the accumulated buffer contents. *)
and binary_literal posL strbuf = parse
(* A line break before the closing quote is reported as an error at [posL]. *)
| break { raise_error (SeeBreakInStringLiteral(posL)) }
(* Reaching end of input before the closing quote is also an error at [posL]. *)
| eof { raise_error (SeeEndOfFileInStringLiteral(posL)) }
(* Backslash followed by a double quote: both characters are stored, so the
   escape sequence stays as written in the returned text. *)
| "\\\"" { Buffer.add_char strbuf '\\'; Buffer.add_char strbuf '"'; binary_literal posL strbuf lexbuf }
(* Bare double quote: end of the literal. *)
| "\"" { let posR = Range.from_lexbuf lexbuf in (Range.unite posL posR, Buffer.contents strbuf) }
(* NOTE(review): this arm has the same pattern as the escaped-quote arm two
   lines up. ocamllex takes the first rule when matches have equal length,
   so this arm looks unreachable. It is presumably the removed side of the
   diff (it stores only the quote, dropping the backslash). Confirm which
   variant is intended before deleting either. *)
| "\\\"" { Buffer.add_char strbuf '"'; binary_literal posL strbuf lexbuf }
(* Any other single character is copied into the buffer unchanged. *)
| _ as c { Buffer.add_char strbuf c; binary_literal posL strbuf lexbuf }

(* Scans characters into [strbuf] until an unescaped single quote is seen.
   On that closing quote it returns a pair: the range obtained by uniting
   [posL] (supplied by the caller) with the range of the closing quote, and
   the accumulated buffer contents. *)
and string_literal posL strbuf = parse
(* A line break before the closing quote is reported as an error at [posL]. *)
| break { raise_error (SeeBreakInStringLiteral(posL)) }
(* Reaching end of input before the closing quote is also an error at [posL]. *)
| eof { raise_error (SeeEndOfFileInStringLiteral(posL)) }
(* Backslash followed by a single quote: both characters are stored. *)
| "\\\'" { Buffer.add_char strbuf '\\'; Buffer.add_char strbuf '\''; string_literal posL strbuf lexbuf }
(* Backslash followed by a double quote: both characters are stored. *)
| "\\\"" { Buffer.add_char strbuf '\\'; Buffer.add_char strbuf '\"'; string_literal posL strbuf lexbuf }
(* Bare double quote: a backslash is inserted in front of it, so the buffer
   holds an escaped double quote. Presumably this lets the text be emitted
   between double quotes without further escaping; verify against the code
   generator. *)
| "\"" { Buffer.add_char strbuf '\\'; Buffer.add_char strbuf '\"'; string_literal posL strbuf lexbuf }
(* Bare single quote: end of the literal. *)
| "\'" { let posR = Range.from_lexbuf lexbuf in (Range.unite posL posR, Buffer.contents strbuf) }
(* NOTE(review): this arm has the same pattern as the first escaped single
   quote arm above. ocamllex takes the first rule when matches have equal
   length, so this arm looks unreachable. It is presumably the removed side
   of the diff (it stores only the quote, dropping the backslash). Confirm
   which variant is intended before deleting either. *)
| "\\\'" { Buffer.add_char strbuf '\''; string_literal posL strbuf lexbuf }
(* Any other single character is copied into the buffer unchanged. *)
| _ as c { Buffer.add_char strbuf c; string_literal posL strbuf lexbuf }

and format_literal posL strbuf acc = parse
Expand Down
4 changes: 2 additions & 2 deletions src/outputErlangCode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ let stringify_base_constant (bc : base_constant) =
else
assert false

| BinaryByString(s) -> Printf.sprintf "<<\"%s\">>" (String.escaped s)
| BinaryByString(s) -> Printf.sprintf "<<\"%s\"/utf8>>" s
| BinaryByInts(ns) -> Printf.sprintf "<<%s>>" (ns |> List.map string_of_int |> String.concat ", ")
| String(s) -> Printf.sprintf "\"%s\"" (String.escaped s)
michallepicki marked this conversation as resolved.
Show resolved Hide resolved
| String(s) -> Printf.sprintf "\"%s\"" s
| Char(uchar) -> Printf.sprintf "%d" (Uchar.to_int uchar)

| FormatString(fmtelems) ->
Expand Down
8 changes: 6 additions & 2 deletions src/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -677,10 +677,14 @@ exprbot:
let rng = make_range (Token(tokL)) (Token(tokR)) in
(rng, BinaryByList(ns))
}
| strlit=BINARY {
let (rng, s) = strlit in
| binlit=BINARY {
let (rng, s) = binlit in
(rng, BaseConst(BinaryByString(s)))
}
| strblock=STRING_BLOCK {
let (rng, s) = strblock in
(rng, BaseConst(BinaryByInts(s |> String.to_seq |> List.of_seq |> List.map Char.code)))
}
| strlit=STRING {
let (rng, s) = strlit in
(rng, BaseConst(String(s)))
Expand Down
20 changes: 20 additions & 0 deletions test/pass/test_binary.sest
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
module TestBinary = struct
michallepicki marked this conversation as resolved.
Show resolved Hide resolved

val check : fun({binary, binary, binary, binary}) -> {binary, binary, binary, binary} = external 1 ```
check(X) ->
{<<"👩‍🔬"/utf8>>,<<"\n"/utf8>>,<<33, 96, 92, 92>>,<<111, 110, 101, 10, 116, 119, 111>>} = X,
X.
```

val main(args) =
let woman_scientist = "👩‍🔬" in
let newline = "\n" in
let raw = ``!`\\`` in
let multiline = ```
one
two```
in
let result = {woman_scientist, newline, raw, multiline} in
print_debug(check({woman_scientist, newline, raw, multiline}))

end