From e6567533389cd61ff0b2aa078b73f72c03ed561b Mon Sep 17 00:00:00 2001 From: Jules Aguillon Date: Mon, 19 Oct 2020 19:42:05 +0200 Subject: [PATCH 1/3] Fix end of line parsing on Windows --- lib/lexer_mdx.mll | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/lib/lexer_mdx.mll b/lib/lexer_mdx.mll index 7ff24abd5..91501a871 100644 --- a/lib/lexer_mdx.mll +++ b/lib/lexer_mdx.mll @@ -16,17 +16,21 @@ let labels l = failwith msg } -let eol = '\n' | eof -let ws = ' ' | '\t' +let eol = '\n' | '\r' '\n' | eof +let ws = [' ' '\t'] + +let until_eol = [^'\n' '\r'] +let until_ws = [^' ' '\t'] +let until_ws_or_eol = [^' ' '\t' '\n' '\r'] rule text section = parse | eof { [] } - | ("#"+ as n) " " ([^'\n']* as str) eol + | ("#"+ as n) " " (until_eol* as str) eol { let section = (String.length n, str) in newline lexbuf; `Section section :: text (Some section) lexbuf } - | ( "" ws* eol? )? - "```" ([^' ' '\n']* as h) ws* ([^'\n']* as legacy_labels) eol + | ( "" ws* eol? )? + "```" (until_ws_or_eol* as h) ws* (until_eol* as legacy_labels) eol { let header = Block.Header.of_string h in let contents = block lexbuf in let labels, legacy_labels = @@ -63,7 +67,7 @@ rule text section = parse List.iter (fun _ -> newline lexbuf) errors; newline lexbuf); `Block block :: text section lexbuf } - | "" ws* eol + | "" ws* eol { let labels = labels label_cmt in newline lexbuf; let loc = Location.curr lexbuf in @@ -73,24 +77,24 @@ rule text section = parse | Error (`Msg msg) -> failwith msg in `Block block :: text section lexbuf } - | ([^'\n']* as str) eol + | (until_eol* as str) eol { newline lexbuf; `Text str :: text section lexbuf } and block = parse | eof | "```" ws* eol { [] } - | ([^'\n'] * as str) eol { str :: block lexbuf } + | (until_eol* as str) eol { str :: block lexbuf } and error_block = parse | "```mdx-error" ws* eol { block lexbuf } and cram_text section = parse | eof { [] } - | ("#"+ as n) " " ([^'\n']* as str) eol + | ("#"+ as n) " " (until_eol* as str) eol { let section = (String.length n, str) in newline lexbuf; `Section section :: cram_text (Some section) lexbuf } - | " " ([^'\n']* as first_line) eol + | " " (until_eol* as first_line) eol { let header = Some (Block.Header.Shell `Sh) in let requires_empty_line, contents = cram_block lexbuf in let contents = first_line :: contents in @@ -109,7 +113,7 @@ and cram_text section = parse in `Block block :: (if requires_empty_line then `Text "" :: rest else rest) } - | "<-- non-deterministic" ws* ([^'\n']* as choice) eol + | "<-- non-deterministic" ws* (until_eol* as choice) eol { let header = Some (Block.Header.Shell `Sh) in let requires_empty_line, contents = cram_block lexbuf in let labels = @@ -132,14 +136,14 @@ and cram_text section = parse in `Block block :: (if requires_empty_line then `Text "" :: rest else rest) } - | ([^'\n']* as str) eol + | (until_eol* as str) eol { newline lexbuf; `Text str :: cram_text section lexbuf } and cram_block = parse | eof { false, [] } | eol { newline lexbuf; true, [] } - | " " ([^'\n'] * as str) eol + | " " (until_eol* as str) eol { let requires_empty_line, lst = cram_block lexbuf in requires_empty_line, str :: lst } From bd731f8751744b8c1e1f80025b68d241cf84d260 Mon Sep 17 00:00:00 2001 From: Jules Aguillon Date: Mon, 16 Nov 2020 12:02:22 +0100 Subject: [PATCH 2/3] Cleanup Co-authored-by: Nathan Rebours --- lib/lexer_mdx.mll | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/lexer_mdx.mll b/lib/lexer_mdx.mll index 91501a871..d68ba2196 100644 --- a/lib/lexer_mdx.mll +++ b/lib/lexer_mdx.mll @@ -16,21 +16,21 @@ let labels l = failwith msg } -let eol = '\n' | '\r' '\n' | eof +let eol = '\n' | "\r\n" | eof let ws = [' ' '\t'] -let until_eol = [^'\n' '\r'] -let until_ws = [^' ' '\t'] -let until_ws_or_eol = [^' ' '\t' '\n' '\r'] +let not_eol = [^'\n' '\r'] +let not_ws = [^' ' '\t'] +let not_ws_or_eol = [^' ' '\t' '\n' '\r'] rule text section = parse | eof { [] } - | ("#"+ as n) " " (until_eol* as str) eol + | ("#"+ as n) " " (not_eol* as str) eol { let section = (String.length n, str) in newline lexbuf; `Section section :: text (Some section) lexbuf } - | ( "" ws* eol? )? - "```" (until_ws_or_eol* as h) ws* (until_eol* as legacy_labels) eol + | ( "" ws* eol? )? + "```" (not_ws_or_eol* as h) ws* (not_eol* as legacy_labels) eol { let header = Block.Header.of_string h in let contents = block lexbuf in let labels, legacy_labels = @@ -67,7 +67,7 @@ rule text section = parse List.iter (fun _ -> newline lexbuf) errors; newline lexbuf); `Block block :: text section lexbuf } - | "" ws* eol + | "" ws* eol { let labels = labels label_cmt in newline lexbuf; let loc = Location.curr lexbuf in @@ -77,24 +77,24 @@ rule text section = parse | Error (`Msg msg) -> failwith msg in `Block block :: text section lexbuf } - | (until_eol* as str) eol + | (not_eol* as str) eol { newline lexbuf; `Text str :: text section lexbuf } and block = parse | eof | "```" ws* eol { [] } - | (until_eol* as str) eol { str :: block lexbuf } + | (not_eol* as str) eol { str :: block lexbuf } and error_block = parse | "```mdx-error" ws* eol { block lexbuf } and cram_text section = parse | eof { [] } - | ("#"+ as n) " " (until_eol* as str) eol + | ("#"+ as n) " " (not_eol* as str) eol { let section = (String.length n, str) in newline lexbuf; `Section section :: cram_text (Some section) lexbuf } - | " " (until_eol* as first_line) eol + | " " (not_eol* as first_line) eol { let header = Some (Block.Header.Shell `Sh) in let requires_empty_line, contents = cram_block lexbuf in let contents = first_line :: contents in @@ -113,7 +113,7 @@ and cram_text section = parse in `Block block :: (if requires_empty_line then `Text "" :: rest else rest) } - | "<-- non-deterministic" ws* (until_eol* as choice) eol + | "<-- non-deterministic" ws* (not_eol* as choice) eol { let header = Some (Block.Header.Shell `Sh) in let requires_empty_line, contents = cram_block lexbuf in let labels = @@ -136,14 +136,14 @@ and cram_text section = parse in `Block block :: (if requires_empty_line then `Text "" :: rest else rest) } - | (until_eol* as str) eol + | (not_eol* as str) eol { newline lexbuf; `Text str :: cram_text section lexbuf } and cram_block = parse | eof { false, [] } | eol { newline lexbuf; true, [] } - | " " (until_eol* as str) eol + | " " (not_eol* as str) eol { let requires_empty_line, lst = cram_block lexbuf in requires_empty_line, str :: lst } From 88d45e80f86a12181f2eabe8bbf72dea372a705b Mon Sep 17 00:00:00 2001 From: Jules Aguillon Date: Mon, 16 Nov 2020 12:12:59 +0100 Subject: [PATCH 3/3] Change entry --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index b9fd2dd5b..45fdd87aa 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,6 +18,7 @@ - Report `#require` directive errors (#276, @gpetiot) - Handle no such file exception: the input file and the values of options `--root` and `--prelude` are checked (#292, @gpetiot) - Keep locations from parsing instead of recomputing the lines, providing better error messages (#241, @gpetiot) +- Fix parsing of Windows end-of-lines (#294, @julow) #### Security