From 2e1ecf8989aa5124397c3d0f07ec786647f0b010 Mon Sep 17 00:00:00 2001 From: Bobby Priambodo Date: Sat, 21 Apr 2018 15:20:34 +0700 Subject: [PATCH 1/7] Handle leading whitespace on code blocks --- src/parser/jbuild | 2 +- src/parser/lexer.mll | 27 +++++++++++++++++++ .../code-block/leading-newline-with-space.txt | 2 +- test/parser/expect/code-block/leading-tab.txt | 2 +- .../expect/code-block/leading-whitespace.txt | 2 +- 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/parser/jbuild b/src/parser/jbuild index e012b5dda3..ea37edec84 100644 --- a/src/parser/jbuild +++ b/src/parser/jbuild @@ -5,4 +5,4 @@ (library ((name parser_) (preprocess (pps (bisect_ppx))) - (libraries (model)))) + (libraries (model str)))) diff --git a/src/parser/lexer.mll b/src/parser/lexer.mll index f335c2d55b..b992779396 100644 --- a/src/parser/lexer.mll +++ b/src/parser/lexer.mll @@ -70,6 +70,32 @@ let trim_trailing_blank_lines : string -> string = fun s -> in String.sub s 0 trim_from +let trim_leading_whitespace : string -> string = fun s -> + let count_leading_whitespace : string -> int = fun line -> + let rec count_leading_whitespace' : int -> int = fun index -> + if index >= String.length line then + index + else + match line.[index] with + | ' ' | '\t' -> count_leading_whitespace' (index + 1) + | _ -> index + in + count_leading_whitespace' 0 + in + let lines = Str.(split (regexp "\n") s) in + let least_amount_of_whitespace = + lines + |> List.map count_leading_whitespace + |> List.fold_left min max_int + in + let remove_whitespace : string -> string = fun line -> + String.sub line least_amount_of_whitespace (String.length line - least_amount_of_whitespace) + in + lines + |> List.map remove_whitespace + |> String.concat "\n" + + module Location = Model.Location_ @@ -256,6 +282,7 @@ rule token input = parse | "{[" (code_block_text as c) "]}" { let c = trim_leading_blank_lines c in let c = trim_trailing_blank_lines c in + let c = trim_leading_whitespace c in emit input 
(`Code_block c) } | "{v" (verbatim_text as t) "v}" diff --git a/test/parser/expect/code-block/leading-newline-with-space.txt b/test/parser/expect/code-block/leading-newline-with-space.txt index 5cdbd6693e..68907aafc4 100644 --- a/test/parser/expect/code-block/leading-newline-with-space.txt +++ b/test/parser/expect/code-block/leading-newline-with-space.txt @@ -1 +1 @@ -((output (ok (((f.ml (1 0) (2 6)) (code_block " foo"))))) (warnings ())) +((output (ok (((f.ml (1 0) (2 6)) (code_block foo))))) (warnings ())) diff --git a/test/parser/expect/code-block/leading-tab.txt b/test/parser/expect/code-block/leading-tab.txt index 7672eef1b3..bd00b8aa2c 100644 --- a/test/parser/expect/code-block/leading-tab.txt +++ b/test/parser/expect/code-block/leading-tab.txt @@ -1 +1 @@ -((output (ok (((f.ml (1 0) (1 8)) (code_block "\tfoo"))))) (warnings ())) +((output (ok (((f.ml (1 0) (1 8)) (code_block foo))))) (warnings ())) diff --git a/test/parser/expect/code-block/leading-whitespace.txt b/test/parser/expect/code-block/leading-whitespace.txt index 590144181c..bd00b8aa2c 100644 --- a/test/parser/expect/code-block/leading-whitespace.txt +++ b/test/parser/expect/code-block/leading-whitespace.txt @@ -1 +1 @@ -((output (ok (((f.ml (1 0) (1 8)) (code_block " foo"))))) (warnings ())) +((output (ok (((f.ml (1 0) (1 8)) (code_block foo))))) (warnings ())) From 1853dded09f91a50292e29323d7081d090c06d41 Mon Sep 17 00:00:00 2001 From: Bobby Priambodo Date: Sat, 21 Apr 2018 15:22:09 +0700 Subject: [PATCH 2/7] Add new tests for multiple-line code-block with leading whitespace --- .../expect/code-block/leading-tab-two-different-indent.txt | 3 +++ test/parser/expect/code-block/leading-tab-two.txt | 3 +++ .../expect/code-block/leading-whitespace-two-cr-lf.txt | 3 +++ .../leading-whitespace-two-different-indent-rev.txt | 3 +++ .../code-block/leading-whitespace-two-different-indent.txt | 3 +++ test/parser/expect/code-block/leading-whitespace-two.txt | 3 +++ test/parser/test.ml | 6 ++++++ 7 files 
changed, 24 insertions(+) create mode 100644 test/parser/expect/code-block/leading-tab-two-different-indent.txt create mode 100644 test/parser/expect/code-block/leading-tab-two.txt create mode 100644 test/parser/expect/code-block/leading-whitespace-two-cr-lf.txt create mode 100644 test/parser/expect/code-block/leading-whitespace-two-different-indent-rev.txt create mode 100644 test/parser/expect/code-block/leading-whitespace-two-different-indent.txt create mode 100644 test/parser/expect/code-block/leading-whitespace-two.txt diff --git a/test/parser/expect/code-block/leading-tab-two-different-indent.txt b/test/parser/expect/code-block/leading-tab-two-different-indent.txt new file mode 100644 index 0000000000..c1fc7fcd64 --- /dev/null +++ b/test/parser/expect/code-block/leading-tab-two-different-indent.txt @@ -0,0 +1,3 @@ +((output (ok (((f.ml (1 0) (2 7)) (code_block "foo\ + \n\tbar"))))) + (warnings ())) diff --git a/test/parser/expect/code-block/leading-tab-two.txt b/test/parser/expect/code-block/leading-tab-two.txt new file mode 100644 index 0000000000..2a6e899e76 --- /dev/null +++ b/test/parser/expect/code-block/leading-tab-two.txt @@ -0,0 +1,3 @@ +((output (ok (((f.ml (1 0) (2 6)) (code_block "foo\ + \nbar"))))) + (warnings ())) diff --git a/test/parser/expect/code-block/leading-whitespace-two-cr-lf.txt b/test/parser/expect/code-block/leading-whitespace-two-cr-lf.txt new file mode 100644 index 0000000000..e52d0ed630 --- /dev/null +++ b/test/parser/expect/code-block/leading-whitespace-two-cr-lf.txt @@ -0,0 +1,3 @@ +((output (ok (((f.ml (1 0) (2 6)) (code_block "foo\r\ + \nbar"))))) + (warnings ())) diff --git a/test/parser/expect/code-block/leading-whitespace-two-different-indent-rev.txt b/test/parser/expect/code-block/leading-whitespace-two-different-indent-rev.txt new file mode 100644 index 0000000000..7f6d5a0520 --- /dev/null +++ b/test/parser/expect/code-block/leading-whitespace-two-different-indent-rev.txt @@ -0,0 +1,3 @@ +((output (ok (((f.ml (1 0) (2 6)) 
(code_block " foo\ + \nbar"))))) + (warnings ())) diff --git a/test/parser/expect/code-block/leading-whitespace-two-different-indent.txt b/test/parser/expect/code-block/leading-whitespace-two-different-indent.txt new file mode 100644 index 0000000000..784aa5f132 --- /dev/null +++ b/test/parser/expect/code-block/leading-whitespace-two-different-indent.txt @@ -0,0 +1,3 @@ +((output (ok (((f.ml (1 0) (2 8)) (code_block "foo\ + \n bar"))))) + (warnings ())) diff --git a/test/parser/expect/code-block/leading-whitespace-two.txt b/test/parser/expect/code-block/leading-whitespace-two.txt new file mode 100644 index 0000000000..2a6e899e76 --- /dev/null +++ b/test/parser/expect/code-block/leading-whitespace-two.txt @@ -0,0 +1,3 @@ +((output (ok (((f.ml (1 0) (2 6)) (code_block "foo\ + \nbar"))))) + (warnings ())) diff --git a/test/parser/test.ml b/test/parser/test.ml index 92ac52df22..a08ebb1213 100644 --- a/test/parser/test.ml +++ b/test/parser/test.ml @@ -271,7 +271,13 @@ let tests : test_suite list = [ t "cr-lf" "{[foo\r\nbar]}"; t "blank-line" "{[foo\n\nbar]}"; t "leading-whitespace" "{[ foo]}"; + t "leading-whitespace-two" "{[ foo\n bar]}"; + t "leading-whitespace-two-cr-lf" "{[ foo\r\n bar]}"; + t "leading-whitespace-two-different-indent" "{[ foo\n bar]}"; + t "leading-whitespace-two-different-indent-rev" "{[ foo\n bar]}"; t "leading-tab" "{[\tfoo]}"; + t "leading-tab-two" "{[\tfoo\n\tbar]}"; + t "leading-tab-two-different-indent" "{[\tfoo\n\t\tbar]}"; t "leading-newline" "{[\nfoo]}"; t "leading-cr-lf" "{[\r\nfoo]}"; t "leading-newlines" "{[\n\nfoo]}"; From 7c9bee4754c44fcf7b49cd564875e18ccb0179b6 Mon Sep 17 00:00:00 2001 From: Bobby Priambodo Date: Sat, 21 Apr 2018 15:22:20 +0700 Subject: [PATCH 3/7] Support opam v2 local switches --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 08a111c2ee..e516216dea 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ _coverage/ # For local experiments. 
scratch/ + +# opam v2 +_opam/ From c71dcf38a45bf7846d78108904189c7dba47f462 Mon Sep 17 00:00:00 2001 From: Bobby Priambodo Date: Sat, 21 Apr 2018 19:38:56 +0700 Subject: [PATCH 4/7] Use Astring instead of Str for splitting --- src/parser/jbuild | 2 +- src/parser/lexer.mll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/jbuild b/src/parser/jbuild index ea37edec84..feb9b23cf6 100644 --- a/src/parser/jbuild +++ b/src/parser/jbuild @@ -5,4 +5,4 @@ (library ((name parser_) (preprocess (pps (bisect_ppx))) - (libraries (model str)))) + (libraries (model astring)))) diff --git a/src/parser/lexer.mll b/src/parser/lexer.mll index b992779396..a9fccbbdc5 100644 --- a/src/parser/lexer.mll +++ b/src/parser/lexer.mll @@ -82,7 +82,7 @@ let trim_leading_whitespace : string -> string = fun s -> in count_leading_whitespace' 0 in - let lines = Str.(split (regexp "\n") s) in + let lines = Astring.String.cuts ~sep:"\n" s in let least_amount_of_whitespace = lines |> List.map count_leading_whitespace From 2bbc5ba26ba27faef9cc9804d7f0c8516dea1987 Mon Sep 17 00:00:00 2001 From: Bobby Priambodo Date: Sat, 21 Apr 2018 19:39:17 +0700 Subject: [PATCH 5/7] Make astring direct dependency of odoc --- odoc.opam | 1 + 1 file changed, 1 insertion(+) diff --git a/odoc.opam b/odoc.opam index e34f0e19f3..348e228a75 100644 --- a/odoc.opam +++ b/odoc.opam @@ -16,6 +16,7 @@ dev-repo: "http://github.com/ocaml-doc/odoc.git" available: [ocaml-version >= "4.03.0"] depends: [ + "astring" {build} "bos" {build} "cmdliner" {build} "cppo" {build} From cde299b882ff00b11bec86fde416b92b0b60d5d6 Mon Sep 17 00:00:00 2001 From: Bobby Priambodo Date: Sat, 21 Apr 2018 19:52:34 +0700 Subject: [PATCH 6/7] Add comment about the use of max_int --- src/parser/lexer.mll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/parser/lexer.mll b/src/parser/lexer.mll index a9fccbbdc5..b412df9bc7 100644 --- a/src/parser/lexer.mll +++ b/src/parser/lexer.mll @@ -86,6 +86,9 @@ let 
trim_leading_whitespace : string -> string = fun s -> let least_amount_of_whitespace = lines |> List.map count_leading_whitespace + (* Note that if [lines] is empty, [least_amount_of_whitespace] will be + [max_int]. But this is okay since if it's indeed empty, the value + will not be used when trying to remove whitespace below. *) |> List.fold_left min max_int in let remove_whitespace : string -> string = fun line -> From 1e8a5df95c516e09b3b7c85295d0d4a871dd7563 Mon Sep 17 00:00:00 2001 From: Bobby Priambodo Date: Sat, 21 Apr 2018 19:57:57 +0700 Subject: [PATCH 7/7] Keep line length under 80 characters --- src/parser/lexer.mll | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser/lexer.mll b/src/parser/lexer.mll index b412df9bc7..81dbbc6280 100644 --- a/src/parser/lexer.mll +++ b/src/parser/lexer.mll @@ -92,7 +92,10 @@ let trim_leading_whitespace : string -> string = fun s -> |> List.fold_left min max_int in let remove_whitespace : string -> string = fun line -> - String.sub line least_amount_of_whitespace (String.length line - least_amount_of_whitespace) + String.sub + line + least_amount_of_whitespace + (String.length line - least_amount_of_whitespace) in lines |> List.map remove_whitespace