From caa6fa9f9d67ad13aff56847ab45055965ee0fe2 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Wed, 24 Jun 2020 21:55:59 +0200 Subject: [PATCH 1/3] Reproduced the error in Oneline Parsing --- lib/earmark.ex | 50 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/lib/earmark.ex b/lib/earmark.ex index cf8aa960..773a1ee3 100644 --- a/lib/earmark.ex +++ b/lib/earmark.ex @@ -147,17 +147,27 @@ defmodule Earmark do #### HTML Blocks - Are only supported if the begin and end tag is in its own line, thusly + HTML is not parsed recursively or detected in all conditons right now, though GFM compliance + is a goal. -
- Hello -
+ But for now the following holds: - will parse as HTML while + A HTML Block defined by a tag starting a line and the same tag starting a different line is parsed + as one HTML AST node, marked with %{verbatim: true} -
Hello
+ E.g. + + iex(3)> lines = [ "
", "some", "
more text" ] + ...(3)> Earmark.as_ast(lines) + {:ok, [{"div", [], ["", "some"], %{meta: %{verbatim: true}}}, "more text"], []} + + And a line starting with an opening tag and ending with the corresponding closing tag is parsed in similar + fashion + + iex(4)> Earmark.as_ast([~s{spaniel}]) + {:ok, [{"span"}, [{"class", "superspan"}], ["spaniel"], %{meta: %{verbatim: true}}], []} + - is not supported and its result is undefined ### Adding HTML attributes with the IAL extension @@ -186,26 +196,26 @@ defmodule Earmark do It is possible to add IAL attributes to generated links or images in the following format. - iex(3)> markdown = "[link](url) {: .classy}" - ...(3)> Earmark.as_html(markdown) + iex(4)> markdown = "[link](url) {: .classy}" + ...(4)> Earmark.as_html(markdown) { :ok, "

\\n \\n link\\n \\n

\\n", []} For both cases, malformed attributes are ignored and warnings are issued. - iex(4)> [ "Some text", "{:hello}" ] |> Enum.join("\\n") |> Earmark.as_html() + iex(5)> [ "Some text", "{:hello}" ] |> Enum.join("\\n") |> Earmark.as_html() {:error, "

\\n Some text\\n

\\n", [{:warning, 2,"Illegal attributes [\\"hello\\"] ignored in IAL"}]} It is possible to escape the IAL in both forms if necessary - iex(5)> markdown = "[link](url)\\\\{: .classy}" - ...(5)> Earmark.as_html(markdown) + iex(6)> markdown = "[link](url)\\\\{: .classy}" + ...(6)> Earmark.as_html(markdown) {:ok, "

\\n \\n link\\n \\n {: .classy}\\n

\\n", []} This of course is not necessary in code blocks or text lines containing an IAL-like string, as in the following example - iex(6)> markdown = "hello {:world}" - ...(6)> Earmark.as_html!(markdown) + iex(7)> markdown = "hello {:world}" + ...(7)> Earmark.as_html!(markdown) "

\\n hello {:world}\\n

\\n" ## Limitations @@ -369,17 +379,17 @@ defmodule Earmark do end @doc """ - iex(7)> markdown = "My `code` is **best**" - ...(7)> {:ok, ast, []} = Earmark.as_ast(markdown) - ...(7)> ast + iex(8)> markdown = "My `code` is **best**" + ...(8)> {:ok, ast, []} = Earmark.as_ast(markdown) + ...(8)> ast [{"p", [], ["My ", {"code", [{"class", "inline"}], ["code"]}, " is ", {"strong", [], ["best"]}]}] Options are passes like to `as_html`, some do not have an effect though (e.g. `smartypants`) as formatting and escaping is not done for the AST. - iex(8)> markdown = "```elixir\\nIO.puts 42\\n```" - ...(8)> {:ok, ast, []} = Earmark.as_ast(markdown, code_class_prefix: "lang-") - ...(8)> ast + iex(9)> markdown = "```elixir\\nIO.puts 42\\n```" + ...(9)> {:ok, ast, []} = Earmark.as_ast(markdown, code_class_prefix: "lang-") + ...(9)> ast [{"pre", [], [{"code", [{"class", "elixir lang-elixir"}], ["IO.puts 42"]}]}] **Rationale**: From 8b5b1d78268d865fc5b17e67ec0b580e0901e0b7 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Thu, 25 Jun 2020 19:29:43 +0200 Subject: [PATCH 2/3] Added doc for comments, relates to #361 --- lib/earmark.ex | 52 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/lib/earmark.ex b/lib/earmark.ex index 773a1ee3..e5404a57 100644 --- a/lib/earmark.ex +++ b/lib/earmark.ex @@ -6,7 +6,7 @@ defmodule Earmark do Earmark now exposes a well-defined and stable Abstract Syntax Tree #### Earmark.as_ast - + The function is described below and the other two API functions `as_html` and `as_html!` are now based upon the structure of the result of `as_ast`. @@ -146,7 +146,7 @@ defmodule Earmark do never is. #### HTML Blocks - + HTML is not parsed recursively or detected in all conditons right now, though GFM compliance is a goal. 
@@ -164,9 +164,19 @@ defmodule Earmark do And a line starting with an opening tag and ending with the corresponding closing tag is parsed in similar fashion - iex(4)> Earmark.as_ast([~s{spaniel}]) - {:ok, [{"span"}, [{"class", "superspan"}], ["spaniel"], %{meta: %{verbatim: true}}], []} - + iex(4)> Earmark.as_ast(["spaniel"]) + {:ok, [{"span"}, [{"class", "superspan"}], ["spaniel"], %{verbatim: true}], []} + + #### HTML Comments + + Are recoginized if they start a line (after ws and are parsed until the next `-->` is found + all text after the next '-->' is ignored + + E.g. + + iex(5)> Earmark.as_ast(" text -->\\nafter") + {:ok, [{:comment, [" Comment", "comment line", "comment "], %{comment: true}}, {"p", [], ["after"]}], []} + ### Adding HTML attributes with the IAL extension @@ -196,26 +206,26 @@ defmodule Earmark do It is possible to add IAL attributes to generated links or images in the following format. - iex(4)> markdown = "[link](url) {: .classy}" - ...(4)> Earmark.as_html(markdown) + iex(6)> markdown = "[link](url) {: .classy}" + ...(6)> Earmark.as_html(markdown) { :ok, "

\\n \\n link\\n \\n

\\n", []} For both cases, malformed attributes are ignored and warnings are issued. - iex(5)> [ "Some text", "{:hello}" ] |> Enum.join("\\n") |> Earmark.as_html() + iex(7)> [ "Some text", "{:hello}" ] |> Enum.join("\\n") |> Earmark.as_html() {:error, "

\\n Some text\\n

\\n", [{:warning, 2,"Illegal attributes [\\"hello\\"] ignored in IAL"}]} It is possible to escape the IAL in both forms if necessary - iex(6)> markdown = "[link](url)\\\\{: .classy}" - ...(6)> Earmark.as_html(markdown) + iex(8)> markdown = "[link](url)\\\\{: .classy}" + ...(8)> Earmark.as_html(markdown) {:ok, "

\\n \\n link\\n \\n {: .classy}\\n

\\n", []} This of course is not necessary in code blocks or text lines containing an IAL-like string, as in the following example - iex(7)> markdown = "hello {:world}" - ...(7)> Earmark.as_html!(markdown) + iex(9)> markdown = "hello {:world}" + ...(9)> Earmark.as_html!(markdown) "

\\n hello {:world}\\n

\\n" ## Limitations @@ -319,7 +329,7 @@ defmodule Earmark do - description of the error - `options` can be an `%Earmark.Options{}` structure, or can be passed in as a `Keyword` argument (with legal keys for `%Earmark.Options` + `options` can be an `%Earmark.Options{}` structure, or can be passed in as a `Keyword` argument (with legal keys for `%Earmark.Options` * `renderer`: ModuleName @@ -364,7 +374,7 @@ defmodule Earmark do * `pure_links`: boolean Pure links of the form `~r{\\bhttps?://\\S+\\b}` are rendered as links from now on. - However, by setting the `pure_links` option to `false` this can be disabled and pre 1.4 + However, by setting the `pure_links` option to `false` this can be disabled and pre 1.4 behavior can be used. """ def as_html(lines, options \\ %Options{}) @@ -379,17 +389,17 @@ defmodule Earmark do end @doc """ - iex(8)> markdown = "My `code` is **best**" - ...(8)> {:ok, ast, []} = Earmark.as_ast(markdown) - ...(8)> ast - [{"p", [], ["My ", {"code", [{"class", "inline"}], ["code"]}, " is ", {"strong", [], ["best"]}]}] + iex(10)> markdown = "My `code` is **best**" + ...(10)> {:ok, ast, []} = Earmark.as_ast(markdown) + ...(10)> ast + [{"p", [], ["My ", {"code", [{"class", "inline"}], ["code"]}, " is ", {"strong", [], ["best"]}]}] Options are passes like to `as_html`, some do not have an effect though (e.g. `smartypants`) as formatting and escaping is not done for the AST. 
- iex(9)> markdown = "```elixir\\nIO.puts 42\\n```" - ...(9)> {:ok, ast, []} = Earmark.as_ast(markdown, code_class_prefix: "lang-") - ...(9)> ast + iex(11)> markdown = "```elixir\\nIO.puts 42\\n```" + ...(11)> {:ok, ast, []} = Earmark.as_ast(markdown, code_class_prefix: "lang-") + ...(11)> ast [{"pre", [], [{"code", [{"class", "elixir lang-elixir"}], ["IO.puts 42"]}]}] **Rationale**: From 179487dca6559d37d37e1502e64d742f57c3dfa7 Mon Sep 17 00:00:00 2001 From: RobertDober Date: Fri, 26 Jun 2020 11:17:46 +0200 Subject: [PATCH 3/3] Added doc for oneline tags --- lib/earmark.ex | 44 +++++++++++++++++++++++++------------ lib/earmark/line_scanner.ex | 17 +++++--------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/lib/earmark.ex b/lib/earmark.ex index e5404a57..2acaad78 100644 --- a/lib/earmark.ex +++ b/lib/earmark.ex @@ -167,6 +167,22 @@ defmodule Earmark do iex(4)> Earmark.as_ast(["spaniel"]) {:ok, [{"span"}, [{"class", "superspan"}], ["spaniel"], %{verbatim: true}], []} + What is HTML? + + We differ from strict GFM by allowing **all** tags, not only HTML5 tags; this holds for oneliners.... + + iex(5)> {:ok, ast, []} = Earmark.as_ast(["<stupid />", "<not>better</not>"]) + ...(5)> ast + [ + {"stupid", [], [], %{meta: %{verbatim: true}}}, + {"not", [], ["better"], %{meta: %{verbatim: true}}}] + + and for multiline blocks + + iex(6)> {:ok, ast, []} = Earmark.as_ast([ "<hello>", "world", "</hello>"]) + ...(6)> ast + [{"hello", [], ["world"], %{verbatim: true}}] + #### HTML Comments Are recoginized if they start a line (after ws and are parsed until the next `-->` is found @@ -174,7 +190,7 @@ defmodule Earmark do E.g. - iex(5)> Earmark.as_ast(" text -->\\nafter") + iex(7)> Earmark.as_ast(" text -->\\nafter") {:ok, [{:comment, [" Comment", "comment line", "comment "], %{comment: true}}, {"p", [], ["after"]}], []} @@ -206,26 +222,26 @@ defmodule Earmark do It is possible to add IAL attributes to generated links or images in the following format. 
- iex(6)> markdown = "[link](url) {: .classy}" - ...(6)> Earmark.as_html(markdown) + iex(8)> markdown = "[link](url) {: .classy}" + ...(8)> Earmark.as_html(markdown) { :ok, "

\\n \\n link\\n \\n

\\n", []} For both cases, malformed attributes are ignored and warnings are issued. - iex(7)> [ "Some text", "{:hello}" ] |> Enum.join("\\n") |> Earmark.as_html() + iex(9)> [ "Some text", "{:hello}" ] |> Enum.join("\\n") |> Earmark.as_html() {:error, "

\\n Some text\\n

\\n", [{:warning, 2,"Illegal attributes [\\"hello\\"] ignored in IAL"}]} It is possible to escape the IAL in both forms if necessary - iex(8)> markdown = "[link](url)\\\\{: .classy}" - ...(8)> Earmark.as_html(markdown) + iex(10)> markdown = "[link](url)\\\\{: .classy}" + ...(10)> Earmark.as_html(markdown) {:ok, "

\\n \\n link\\n \\n {: .classy}\\n

\\n", []} This of course is not necessary in code blocks or text lines containing an IAL-like string, as in the following example - iex(9)> markdown = "hello {:world}" - ...(9)> Earmark.as_html!(markdown) + iex(11)> markdown = "hello {:world}" + ...(11)> Earmark.as_html!(markdown) "

\\n hello {:world}\\n

\\n" ## Limitations @@ -389,17 +405,17 @@ defmodule Earmark do end @doc """ - iex(10)> markdown = "My `code` is **best**" - ...(10)> {:ok, ast, []} = Earmark.as_ast(markdown) - ...(10)> ast + iex(12)> markdown = "My `code` is **best**" + ...(12)> {:ok, ast, []} = Earmark.as_ast(markdown) + ...(12)> ast [{"p", [], ["My ", {"code", [{"class", "inline"}], ["code"]}, " is ", {"strong", [], ["best"]}]}] Options are passes like to `as_html`, some do not have an effect though (e.g. `smartypants`) as formatting and escaping is not done for the AST. - iex(11)> markdown = "```elixir\\nIO.puts 42\\n```" - ...(11)> {:ok, ast, []} = Earmark.as_ast(markdown, code_class_prefix: "lang-") - ...(11)> ast + iex(13)> markdown = "```elixir\\nIO.puts 42\\n```" + ...(13)> {:ok, ast, []} = Earmark.as_ast(markdown, code_class_prefix: "lang-") + ...(13)> ast [{"pre", [], [{"code", [{"class", "elixir lang-elixir"}], ["IO.puts 42"]}]}] **Rationale**: diff --git a/lib/earmark/line_scanner.ex b/lib/earmark/line_scanner.ex index 8d76c90b..f695821e 100644 --- a/lib/earmark/line_scanner.ex +++ b/lib/earmark/line_scanner.ex @@ -139,17 +139,11 @@ defmodule Earmark.LineScanner do match = !recursive && Regex.run(~r{\A<([-\w]+?)(?:\s.*)?>.*}, line) -> [_, tag] = match - - if block_tag?(tag), - do: %Line.HtmlOneLine{tag: tag, content: line, indent: 0}, - else: %Line.Text{content: line, indent: 0} + %Line.HtmlOneLine{tag: tag, content: line, indent: 0} match = !recursive && Regex.run(~r{\A<([-\w]+?)(?:\s.*)?/>.*}, line) -> [_, tag] = match - - if block_tag?(tag), - do: %Line.HtmlOneLine{tag: tag, content: line, indent: 0}, - else: %Line.Text{content: line, indent: 0} + %Line.HtmlOneLine{tag: tag, content: line, indent: 0} match = !recursive && Regex.run(~r/^<([-\w]+?)(?:\s.*)?>/, line) -> [_, tag] = match @@ -251,9 +245,10 @@ defmodule Earmark.LineScanner do |> String.replace("<", "<") - @block_tags ~w< address article aside blockquote canvas dd div dl fieldset figcaption h1 h2 h3 h4 h5 h6 header hgroup 
li main nav noscript ol output p pre section table tfoot ul video> - |> Enum.into(MapSet.new()) - defp block_tag?(tag), do: MapSet.member?(@block_tags, tag) + # Not sure yet if we shall enforce all tags; in that case we shall enlarge @block_tags to @html_tags + # @block_tags ~w< address article aside blockquote canvas dd div dl fieldset figcaption h1 h2 h3 h4 h5 h6 header hgroup li main nav noscript ol output p pre section table tfoot ul video> + # |> Enum.into(MapSet.new()) + # defp block_tag?(tag), do: MapSet.member?(@block_tags, tag) @column_rgx ~r{\A[\s|:-]+\z} defp _determine_if_header(columns) do