From 9ed45b41d5a3b8c9924da358cbd0b60c36c59759 Mon Sep 17 00:00:00 2001 From: Bnchi <77180905+bnchi@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:32:08 +0300 Subject: [PATCH] Add support for serializing math nodes as markdown Closes GH-148. Reviewed-by: Titus Wormer --- mdast_util_to_markdown/src/configure.rs | 6 + mdast_util_to_markdown/src/construct_name.rs | 34 +- .../src/handle/inline_math.rs | 82 +++++ mdast_util_to_markdown/src/handle/math.rs | 46 +++ mdast_util_to_markdown/src/handle/mod.rs | 2 + mdast_util_to_markdown/src/handle/root.rs | 5 +- mdast_util_to_markdown/src/state.rs | 10 +- mdast_util_to_markdown/src/unsafe.rs | 27 +- mdast_util_to_markdown/tests/math.rs | 307 ++++++++++++++++++ 9 files changed, 505 insertions(+), 14 deletions(-) create mode 100644 mdast_util_to_markdown/src/handle/inline_math.rs create mode 100644 mdast_util_to_markdown/src/handle/math.rs create mode 100644 mdast_util_to_markdown/tests/math.rs diff --git a/mdast_util_to_markdown/src/configure.rs b/mdast_util_to_markdown/src/configure.rs index 4f13a079..f80be73f 100644 --- a/mdast_util_to_markdown/src/configure.rs +++ b/mdast_util_to_markdown/src/configure.rs @@ -65,6 +65,11 @@ pub struct Options { /// Setext headings cannot be used for empty headings or headings with a /// rank of three or more. pub setext: bool, + /// Whether to support math (text) with a single dollar (`bool`, default: `true`). + /// Single dollars work in Pandoc and many other places, but often interfere with “normal” + /// dollars in text. + /// If you turn this off, you can still use two or more dollars for text math. + pub single_dollar_text_math: bool, /// Marker to use for strong (`'*'` or `'_'`, default: `'*'`). 
pub strong: char, /// Whether to join definitions without a blank line (`bool`, default: @@ -90,6 +95,7 @@ impl Default for Options { rule_repetition: 3, rule_spaces: false, setext: false, + single_dollar_text_math: true, strong: '*', tight_definitions: false, } diff --git a/mdast_util_to_markdown/src/construct_name.rs b/mdast_util_to_markdown/src/construct_name.rs index 6bac4273..21e5734b 100644 --- a/mdast_util_to_markdown/src/construct_name.rs +++ b/mdast_util_to_markdown/src/construct_name.rs @@ -20,13 +20,6 @@ pub enum ConstructName { /// ^ /// ``` Blockquote, - /// Whole code (indented). - /// - /// ```markdown - /// ␠␠␠␠console.log(1) - /// ^^^^^^^^^^^^^^^^^^ - /// ``` - CodeIndented, /// Whole code (fenced). /// /// ````markdown @@ -74,6 +67,13 @@ pub enum ConstructName { /// | ~~~ /// ```` CodeFencedMetaTilde, + /// Whole code (indented). + /// + /// ```markdown + /// ␠␠␠␠console.log(1) + /// ^^^^^^^^^^^^^^^^^^ + /// ``` + CodeIndented, /// Whole definition. /// /// ```markdown @@ -186,6 +186,26 @@ pub enum ConstructName { /// ^^^^ /// ``` ListItem, + /// Math (flow). + /// + /// ```markdown + /// > | $$ + /// ^^ + /// > | a + /// ^ + /// > | $$ + /// ^^ + /// ``` + MathFlow, + /// Math (flow) meta flag. + /// + /// ```markdown + /// > | $$a + /// ^ + /// | b + /// | $$ + /// ``` + MathFlowMeta, /// Paragraph. /// /// ```markdown diff --git a/mdast_util_to_markdown/src/handle/inline_math.rs b/mdast_util_to_markdown/src/handle/inline_math.rs new file mode 100644 index 00000000..76085997 --- /dev/null +++ b/mdast_util_to_markdown/src/handle/inline_math.rs @@ -0,0 +1,82 @@ +//! 
JS equivalent: https://github.com/syntax-tree/mdast-util-math/blob/main/lib/index.js#L241
+
+use super::Handle;
+use crate::state::{Info, State};
+use alloc::format;
+use markdown::{
+    mdast::{InlineMath, Node},
+    message::Message,
+};
+
+impl Handle for InlineMath {
+    /// Serialize math (text) between two equal runs of dollars.
+    ///
+    /// The run starts at one dollar (two when `single_dollar_text_math` is off)
+    /// and grows until `value` holds no dollar run of exactly that length.
+    /// The start of each at-break unsafe match (presumably a line ending) is
+    /// replaced by a space so no such construct can begin inside the math.
+    /// Always returns `Ok`.
+    fn handle(
+        &self,
+        state: &mut State,
+        _info: &Info,
+        _parent: Option<&Node>,
+        _node: &Node,
+    ) -> Result<alloc::string::String, Message> {
+        let single_dollar = state.options.single_dollar_text_math;
+        let mut size: usize = if single_dollar { 1 } else { 2 };
+
+        // Same test as the regex `(^|[^$])\${size}([^$]|$)`, but without
+        // compiling a fresh regex for every candidate size.
+        while self.value.split(|c: char| c != '$').any(|run| run.len() == size) {
+            size += 1;
+        }
+
+        let sequence = "$".repeat(size);
+
+        let has_non_whitespace = !self.value.chars().all(char::is_whitespace);
+        let starts_with_whitespace = self.value.starts_with(char::is_whitespace);
+        let ends_with_whitespace = self.value.ends_with(char::is_whitespace);
+        let starts_with_dollar = self.value.starts_with('$');
+        let ends_with_dollar = self.value.ends_with('$');
+
+        // Pad with one space per side when the value is not only whitespace
+        // and either has whitespace on both edges or a dollar on any edge.
+        let mut value = self.value.clone();
+        if has_non_whitespace
+            && ((starts_with_whitespace && ends_with_whitespace)
+                || starts_with_dollar
+                || ends_with_dollar)
+        {
+            value = format!(" {} ", value);
+        }
+
+        for pattern in &mut state.r#unsafe {
+            if !pattern.at_break {
+                continue;
+            }
+
+            State::compile_pattern(pattern);
+
+            if let Some(regex) = &pattern.compiled {
+                while let Some(m) = regex.find(&value) {
+                    let start = m.start();
+                    let bytes = value.as_bytes();
+                    // Support CRLF: patterns match one line ending character, so when
+                    // the match starts at `\n` after a `\r`, replace both with one space.
+                    let crlf = start > 0 && bytes[start] == b'\n' && bytes[start - 1] == b'\r';
+                    let from = if crlf { start - 1 } else { start };
+
+                    value.replace_range(from..start + 1, " ");
+                }
+            }
+        }
+
+        Ok(format!("{}{}{}", sequence, value, sequence))
+    }
+}
+
+/// First character that serialized math (text) starts with.
+pub fn peek_inline_math() -> char {
+    '$'
+}
diff --git 
a/mdast_util_to_markdown/src/handle/math.rs b/mdast_util_to_markdown/src/handle/math.rs
new file mode 100644
index 00000000..9e3bb73b
--- /dev/null
+++ b/mdast_util_to_markdown/src/handle/math.rs
@@ -0,0 +1,46 @@
+//! JS equivalent: https://github.com/syntax-tree/mdast-util-math/blob/main/lib/index.js#L204
+
+use super::Handle;
+use crate::{
+    construct_name::ConstructName,
+    state::{Info, State},
+    util::{longest_char_streak::longest_char_streak, safe::SafeConfig},
+};
+use alloc::string::String;
+use markdown::{
+    mdast::{Math, Node},
+    message::Message,
+};
+
+impl Handle for Math {
+    /// Serialize math (flow): a dollar fence, the optional `meta`, the value on
+    /// its own lines (when not empty), and the same fence again.
+    fn handle(
+        &self,
+        state: &mut State,
+        _info: &Info,
+        _parent: Option<&Node>,
+        _node: &Node,
+    ) -> Result<String, Message> {
+        // Fence of at least two dollars; assuming `longest_char_streak` is the
+        // longest `$` run in `value`, it is one longer than any run inside.
+        let sequence = "$".repeat((longest_char_streak(&self.value, '$') + 1).max(2));
+        state.enter(ConstructName::MathFlow);
+        let mut value = sequence.clone();
+
+        if let Some(meta) = &self.meta {
+            state.enter(ConstructName::MathFlowMeta);
+            value.push_str(&state.safe(meta, &SafeConfig::new(&value, "\n", Some('$'))));
+            state.exit();
+        }
+        value.push('\n');
+
+        if !self.value.is_empty() {
+            value.push_str(&self.value);
+            value.push('\n');
+        }
+        value.push_str(&sequence);
+        state.exit();
+        Ok(value)
+    }
+}
diff --git a/mdast_util_to_markdown/src/handle/mod.rs b/mdast_util_to_markdown/src/handle/mod.rs
index 74708e77..8e5e0c01 100644
--- a/mdast_util_to_markdown/src/handle/mod.rs
+++ b/mdast_util_to_markdown/src/handle/mod.rs
@@ -12,10 +12,12 @@ pub mod html;
 pub mod image;
 pub mod image_reference;
 pub mod inline_code;
+pub mod inline_math;
 pub mod link;
 pub mod link_reference;
 mod list;
 mod list_item;
+mod math;
 mod paragraph;
 mod root;
 pub mod strong;
diff --git a/mdast_util_to_markdown/src/handle/root.rs b/mdast_util_to_markdown/src/handle/root.rs
index bcd84648..6ab8a95e 100644
--- a/mdast_util_to_markdown/src/handle/root.rs
+++ b/mdast_util_to_markdown/src/handle/root.rs
@@ -33,11 +33,12 @@ fn phrasing(child: &Node) -> bool {
         *child,
Node::Break(_) | Node::Emphasis(_) - | Node::ImageReference(_) | Node::Image(_) + | Node::ImageReference(_) | Node::InlineCode(_) - | Node::LinkReference(_) + | Node::InlineMath(_) | Node::Link(_) + | Node::LinkReference(_) | Node::Strong(_) | Node::Text(_) ) diff --git a/mdast_util_to_markdown/src/state.rs b/mdast_util_to_markdown/src/state.rs index 8d00b67c..891e82bc 100644 --- a/mdast_util_to_markdown/src/state.rs +++ b/mdast_util_to_markdown/src/state.rs @@ -7,8 +7,9 @@ use crate::{ construct_name::ConstructName, handle::{ emphasis::peek_emphasis, html::peek_html, image::peek_image, - image_reference::peek_image_reference, inline_code::peek_inline_code, link::peek_link, - link_reference::peek_link_reference, strong::peek_strong, Handle, + image_reference::peek_image_reference, inline_code::peek_inline_code, + inline_math::peek_inline_math, link::peek_link, link_reference::peek_link_reference, + strong::peek_strong, Handle, }, r#unsafe::Unsafe, util::{ @@ -322,6 +323,8 @@ impl<'a> State<'a> { Node::Strong(strong) => strong.handle(self, info, parent, node), Node::Text(text) => text.handle(self, info, parent, node), Node::ThematicBreak(thematic_break) => thematic_break.handle(self, info, parent, node), + Node::Math(math) => math.handle(self, info, parent, node), + Node::InlineMath(inline_math) => inline_math.handle(self, info, parent, node), _ => Err(Message { place: None, reason: format!("Unexpected node type `{:?}`", node), @@ -409,7 +412,7 @@ impl<'a> State<'a> { index_stack: Vec::new(), options, stack: Vec::new(), - r#unsafe: Unsafe::get_default_unsafe(), + r#unsafe: Unsafe::get_default_unsafe(options), } } @@ -424,6 +427,7 @@ impl<'a> State<'a> { Node::LinkReference(_) => Some(peek_link_reference()), Node::Link(link) => Some(peek_link(link, node, self)), Node::Strong(_) => Some(peek_strong(self)), + Node::InlineMath(_) => Some(peek_inline_math()), _ => None, } } diff --git a/mdast_util_to_markdown/src/unsafe.rs b/mdast_util_to_markdown/src/unsafe.rs index 
3d4d658f..062076ca 100644 --- a/mdast_util_to_markdown/src/unsafe.rs +++ b/mdast_util_to_markdown/src/unsafe.rs @@ -3,7 +3,7 @@ //! JS equivalent: . //! Also: . -use crate::construct_name::ConstructName; +use crate::{construct_name::ConstructName, Options}; use alloc::{vec, vec::Vec}; use regex::Regex; @@ -38,7 +38,7 @@ impl<'a> Unsafe<'a> { } } - pub fn get_default_unsafe() -> Vec { + pub fn get_default_unsafe(options: &Options) -> Vec { let full_phrasing_spans = vec![ ConstructName::Autolink, ConstructName::DestinationLiteral, @@ -87,6 +87,7 @@ impl<'a> Unsafe<'a> { ConstructName::CodeFencedMetaTilde, ConstructName::DestinationLiteral, ConstructName::HeadingAtx, + ConstructName::MathFlowMeta, ], vec![], false, @@ -102,6 +103,7 @@ impl<'a> Unsafe<'a> { ConstructName::CodeFencedMetaTilde, ConstructName::DestinationLiteral, ConstructName::HeadingAtx, + ConstructName::MathFlowMeta, ], vec![], false, @@ -308,6 +310,27 @@ impl<'a> Unsafe<'a> { false, ), Self::new('~', None, None, vec![], vec![], true), + Self::new( + '$', + None, + if options.single_dollar_text_math { + None + } else { + "\\$".into() + }, + vec![ConstructName::Phrasing], + vec![], + false, + ), + Self::new( + '$', + None, + None, + vec![ConstructName::MathFlowMeta], + vec![], + false, + ), + Self::new('$', None, "\\$".into(), vec![], vec![], true), ] } diff --git a/mdast_util_to_markdown/tests/math.rs b/mdast_util_to_markdown/tests/math.rs new file mode 100644 index 00000000..d6c8b3cb --- /dev/null +++ b/mdast_util_to_markdown/tests/math.rs @@ -0,0 +1,307 @@ +use markdown::mdast::{Definition, InlineMath, Math, Node, Paragraph, Text}; +use mdast_util_to_markdown::{ + to_markdown as to, to_markdown_with_options as to_md_with_opts, Options, +}; +use pretty_assertions::assert_eq; + +#[test] +fn math() { + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::from("a"), + position: None + })) + .unwrap(), + "$a$\n", + "should serialize math (text)" + ); + + assert_eq!( + to_md_with_opts( + 
&Node::InlineMath(InlineMath { + value: String::from("a"), + position: None + }), + &Options { + single_dollar_text_math: false, + ..Default::default() + } + ) + .unwrap(), + "$$a$$\n", + "should serialize math (text) with at least 2 dollars w/ `single_dollar_text_math: false`" + ); + + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::new(), + position: None + })) + .unwrap(), + "$$\n", + "should serialize math (text) w/o `value`" + ); + + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::from("a \\$ b"), + position: None + })) + .unwrap(), + "$$a \\$ b$$\n", + "should serialize math (text) w/ two dollar signs when including a dollar" + ); + + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::from("a \\$"), + position: None + })) + .unwrap(), + "$$ a \\$ $$\n", + "should serialize math (text) w/ padding when ending in a dollar sign" + ); + + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::from("$ a"), + position: None + })) + .unwrap(), + "$$ $ a $$\n", + "should serialize math (text) w/ padding when starting in a dollar sign" + ); + + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::from(" a "), + position: None + })) + .unwrap(), + "$ a $\n", + "should pad w/ a space if the value starts and ends w/ a space" + ); + + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::from(" a"), + position: None + })) + .unwrap(), + "$ a$\n", + "should not pad w/ spaces if the value ends w/ a non-space" + ); + + assert_eq!( + to(&Node::InlineMath(InlineMath { + value: String::from("a "), + position: None + })) + .unwrap(), + "$a $\n", + "should not pad w/ spaces if the value starts w/ a non-space" + ); + + assert_eq!( + to(&Node::Math(Math { + value: String::from("a"), + position: None, + meta: None + })) + .unwrap(), + "$$\na\n$$\n", + "should serialize math (flow)" + ); + + assert_eq!( + to(&Node::Math(Math { + value: String::new(), + position: None, + meta: None + })) + .unwrap(), + 
"$$\n$$\n", + "should serialize math (flow) w/o `value`" + ); + + assert_eq!( + to(&Node::Math(Math { + value: String::new(), + position: None, + meta: String::from("a").into() + })) + .unwrap(), + "$$a\n$$\n", + "should serialize math (flow) w/ `meta`" + ); + + assert_eq!( + to(&Node::Math(Math { + value: String::from("$$"), + position: None, + meta: None + })) + .unwrap(), + "$$$\n$$\n$$$\n", + "should serialize math (flow) w/ more dollars than occur together in `value`" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a $ b"), + position: None + })], + position: None + })) + .unwrap(), + "a \\$ b\n", + "should escape `$` in phrasing" + ); + + assert_eq!( + to_md_with_opts( + &Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a $ b"), + position: None + })], + position: None + }), + &Options { + single_dollar_text_math: false, + ..Default::default() + } + ) + .unwrap(), + "a $ b\n", + "should not escape a single dollar in phrasing w/ `single_dollar_text_math: false`'" + ); + + assert_eq!( + to_md_with_opts( + &Node::Paragraph(Paragraph { + children: vec![Node::Text(Text { + value: String::from("a $$ b"), + position: None + })], + position: None + }), + &Options { + single_dollar_text_math: false, + ..Default::default() + } + ) + .unwrap(), + "a \\$$ b\n", + "should escape two dollars in phrasing w/ `single_dollar_text_math: false`" + ); + + assert_eq!( + to(&Node::Paragraph(Paragraph { + children: vec![ + Node::Text(Text { + value: String::from("a $"), + position: None + }), + Node::InlineMath(InlineMath { + value: String::from("b"), + position: None + }), + Node::Text(Text { + value: String::from("$ c"), + position: None + }), + ], + position: None + })) + .unwrap(), + "a \\$$b$\\$ c\n", + "should escape `$` around math (text)" + ); + + assert_eq!( + to(&Node::Definition(Definition { + position: None, + url: String::from("b"), + title: 
String::from("a\n$\nb").into(),
+            identifier: String::from("a"),
+            label: String::from("a").into(),
+        }))
+        .unwrap(),
+        "[a]: b \"a\n$\nb\"\n",
+        "should not escape `$` at the start of a line"
+    );
+
+    assert_eq!(
+        to(&Node::Math(Math {
+            value: String::new(),
+            position: None,
+            meta: String::from("a\rb\nc").into()
+        }))
+        .unwrap(),
+        "$$a&#xD;b&#xA;c\n$$\n",
+        "should escape `\\r`, `\\n` when in `meta` of math (flow)"
+    );
+
+    assert_eq!(
+        to(&Node::Math(Math {
+            value: String::new(),
+            position: None,
+            meta: String::from("a$b").into()
+        }))
+        .unwrap(),
+        "$$a&#x24;b\n$$\n",
+        "should escape `$` when in `meta` of math (flow)"
+    );
+
+    assert_eq!(
+        to(&Node::InlineMath(InlineMath {
+            value: String::from("a\n- b"),
+            position: None
+        }))
+        .unwrap(),
+        "$a - b$\n",
+        "should prevent breaking out of code (-)"
+    );
+
+    assert_eq!(
+        to(&Node::InlineMath(InlineMath {
+            value: String::from("a\n#"),
+            position: None
+        }))
+        .unwrap(),
+        "$a #$\n",
+        "should prevent breaking out of code (#)"
+    );
+
+    assert_eq!(
+        to(&Node::InlineMath(InlineMath {
+            value: String::from("a\n1. "),
+            position: None
+        }))
+        .unwrap(),
+        "$a 1. $\n",
+        "should prevent breaking out of code (\\d\\.)"
+    );
+
+    assert_eq!(
+        to(&Node::InlineMath(InlineMath {
+            value: String::from("a\r- b"),
+            position: None
+        }))
+        .unwrap(),
+        "$a - b$\n",
+        "should prevent breaking out of code (cr)"
+    );
+
+    assert_eq!(
+        to(&Node::InlineMath(InlineMath {
+            value: String::from("a\r\n- b"),
+            position: None
+        }))
+        .unwrap(),
+        "$a - b$\n",
+        "should prevent breaking out of code (crlf)"
+    );
+}