From 5a8ed88b64568dbcfd4f8404b35a9445e731b100 Mon Sep 17 00:00:00 2001 From: ltdk Date: Mon, 1 Jan 2024 16:26:02 -0500 Subject: [PATCH 1/4] Match without spaces --- components/markdown/src/markdown.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/components/markdown/src/markdown.rs b/components/markdown/src/markdown.rs index a6cb3beca6..1d97c3fb6d 100644 --- a/components/markdown/src/markdown.rs +++ b/components/markdown/src/markdown.rs @@ -10,7 +10,7 @@ use utils::net::is_external_link; use crate::context::RenderContext; use errors::{Context, Error, Result}; use libs::pulldown_cmark::escape::escape_html; -use libs::regex::Regex; +use libs::regex::{Regex, RegexBuilder}; use utils::site::resolve_internal_link; use utils::slugs::slugify_anchors; use utils::table_of_contents::{make_table_of_contents, Heading}; @@ -24,6 +24,10 @@ const CONTINUE_READING: &str = ""; const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html"; static EMOJI_REPLACER: Lazy = Lazy::new(EmojiReplacer::new); +/// Set as a regex to help match some extra cases. This way, spaces and case don't matter. +static MORE_DIVIDER_RE: Lazy = + Lazy::new(|| RegexBuilder::new(r#""#).case_insensitive(true).build().unwrap()); + /// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary, /// private schemes. This regex checks if the given string starts with something that just looks /// like a scheme, i.e., a case-insensitive identifier followed by a colon. 
@@ -485,7 +489,7 @@ pub fn markdown_to_html( }); } Event::Html(text) => { - if text.contains("") { + if MORE_DIVIDER_RE.is_match(&text) { has_summary = true; events.push(Event::Html(CONTINUE_READING.into())); continue; From d21a4bde35082b66380b9681594641d63023bfd9 Mon Sep 17 00:00:00 2001 From: ltdk Date: Wed, 3 Jan 2024 15:32:23 -0500 Subject: [PATCH 2/4] Add tests for new handling, with a note on pulldown-cmark bug --- components/markdown/src/markdown.rs | 38 +++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/components/markdown/src/markdown.rs b/components/markdown/src/markdown.rs index 1d97c3fb6d..11a1b169da 100644 --- a/components/markdown/src/markdown.rs +++ b/components/markdown/src/markdown.rs @@ -25,8 +25,13 @@ const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html"; static EMOJI_REPLACER: Lazy = Lazy::new(EmojiReplacer::new); /// Set as a regex to help match some extra cases. This way, spaces and case don't matter. -static MORE_DIVIDER_RE: Lazy = - Lazy::new(|| RegexBuilder::new(r#""#).case_insensitive(true).build().unwrap()); +static MORE_DIVIDER_RE: Lazy = Lazy::new(|| { + RegexBuilder::new(r#""#) + .case_insensitive(true) + .dot_matches_new_line(true) + .build() + .unwrap() +}); /// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary, /// private schemes. This regex checks if the given string starts with something that just looks @@ -489,6 +494,7 @@ pub fn markdown_to_html( }); } Event::Html(text) => { + dbg!(&text); if MORE_DIVIDER_RE.is_match(&text) { has_summary = true; events.push(Event::Html(CONTINUE_READING.into())); @@ -604,6 +610,8 @@ pub fn markdown_to_html( #[cfg(test)] mod tests { + use config::Config; + use super::*; #[test] @@ -648,4 +656,30 @@ mod tests { assert!(!is_colocated_asset_link(link)); } } + + #[test] + // Tests for summary being split out + fn test_summary_split() { + let top = "Here's a compelling summary."; + let top_rendered = format!("

<p>{top}</p>

"); + let bottom = "Here's the compelling conclusion."; + let bottom_rendered = format!("

<p>{bottom}</p>

"); + // FIXME: would add a test that includes newlines, but due to the way pulldown-cmark parses HTML nodes, these are passed as separate HTML events. see: https://github.com/raphlinus/pulldown-cmark/issues/803 + let mores = ["", "", "", "", ""]; + let config = Config::default(); + let context = RenderContext::from_config(&config); + for more in mores { + let content = format!("{top}\n\n{more}\n\n{bottom}"); + let rendered = markdown_to_html(&content, &context, vec![]).unwrap(); + assert!(rendered.summary_len.is_some(), "no summary when splitting on {more}"); + let summary_len = rendered.summary_len.unwrap(); + let summary = &rendered.body[..summary_len].trim(); + let body = &rendered.body[summary_len..].trim(); + let continue_reading = &body[..CONTINUE_READING.len()]; + let body = &body[CONTINUE_READING.len()..].trim(); + assert_eq!(summary, &top_rendered); + assert_eq!(continue_reading, CONTINUE_READING); + assert_eq!(body, &bottom_rendered); + } + } } From d4f6ab461050fb5618ce69267a5af65c6b9ac97c Mon Sep 17 00:00:00 2001 From: ltdk Date: Fri, 5 Jan 2024 22:02:51 -0500 Subject: [PATCH 3/4] cargo fmt missed somehow --- components/markdown/src/markdown.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/markdown/src/markdown.rs b/components/markdown/src/markdown.rs index 11a1b169da..b29f6d0bcf 100644 --- a/components/markdown/src/markdown.rs +++ b/components/markdown/src/markdown.rs @@ -665,7 +665,8 @@ mod tests { let bottom = "Here's the compelling conclusion."; let bottom_rendered = format!("

<p>{bottom}</p>

"); // FIXME: would add a test that includes newlines, but due to the way pulldown-cmark parses HTML nodes, these are passed as separate HTML events. see: https://github.com/raphlinus/pulldown-cmark/issues/803 - let mores = ["", "", "", "", ""]; + let mores = + ["", "", "", "", ""]; let config = Config::default(); let context = RenderContext::from_config(&config); for more in mores { From d43ccbaff68f09752abe2c84f0be6ea855d7e308 Mon Sep 17 00:00:00 2001 From: ltdk Date: Sun, 7 Jan 2024 13:35:43 -0500 Subject: [PATCH 4/4] Remove dbg, avoid multiple continue-reading events --- components/markdown/src/markdown.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/components/markdown/src/markdown.rs b/components/markdown/src/markdown.rs index b29f6d0bcf..4d30b2ac92 100644 --- a/components/markdown/src/markdown.rs +++ b/components/markdown/src/markdown.rs @@ -494,8 +494,7 @@ pub fn markdown_to_html( }); } Event::Html(text) => { - dbg!(&text); - if MORE_DIVIDER_RE.is_match(&text) { + if !has_summary && MORE_DIVIDER_RE.is_match(&text) { has_summary = true; events.push(Event::Html(CONTINUE_READING.into())); continue;