diff --git a/Cargo.lock b/Cargo.lock index ea6e0783c..a1a77e13d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,6 +18,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "atlatl" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32bb156841d2e2a888185b5b4f7d93d30efd3a40d1671d9628ab39536adb7ea2" +dependencies = [ + "fnv", + "num-traits", + "serde", +] + [[package]] name = "atty" version = "0.2.14" @@ -35,6 +46,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" +[[package]] +name = "bincode" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30d3a39baa26f9651f17b375061f3233dde33424a8b72b0dbe93a68a0bc896d" +dependencies = [ + "byteorder", + "serde", +] + [[package]] name = "bitflags" version = "1.2.1" @@ -70,6 +91,7 @@ dependencies = [ "comrak", "fs_extra", "handlebars", + "kl-hyphenate", "lazy_static", "regex", "sass-rs", @@ -173,6 +195,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "fs_extra" version = "1.1.0" @@ -218,6 +246,28 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" +[[package]] +name = "kl-hyphenate" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7f2e69c68893c9f23e268f56eab6fed56c6637f69a28a31346db5cef3a389dd" +dependencies = [ + "atlatl", + "bincode", + "kl-hyphenate-commons", + "serde", +] + +[[package]] +name = "kl-hyphenate-commons" +version = "0.7.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7db52982b6c74f9a9f66468824a6d8f6cf242be6b39d14625ca56f6939e7665"
+dependencies = [
+ "atlatl",
+ "serde",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
@@ -425,6 +475,9 @@ name = "serde"
 version = "1.0.114"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3"
+dependencies = [
+ "serde_derive",
+]
 
 [[package]]
 name = "serde_derive"
diff --git a/Cargo.toml b/Cargo.toml
index 7c01e45fc..e807fae28 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,3 +16,4 @@ fs_extra = "1.1.0"
 regex = "1.3"
 sass-rs = "0.2.1"
 chrono = "0.4.13"
+kl-hyphenate = "0.7.2"
diff --git a/hyphenation-en-us.bincode b/hyphenation-en-us.bincode
new file mode 100644
index 000000000..8f42678f3
Binary files /dev/null and b/hyphenation-en-us.bincode differ
diff --git a/src/main.rs b/src/main.rs
index 51f89292b..15b581c93 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
 mod blogs;
+mod markdown;
 mod posts;
 
 use crate::blogs::Blog;
diff --git a/src/markdown.rs b/src/markdown.rs
new file mode 100644
index 000000000..f35d0ce2c
--- /dev/null
+++ b/src/markdown.rs
@@ -0,0 +1,90 @@
+use comrak::{
+    nodes::{AstNode, NodeValue},
+    Arena, ComrakExtensionOptions, ComrakOptions, ComrakRenderOptions,
+};
+use kl_hyphenate::{Hyphenator, Language, Load, Standard};
+use std::error::Error;
+
+const SOFT_HYPHEN: char = '\u{00AD}';
+const HYPHENATION_DICTIONARY: &str = "hyphenation-en-us.bincode";
+
+/// Renders a Markdown document to HTML, adding soft hyphens to its text nodes.
+///
+/// Raw HTML in the input is passed through, and comrak's `header_ids` extension is enabled with
+/// an empty prefix.
+///
+/// # Errors
+///
+/// Fails if the hyphenation dictionary cannot be loaded, if writing the HTML fails, or if the
+/// rendered output is not valid UTF-8.
+pub(crate) fn render(input: &str) -> Result<String, Box<dyn Error>> {
+    let options = ComrakOptions {
+        render: ComrakRenderOptions {
+            unsafe_: true, // Allow rendering of raw HTML
+            ..ComrakRenderOptions::default()
+        },
+        extension: ComrakExtensionOptions {
+            header_ids: Some(String::new()),
+            ..ComrakExtensionOptions::default()
+        },
+        ..ComrakOptions::default()
+    };
+
+    // The dictionary path is relative, so it is resolved against the current working directory.
+    let hyphenator = Standard::from_path(Language::EnglishUS, HYPHENATION_DICTIONARY)?;
+
+    let arena = Arena::new();
+    let ast = comrak::parse_document(&arena, input, &options);
+
+    hyphenate(ast, &hyphenator);
+
+    let mut output = Vec::new();
+    comrak::format_html(ast, &options, &mut output)?;
+    Ok(String::from_utf8(output)?)
+}
+
+// Pre-compute points inside words where browsers can add hyphens during rendering.
+//
+// Support for the CSS rule `hyphens: auto`, which tells the browser to split words by adding
+// hyphens when there is no space left on the line, is quite low across browsers, preventing us
+// from using it on the blog.
+//
+// A widely supported alternative is the `hyphens: manual` rule, which moves the burden of deciding
+// *where* to break the word to the website. To properly use that rule, the website has to insert
+// the "soft hyphen" unicode character (U+00AD) in every position the browser is allowed to break
+// the word.
+//
+// The following piece of code walks through the Markdown AST adding those characters in every
+// suitable place, thanks to the kl-hyphenate library.
+
+/// Inserts soft hyphens into every text node of the subtree rooted at `node`.
+fn hyphenate<'a>(node: &'a AstNode<'a>, hyphenator: &Standard) {
+    if let NodeValue::Text(content) = &mut node.data.borrow_mut().value {
+        // Text that is not valid UTF-8 is left untouched.
+        if let Ok(text) = std::str::from_utf8(content) {
+            // `into_bytes` hands over the new string's buffer instead of copying it.
+            *content = add_soft_hyphens(text, hyphenator).into_bytes();
+        }
+    }
+    for child in node.children() {
+        hyphenate(child, hyphenator);
+    }
+}
+
+/// Returns `content` with a soft hyphen at each break point found in its space-separated words.
+fn add_soft_hyphens(content: &str, hyphenator: &Standard) -> String {
+    let mut output = String::with_capacity(content.len());
+    for (i, word) in content.split(' ').enumerate() {
+        if i != 0 {
+            output.push(' ');
+        }
+        let hyphenated = hyphenator.hyphenate(word);
+        for (j, segment) in hyphenated.into_iter().segments().enumerate() {
+            if j != 0 {
+                output.push(SOFT_HYPHEN);
+            }
+            output.push_str(&segment);
+        }
+    }
+    output
+}
diff --git a/src/posts.rs b/src/posts.rs
index d8770f5ab..fa3f06047 100644
--- a/src/posts.rs
+++ b/src/posts.rs
@@ -1,5 +1,4 @@
 use crate::blogs::Manifest;
-use comrak::{ComrakExtensionOptions, ComrakOptions, ComrakRenderOptions};
 use regex::Regex;
 use serde_derive::{Deserialize, Serialize};
 use std::error::Error;
@@ -63,19 +62,7 @@ impl Post {
             layout,
         } = serde_yaml::from_str(yaml)?;
         // next, the contents. we add + to get rid of the final "---\n\n"
-        let options = ComrakOptions {
-            render: ComrakRenderOptions {
-                unsafe_: true, // Allow rendering of raw HTML
-                ..ComrakRenderOptions::default()
-            },
-            extension: ComrakExtensionOptions {
-                header_ids: Some(String::new()),
-                ..ComrakExtensionOptions::default()
-            },
-            ..ComrakOptions::default()
-        };
-
-        let contents = comrak::markdown_to_html(&contents[end_of_yaml + 5..], &options);
+        let contents = crate::markdown::render(&contents[end_of_yaml + 5..])?;
 
         // finally, the url.
         let mut url = PathBuf::from(&*filename);
diff --git a/src/styles/app.scss b/src/styles/app.scss
index e33b36bf3..57474dbcb 100644
--- a/src/styles/app.scss
+++ b/src/styles/app.scss
@@ -54,8 +54,19 @@ blockquote {
   }
 }
 
+p {
+  text-align: justify;
+
+  /* Manual hyphenation: break points are the soft hyphens added by src/markdown.rs, since
+   * automatic hyphenation is not widely supported (Chrome lacks it on some platforms). */
+  -webkit-hyphens: manual;
+  -ms-hyphens: manual;
+  hyphens: manual;
+}
+
 code {
   overflow: auto;
+  line-break: anywhere;
 }
 
 code.language-console::before,