From d24d69a8f534dd9507ce1f8ce4cb29e1ca8086c4 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 19 Oct 2023 15:28:12 +0200 Subject: [PATCH] Add link, markup, gendered ordinals --- src/csl/elem.rs | 93 ++++++++++++++++++++++++++++++++-------- src/csl/mod.rs | 41 +++++++++++++++--- src/csl/rendering/mod.rs | 56 ++++++++++++++++++------ src/lang/mod.rs | 23 +++++----- src/types/numeric.rs | 9 ++-- src/types/strings.rs | 72 +++++++++++++++++++++++++++++++ 6 files changed, 246 insertions(+), 48 deletions(-) diff --git a/src/csl/elem.rs b/src/csl/elem.rs index e1072fc9..816eb966 100644 --- a/src/csl/elem.rs +++ b/src/csl/elem.rs @@ -26,6 +26,8 @@ impl Elem { .map(|c| match c { ElemChild::Text(t) => t.text.len(), ElemChild::Elem(e) => e.str_len(), + ElemChild::Markup(m) => m.len(), + ElemChild::Link { text, .. } => text.text.len(), }) .sum() } @@ -203,6 +205,7 @@ impl ElemChildren { self.0.last_mut().and_then(|c| match c { ElemChild::Text(t) => Some(&mut t.text), ElemChild::Elem(e) => e.children.last_text_mut(), + ElemChild::Markup(_) | ElemChild::Link { .. } => None, }) } @@ -227,8 +230,14 @@ impl ElemChildren { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ElemChild { + /// This is some text. Text(Formatted), + /// A child element. Elem(Elem), + /// This should be processed by Typst. + Markup(String), + /// This is a link. + Link { text: Formatted, url: String }, } impl ElemChild { @@ -238,26 +247,28 @@ impl ElemChild { format: BufWriteFormat, ) -> Result<(), fmt::Error> { match self { - ElemChild::Text(t) if format == BufWriteFormat::HTML => { - let is_default = t.formatting == Formatting::default(); - if !is_default { - w.write_str("")?; - } + ElemChild::Text(t) => { + t.formatting.write_start(w, format)?; w.write_str(&t.text)?; - if !is_default { - w.write_str("")?; - } + t.formatting.write_end(w, format)?; Ok(()) } - ElemChild::Text(t) if format == BufWriteFormat::VT100 => { - t.formatting.write_vt100(w)?; - w.write_str(&t.text)?; - w.write_str("\x1b[0m") - } - ElemChild::Text(t) => w.write_str(&t.text), ElemChild::Elem(e) => e.write_buf(w, format), + ElemChild::Markup(m) => w.write_str(m), + ElemChild::Link { text, url } if format == BufWriteFormat::HTML => { + w.write_str("")?; + text.formatting.write_start(w, format)?; + w.write_str(&text.text)?; + text.formatting.write_end(w, format)?; + w.write_str("") + } + ElemChild::Link { text, .. } => { + text.formatting.write_start(w, format)?; + w.write_str(&text.text)?; + text.formatting.write_end(w, format) + } } } @@ -265,20 +276,28 @@ impl ElemChild { match self { ElemChild::Text(t) => t.text.len(), ElemChild::Elem(e) => e.str_len(), + ElemChild::Markup(m) => m.len(), + ElemChild::Link { text, .. } => text.text.len(), } } pub(super) fn has_content(&self) -> bool { match self { - ElemChild::Text(t) => t.text.chars().any(|c| !c.is_whitespace()), + ElemChild::Text(Formatted { text, .. }) | ElemChild::Markup(text) => { + text.chars().any(|c| !c.is_whitespace()) + } ElemChild::Elem(e) => e.has_content(), + ElemChild::Link { .. } => true, } } pub(super) fn is_empty(&self) -> bool { match self { - ElemChild::Text(t) => t.text.is_empty(), + ElemChild::Text(Formatted { text, .. }) | ElemChild::Markup(text) => { + text.is_empty() + } ElemChild::Elem(e) => e.is_empty(), + ElemChild::Link { text, .. } => text.text.is_empty(), } } } @@ -452,6 +471,44 @@ impl Formatting { Ok(()) } + + pub(super) fn write_start( + &self, + buf: &mut impl fmt::Write, + format: BufWriteFormat, + ) -> Result<(), fmt::Error> { + match format { + BufWriteFormat::Plain => Ok(()), + BufWriteFormat::VT100 => self.write_vt100(buf), + BufWriteFormat::HTML => { + let is_default = self == &Formatting::default(); + if !is_default { + buf.write_str("")?; + } + Ok(()) + } + } + } + + pub(super) fn write_end( + &self, + buf: &mut impl fmt::Write, + format: BufWriteFormat, + ) -> Result<(), fmt::Error> { + match format { + BufWriteFormat::Plain => Ok(()), + BufWriteFormat::VT100 => buf.write_str("\x1b[0m"), + BufWriteFormat::HTML => { + let is_default = self == &Formatting::default(); + if !is_default { + buf.write_str("")?; + } + Ok(()) + } + } + } } #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/src/csl/mod.rs b/src/csl/mod.rs index 3f29b9db..dabbecdf 100644 --- a/src/csl/mod.rs +++ b/src/csl/mod.rs @@ -10,8 +10,8 @@ use std::{mem, vec}; use citationberg::taxonomy::{Locator, OtherTerm, Term, Variable}; use citationberg::{ taxonomy as csl_taxonomy, Affixes, Citation, Collapse, CslMacro, Display, - IndependentStyle, InheritableNameOptions, Locale, LocaleCode, RendersYearSuffix, - SecondFieldAlign, StyleClass, TermForm, ToFormatting, + GrammarGender, IndependentStyle, InheritableNameOptions, Locale, LocaleCode, + RendersYearSuffix, SecondFieldAlign, StyleClass, TermForm, ToFormatting, }; use citationberg::{DateForm, LongShortForm, OrdinalLookup, TextCase}; use indexmap::IndexSet; @@ -1258,6 +1258,7 @@ impl WritingContext { } Some(ElemChild::Text(_)) => false, Some(ElemChild::Elem(e)) => e.has_content(), + Some(ElemChild::Markup(_) | ElemChild::Link { .. }) => true, None => false, }; @@ -1802,14 +1803,33 @@ impl<'a> Context<'a> { for chunk in &chunked.0 { match chunk.kind { ChunkKind::Normal => self.push_str(&chunk.value), - _ => { - self.writing.buf.push_chunk(chunk); + ChunkKind::Verbatim => { + self.writing.buf.push_verbatim(&chunk.value); self.writing.pull_punctuation = false; } + ChunkKind::Math => { + self.writing.save_to_block(); + self.writing + .elem_stack + .last_mut() + .0 + .push(ElemChild::Markup(chunk.value.clone())) + } } } } + /// Push a link into the buffer. + pub fn push_link(&mut self, chunked: &ChunkedString, url: String) { + let format = *self.writing.formatting(); + self.writing.save_to_block(); + self.writing + .elem_stack + .last_mut() + .0 + .push(ElemChild::Link { text: format.add_text(chunked.to_string()), url }) + } + /// Folds all remaining elements into the first element and returns it. fn flush(self) -> ElemChildren { self.writing.flush() @@ -1832,6 +1852,17 @@ impl<'a> Context<'a> { None } + /// Get the gender of a term. + fn gender(&self, term: Term) -> Option { + if let Some(localization) = + self.style.lookup_locale(|l| l.term(term, TermForm::default())) + { + return localization.gender; + } else { + None + } + } + /// Get a localized date format. fn localized_date(&self, form: DateForm) -> Option<&'a citationberg::Date> { self.style @@ -2076,7 +2107,7 @@ mod tests { let mut driver = BibliographyDriver::new().unwrap(); for n in (0..bib.len()).step_by(3) { - let mut items = vec![ + let items = vec![ CitationItem::from_entry(bib.nth(n).unwrap()), CitationItem::from_entry(bib.nth(n + 1).unwrap()), CitationItem::from_entry(bib.nth(n + 2).unwrap()), diff --git a/src/csl/rendering/mod.rs b/src/csl/rendering/mod.rs index 4fceb68d..6b1fe6e2 100644 --- a/src/csl/rendering/mod.rs +++ b/src/csl/rendering/mod.rs @@ -41,17 +41,41 @@ impl RenderCsl for citationberg::Text { let cidx = ctx.push_case(self.text_case); match &self.target { - TextTarget::Variable { var, form } => ctx.push_chunked( - match var { - Variable::Standard(var) => ctx.resolve_standard_variable(*form, *var), - Variable::Number(var) => ctx - .resolve_number_variable(*var) - .map(|t| Cow::Owned(t.to_chunked_string())), - _ => None, + TextTarget::Variable { var: Variable::Standard(var), form } => { + if let Some(val) = ctx.resolve_standard_variable(*form, *var) { + match var { + StandardVariable::URL => { + let str = val.to_string(); + ctx.push_link(&val, str); + } + StandardVariable::DOI => { + let url = format!("https://doi.org/{}", val.to_str()); + ctx.push_link(&val, url); + } + StandardVariable::PMID => { + let url = format!( + "https://www.ncbi.nlm.nih.gov/pubmed/{}", + val.to_str() + ); + ctx.push_link(&val, url); + } + StandardVariable::PMCID => { + let url = format!( + "https://www.ncbi.nlm.nih.gov/pmc/articles/{}", + val.to_str() + ); + ctx.push_link(&val, url); + } + _ => ctx.push_chunked(&val), + } } - .unwrap_or_default() - .as_ref(), - ), + } + TextTarget::Variable { var: Variable::Number(var), .. } => { + if let Some(n) = ctx.resolve_number_variable(*var) { + ctx.push_str(&n.to_str()) + } + } + TextTarget::Variable { .. } => {} TextTarget::Macro { name } => { let len = ctx.writing.len(); let mac = ctx.style.get_macro(name); @@ -98,6 +122,7 @@ impl RenderCsl for citationberg::Number { let depth = ctx.push_elem(self.formatting); let affix_loc = ctx.apply_prefix(&self.affixes); let cidx = ctx.push_case(self.text_case); + let gender = ctx.gender(self.variable.into()); match value { Some(MaybeTyped::Typed(num)) if num.will_transform() => { @@ -129,7 +154,9 @@ impl RenderCsl for citationberg::Number { }; if normal_num { - num.as_ref().with_form(ctx, self.form, ctx.ordinal_lookup()).unwrap(); + num.as_ref() + .with_form(ctx, self.form, gender, ctx.ordinal_lookup()) + .unwrap(); } } Some(MaybeTyped::Typed(num)) => write!(ctx, "{}", num).unwrap(), @@ -348,11 +375,16 @@ fn render_date_part( }) .unwrap_or_default() => { + let gender = date + .month + .and_then(OtherTerm::month) + .and_then(|m| ctx.gender(m.into())); + write!( ctx, "{}{}", val, - ctx.ordinal_lookup().lookup(val).unwrap_or_default() + ctx.ordinal_lookup().lookup(val, gender).unwrap_or_default() ) .unwrap(); } diff --git a/src/lang/mod.rs b/src/lang/mod.rs index 5012db33..0bdd1973 100644 --- a/src/lang/mod.rs +++ b/src/lang/mod.rs @@ -5,7 +5,7 @@ pub(crate) mod name; use std::{fmt::Write, mem}; -use crate::types::{ChunkKind, StringChunk}; +use crate::types::{FoldableKind, FoldableStringChunk}; /// Rules for the title case transformation. #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -375,17 +375,20 @@ impl CaseFolder { } /// Add a string chunk to the buffer. - pub fn push_chunk(&mut self, chunk: &StringChunk) { + pub fn push_verbatim(&mut self, value: &str) { + let conf = mem::replace(&mut self.case, Case::NoTransform); + self.last_reconfig = self.buf.len(); + self.push_str(&value); + self.last_reconfig = self.buf.len(); + self.case = conf; + } + /// Add a string chunk to the buffer. + pub fn push_chunk(&mut self, chunk: &FoldableStringChunk) { match chunk.kind { - ChunkKind::Verbatim => { - let conf = mem::replace(&mut self.case, Case::NoTransform); - self.last_reconfig = self.buf.len(); - self.push_str(&chunk.value); - self.last_reconfig = self.buf.len(); - self.case = conf; + FoldableKind::Verbatim => { + self.push_verbatim(&chunk.value); } - ChunkKind::Math => todo!(), - ChunkKind::Normal => self.push_str(&chunk.value), + FoldableKind::Normal => self.push_str(&chunk.value), } } diff --git a/src/types/numeric.rs b/src/types/numeric.rs index f75a3b07..21a94cca 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -4,7 +4,7 @@ use std::fmt::Write; use std::fmt::{self, Display}; use std::str::FromStr; -use citationberg::{NumberForm, OrdinalLookup}; +use citationberg::{GrammarGender, NumberForm, OrdinalLookup}; use serde::de::Visitor; use serde::{Deserialize, Deserializer, Serialize}; use thiserror::Error; @@ -159,6 +159,7 @@ impl Numeric { &self, buf: &mut T, form: NumberForm, + gender: Option, ords: OrdinalLookup<'_>, ) -> std::fmt::Result where @@ -167,11 +168,13 @@ impl Numeric { let format = |n: i32, buf: &mut T| -> std::fmt::Result { match form { NumberForm::Ordinal => { - write!(buf, "{}{}", n, ords.lookup(n).unwrap_or_default()) + write!(buf, "{}{}", n, ords.lookup(n, gender).unwrap_or_default()) } NumberForm::LongOrdinal => match ords.lookup_long(n) { Some(str) => buf.write_str(str), - None => write!(buf, "{}{}", n, ords.lookup(n).unwrap_or_default()), + None => { + write!(buf, "{}{}", n, ords.lookup(n, gender).unwrap_or_default()) + } }, NumberForm::Roman if n > 0 && n <= i16::MAX as i32 => { write!(buf, "{:x}", numerals::roman::Roman::from(n as i16)) diff --git a/src/types/strings.rs b/src/types/strings.rs index 30b4b4d6..b96a2a57 100644 --- a/src/types/strings.rs +++ b/src/types/strings.rs @@ -171,6 +171,24 @@ impl FromStr for FormatString { #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] pub struct ChunkedString(pub Vec); +/// A string whose elements can set whether they do case folding. +#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +pub struct FoldableChunkedString(pub Vec); + +impl TryFrom for FoldableChunkedString { + type Error = (); + + fn try_from(value: ChunkedString) -> Result { + Ok(Self(value.0.into_iter().map(TryInto::try_into).collect::>()?)) + } +} + +impl From for ChunkedString { + fn from(value: FoldableChunkedString) -> Self { + Self(value.0.into_iter().map(Into::into).collect()) + } +} + impl<'de> Deserialize<'de> for ChunkedString { fn deserialize(deserializer: D) -> Result where @@ -467,6 +485,29 @@ pub struct StringChunk { pub kind: ChunkKind, } +/// A chunk of a string. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FoldableStringChunk { + /// The string value. + pub value: String, + /// Whether the chunk is subject to case folding or contains math. + pub kind: FoldableKind, +} + +impl TryFrom for FoldableStringChunk { + type Error = (); + + fn try_from(value: StringChunk) -> Result { + Ok(Self { value: value.value, kind: value.kind.try_into()? }) + } +} + +impl From for StringChunk { + fn from(value: FoldableStringChunk) -> Self { + Self { value: value.value, kind: value.kind.into() } + } +} + impl StringChunk { /// Creates a new `StrChunk` from a string and a kind. pub fn new(value: impl Into, kind: ChunkKind) -> Self { @@ -552,3 +593,34 @@ pub enum ChunkKind { /// [Typst](https://typst.app/). Math, } + +/// The kind of a string chunk for use with the case folder. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)] +pub enum FoldableKind { + /// Case-folding will be applied. + #[default] + Normal, + /// Case-folding will not be applied. + Verbatim, +} + +impl TryFrom for FoldableKind { + type Error = (); + + fn try_from(value: ChunkKind) -> Result { + match value { + ChunkKind::Normal => Ok(Self::Normal), + ChunkKind::Verbatim => Ok(Self::Verbatim), + ChunkKind::Math => Err(()), + } + } +} + +impl From for ChunkKind { + fn from(value: FoldableKind) -> Self { + match value { + FoldableKind::Normal => Self::Normal, + FoldableKind::Verbatim => Self::Verbatim, + } + } +}