From cc1ec064781021cc54168e9765d8b956b5ad2732 Mon Sep 17 00:00:00 2001 From: Omikhleia Date: Wed, 11 Dec 2024 16:30:34 +0100 Subject: [PATCH] feat: Add basic support for bibliographic citations Based on SILE 0.15.7 new CSL support. --- README.md | 1 + examples/extra-styles.dj | 9 ++ examples/markdown-djot.bib | 23 ++++ examples/sile-and-djot.dj | 32 ++++-- examples/sile-and-markdown-manual-styles.yml | 114 +++++++++++++------ examples/sile-and-markdown.md | 21 +++- inputters/djot.lua | 5 + inputters/markdown.lua | 13 +++ inputters/pandocast.lua | 30 ++++- lua-libraries/djot/ast.lua | 4 + lua-libraries/djot/inline.lua | 12 ++ packages/markdown/commands.lua | 8 ++ packages/markdown/utils.lua | 33 +++++- 13 files changed, 258 insertions(+), 47 deletions(-) create mode 100644 examples/markdown-djot.bib diff --git a/README.md b/README.md index eff5bc4..d0a8b5c 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,7 @@ This is but an overview. For more details, please refer to the provided example - Raw inline HTML convenience subset in Markdown - Advanced use of symbols in Djot (variable substitution ,and templating) - Advanced configuration (e.g. Markdown variants, headings shifting, etc.) +- Bibliography citations (when appropriate support is enabled) ## Use with the resilient collection diff --git a/examples/extra-styles.dj b/examples/extra-styles.dj index 5d2885a..b723d8f 100644 --- a/examples/extra-styles.dj +++ b/examples/extra-styles.dj @@ -57,3 +57,12 @@ class:registerCommand("Initial", function (_, content) SILE.call("dropcap", { lines = lines, family = family, color = color, join = true }, { letter }) end) ``` + +```=sile +% Chicken and egg problem: resilient.sile will support loading bibliographies +% in master documents, but relies on markdown.sile as dependency for references +% in content files. +% So this is a workaround to load the bibliography here, be able to release a +% new version of markdown.sile and move forward with the development. 
+\loadbibliography[file=examples/markdown-djot.bib] +``` diff --git a/examples/markdown-djot.bib b/examples/markdown-djot.bib new file mode 100644 index 0000000..4fd3bf5 --- /dev/null +++ b/examples/markdown-djot.bib @@ -0,0 +1,23 @@ +@book{sile, + title = {The SILE Book}, + author = {Cozens, Simon and Maclennan, Caleb and Nicole, Olivier and Willis, Didier}, + year = {2014–202X}, +} + +@book{sile:willis2021, + title = {The re·sil·ient collection of classes & packages for SILE: User Guide}, + author = {Willis, Didier}, + year = {2021–}, +} + +@book{sile:willis2022, + title = {Markdown and Djot to PDF with SILE: Ultimate Guide & Showcase Document}, + author = {Willis, Didier}, + year = {2022–}, +} + +@book{sile:willis2024, + title = {SILE and the Hydra of Maths. Showcase & Critical Assessment 2024}, + author = {Willis, Didier}, + year = {2024}, +} diff --git a/examples/sile-and-djot.dj b/examples/sile-and-djot.dj index 871491b..790f929 100644 --- a/examples/sile-and-djot.dj +++ b/examples/sile-and-djot.dj @@ -501,7 +501,7 @@ $$`\pi=\sum_{k=0}^\infty\frac{1}{16^k}(\frac{4}{8k+1} − \frac{2}{8k+4} − \fr Attributes are passed through to SILE. In display math mode, you can number equations using the default counter (`numbered=true`), a named counter (`counter=name`), or a custom value (`number=...`). -$$`e^{i\phi} = \mi{cos} \phi + i \mi{sin} \phi`{numbered=true} +$$`e^{i\phi} = \cos \phi + i \sin \phi`{numbered=true} ### Footnote calls @@ -517,6 +517,25 @@ A footnote call[^djot-some-fn] is marked... [^djot-some-fn]: An example footnote. +### Bibliographic citations + +Djot does not officially support citations at the moment. +This implementation, however, recognizes a simplified subset of the Pandoc citation syntax. + +{custom-style=CodeBlock} +::: +``` +I wrote a whole book on mathematics with SILE [@sile:willis2024]. +... in a "master document" [@sile:willis2021, part 1]. +... _The SILE Book_ for more information [@sile, chapter 5]. 
+``` +::: + +I wrote a whole book on mathematics with SILE [@sile:willis2024]. + +How to load bibliographies and configure citation styles is beyond the scope of this manual. +If you are using the resilient collection, the most natural way to do this is in a "master document" [@sile:willis2021, part 1]. +Note that support for bibliographies in SILE is an experimental feature, so you may also want to check _The SILE Book_ for more information [@sile, chapter 5]. {#djot-symbols} ### Symbols @@ -809,8 +828,8 @@ Any further blocks included in the item are assumed to be the definition. Block attributes are passed to the underlying definition environment. When not using a supporting class or package, the converter uses its own fallback method, with hard-coded styling (e.g. the term is typeset in boldface). -When using the *resilient* classes, the converter uses the *resilient.defn* environment, and the `variant` option may thus be used to switch to an alternate style.[^djot-defn-variant] -For the purpose of illustration, let's say you have a "Custom" variant with an _ad hoc_ style. +When using the *resilient* classes, the *resilient.defn* environment is loaded, and the `variant` option may thus be used to switch to an alternate style.[^djot-defn-variant] +As an example, let's say you have a "Custom" variant with an _ad hoc_ style. {custom-style=CodeBlock} ::: @@ -996,7 +1015,7 @@ The `render` attribute can be set to `false` to prevent this behavior, and enfor For instance, the image below is produced with `{.dot width="3.5cm" layout=twopi}`. ::: - {width="3.5cm" layout=twopi} + {width="3cm" layout=twopi} ```dot graph { node [fillcolor="lightskyblue:darkcyan" style=filled gradientangle=270] @@ -1009,7 +1028,7 @@ The `render` attribute can be set to `false` to prevent this behavior, and enfor ::: ^ An example of a rendered DOT graph. - The original DOT description used in the code block is shown hereafter. + The original DOT description is shown hereafter. 
{custom-style=CodeBlock} ::: @@ -1239,7 +1258,7 @@ Language changes within the text are supported, on either blocks or inline elements. It relies on the `lang` key-value attribute, where the value is a BCP47 language code. It is not much visible below, obviously, but the language setting affects the hyphenation and other properties. -In the case of French, for instance, you can see the special thin space before the exclamation point, the use of appropriate quotation marks, and the internal spacing around quoted text: +In the case of French, for instance, you can see the thin space before the exclamation point, the use of appropriate quotation marks, and the internal spacing around quoted text: {lang=fr} > Cette citation est en français! @@ -1331,7 +1350,6 @@ Since it's possible to have unused footnote definitions, let's craft one as show When encountering a symbol, this converter looks for such a footnote and expands its content. It works with inline elements as shown above, but also with full blocks, provided the symbol is the only element in a paragraph of its own. - Of course, these pseudo-footnotes[^djot-pseudo-footnotes] can in turn contain symbols, which get replaced too. 
diff --git a/examples/sile-and-markdown-manual-styles.yml b/examples/sile-and-markdown-manual-styles.yml index c0e0734..855e651 100644 --- a/examples/sile-and-markdown-manual-styles.yml +++ b/examples/sile-and-markdown-manual-styles.yml @@ -1,25 +1,4 @@ -defn-term-Custom: - inherit: "defn-base" - origin: "resilient.defn" - style: - font: - features: "+smcp" - paragraph: - after: - vbreak: false - before: - skip: "smallskip" - -defn-desc-Custom: - inherit: "defn-desc" - style: - paragraph: - after: - skip: "smallskip" - before: - vbreak: false - CodeBlock: origin: "resilient.book" style: @@ -44,22 +23,12 @@ CoverCredit: CustomDroppedInitial: origin: "resilient.book" style: + color: "#66a0b3" font: family: "Zallman Caps" - color: "#66a0b3" special: lines: 3 -FramedPara: - origin: "resilient.book" - style: - paragraph: - after: - skip: "medskip" - align: "fancy-framed" - before: - skip: "medskip" - Difference: style: font: @@ -71,6 +40,16 @@ Difference: before: skip: "smallskip" +FramedPara: + origin: "resilient.book" + style: + paragraph: + after: + skip: "medskip" + align: "fancy-framed" + before: + skip: "medskip" + blockquote: origin: "resilient.book" style: @@ -104,6 +83,46 @@ bookmatter-copyright: paragraph: align: "noparindent" +bookmatter-cover-author: + inherit: "bookmatter-coverpage" + origin: "resilient.bookmatters" + style: + font: + size: "16pt" + properties: + case: "upper" + +bookmatter-cover-publisher: + inherit: "bookmatter-coverpage" + origin: "resilient.bookmatters" + style: + +bookmatter-cover-subtitle: + inherit: "bookmatter-coverpage" + origin: "resilient.bookmatters" + style: + font: + size: "16pt" + paragraph: + after: + skip: "5%fh" + +bookmatter-cover-title: + inherit: "bookmatter-coverpage" + origin: "resilient.bookmatters" + style: + font: + size: "20pt" + paragraph: + after: + skip: "5%fh" + before: + skip: "30%fh" + +bookmatter-coverpage: + origin: "resilient.bookmatters" + style: + bookmatter-halftitle: origin: "resilient.bookmatters" 
style: @@ -179,6 +198,15 @@ defn-desc: before: vbreak: false +defn-desc-Custom: + inherit: "defn-desc" + style: + paragraph: + after: + skip: "smallskip" + before: + vbreak: false + defn-term: inherit: "defn-base" origin: "resilient.defn" @@ -191,6 +219,18 @@ defn-term: before: skip: "smallskip" +defn-term-Custom: + inherit: "defn-base" + origin: "resilient.defn" + style: + font: + features: "+smcp" + paragraph: + after: + vbreak: false + before: + skip: "smallskip" + dropcap: origin: "resilient.book" style: @@ -239,6 +279,16 @@ epigraph-text: paragraph: align: "justify" +eqno: + origin: "resilient.book" + style: + numbering: + after: + text: ")" + before: + text: "(" + display: "arabic" + fancytoc-base: style: diff --git a/examples/sile-and-markdown.md b/examples/sile-and-markdown.md index d8286f2..ef95979 100644 --- a/examples/sile-and-markdown.md +++ b/examples/sile-and-markdown.md @@ -507,6 +507,24 @@ They are introduced with directly with the caret `^`, immediately followed by th ::: +### Bibliographic citations + +This implementation recognizes a simplified subset of the Pandoc citation syntax, aligned with what we also support in Djot. + +::: {custom-style=CodeBlock} +``` +I wrote a whole book on mathematics with SILE [@sile:willis2024]. +... in a "master document" [@sile:willis2021, part 1]. +... _The SILE Book_ for more information [@sile, chapter 5]. +``` +::: + +I wrote a whole book on mathematics with SILE [@sile:willis2024]. + +How to load bibliographies and configure citation styles is beyond the scope of this manual. +If you are using the resilient collection, the most natural way to do this is in a "master document" [@sile:willis2021, part 1]. +Note that support for bibliographies in SILE is an experimental feature, so you may also want to check _The SILE Book_ for more information [@sile, chapter 5]. 
+ ### Symbols ::: {custom-style=Difference} @@ -1054,7 +1072,8 @@ This converter recognizes a few specific attributes on divs: ### Horizontal dividers In standard Markdown, a line containing a row of three or more asterisks, dashes, or underscores (optionally separated by spaces) are supposed to produce a horizontal rule. -This converter however slightly deviates from that simple specification^[And also from Pandoc, therefore. Quite obviously, the `\autodoc:package{pandocast}`{=sile} package will also only show horizontal rules.], +This converter however slightly deviates from that simple specification^[And also from Pandoc. +Our `\autodoc:package{pandocast}`{=sile} package will only show horizontal rules.], for the mere reason that such a horizontal rule is seldom typographically sound in many contexts. Three asterisks produce a centered asterism. diff --git a/inputters/djot.lua b/inputters/djot.lua index 6fd3430..7a60f98 100644 --- a/inputters/djot.lua +++ b/inputters/djot.lua @@ -759,6 +759,11 @@ function Renderer:math (node) return createCommand("markdown:internal:math", options, { node.s }, node_pos(node)) end +function Renderer:naive_citations (node) + local pos = node_pos(node) + return utils.naiveCitations(node.s, pos) +end + -- SILE INPUTTER LOGIC local base = require("inputters.base") diff --git a/inputters/markdown.lua b/inputters/markdown.lua index 14b8dd9..e332521 100644 --- a/inputters/markdown.lua +++ b/inputters/markdown.lua @@ -267,6 +267,10 @@ local function SileAstWriter (writerOps, renderOps) return createCommand("markdown:internal:math" , { mode = mode }, { text }) end + writer.naive_citations = function (rawcites) + return utils.naiveCitations(rawcites) + end + -- Final AST conversion logic. 
-- The lunamark "AST" is made of "ropes": -- "A rope is an array whose elements may be ropes, strings, numbers, @@ -366,11 +370,15 @@ parsers.lineof = function (c) local function customSyntax (writer, options) return function (syntax) + -- Re-create the horizontal rule syntax to use our modified lineof + -- with a capture. syntax.HorizontalRule = (parsers.lineof(parsers.asterisk) + parsers.lineof(parsers.dash) + parsers.lineof(parsers.underscore) ) / writer.hrule + -- Extend the smart typography syntax to recognize primes and double primes + -- after digits. if options.smart_primes then syntax.Smart = lpeg.P("\"") * lpeg.B(parsers.digit*1) / function () return "″" -- double primes @@ -380,6 +388,10 @@ local function customSyntax (writer, options) end + syntax.Smart end + -- Override the citation syntax to use our own raw citations writer + syntax.Citations = lpeg.P("[") + * lpeg.C(lpeg.P("@") *(lpeg.P(1) - lpeg.P("]"))^1) + * lpeg.P("]") / writer.naive_citations return syntax end end @@ -430,6 +442,7 @@ function inputter:parse (doc) line_blocks = true, escaped_line_breaks = true, tex_math_dollars = true, + citations = true, } for k, v in pairs(self.options) do -- Allow overriding known options diff --git a/inputters/pandocast.lua b/inputters/pandocast.lua index 3f61535..3dde186 100644 --- a/inputters/pandocast.lua +++ b/inputters/pandocast.lua @@ -515,13 +515,33 @@ function Renderer:Quoted (quotetype, inlines) return createCommand("singlequoted", {}, content) end +local function _inlinesToStr (inlines) + local buffer = {} + for _, inline in ipairs(inlines) do + if inline.t == "Str" then + buffer[#buffer + 1] = inline.c + elseif inline.t == "Space" or inline.t == "SoftBreak" or inline.t == "LineBreak" then + buffer[#buffer + 1] = " " + else + SU.warn("Skipped inline element in Cite: " .. 
inline.t) + end + end + return table.concat(buffer) +end + -- Cite [Citation] [Inline] --- Where a Citation is a dictionary +-- Where a Citation is a dictionary: +-- { citationId: Text, citationPrefix: [Inline], citationSuffix: [Inline], +-- citationMode: CitationMode, citationNoteNum: Int, citationHash: Int } +-- and CitationMode is a tag AuthorInText, SuppressAuthor or NormalCitation. +-- We do not use the parse Citation, but rather the inlines directly, +-- re-serializing them to string, so we can be aligned with what we do in +-- the Markdown and Djot inputters. function Renderer:Cite (_, inlines) - -- TODO - -- We could possibly do better. - -- Just render the inlines and ignore the citations - return self:render(inlines) + local rawcites = _inlinesToStr(inlines) + -- Remove leading and trailing brackets (if any) + rawcites = rawcites:gsub("^%[", ""):gsub("%]$", "") + return utils.naiveCitations(rawcites) end -- Code Attr Text diff --git a/lua-libraries/djot/ast.lua b/lua-libraries/djot/ast.lua index d9d75d8..d034a2a 100644 --- a/lua-libraries/djot/ast.lua +++ b/lua-libraries/djot/ast.lua @@ -907,6 +907,10 @@ local function to_ast(parser, sourcepos) node.s = sub(subject, startpos + 1, endpos - 1) elseif tag == "footnote_reference" then node.s = sub(subject, startpos + 2, endpos - 1) + -- BEGIN EXTENSION DIDIER 20241214 + elseif tag == "naive_citations" then + node.s = sub(subject, startpos + 1, endpos - 1) + -- END EXTENSION DIDIER elseif tag == "symbol" then node.alias = sub(subject, startpos + 1, endpos - 1) elseif tag == "raw_format" then diff --git a/lua-libraries/djot/inline.lua b/lua-libraries/djot/inline.lua index 8a3c2ad..f1d8c97 100644 --- a/lua-libraries/djot/inline.lua +++ b/lua-libraries/djot/inline.lua @@ -269,6 +269,18 @@ InlineParser.matchers = { self:add_match(pos, ep, "footnote_reference") return ep + 1 else + -- BEGIN EXTENSION DIDIER 20241214 + -- [@...] 
is interpreted as a citation reference + -- In-text citations and author-suppressed citations are not well-defined + -- in CSL as far as I can tell, so we'll pass on those for now. + -- So this is for "normal" citations, only, and the reference parsing is + -- left to the renderer. + sp, ep = bounded_find(self.subject, "^%@([^]]+)%]", pos + 1, endpos) + if sp then -- citation ref + self:add_match(pos, ep, "naive_citations") + return ep + 1 + end + -- END EXTENSION DIDIER 20241214 self:add_opener("[", pos, pos) self:add_match(pos, pos, "str") return pos + 1 diff --git a/packages/markdown/commands.lua b/packages/markdown/commands.lua index 6deb9cd..76441cf 100644 --- a/packages/markdown/commands.lua +++ b/packages/markdown/commands.lua @@ -139,6 +139,8 @@ function package:_init (_) -- Only load low-level packages (= utilities) -- The class should be responsible for loading the appropriate higher-level -- constructs, see fallback commands further below for more details. + self:loadPackage("bibtex") + SILE.settings:set("bibtex.style", "csl") -- The future is CSL self:loadPackage("color") self:loadPackage("embedders") self:loadPackage("image") @@ -855,6 +857,12 @@ Please consider using a resilient-compatible class!]]) end end, "Symbol in Djot (internal)") + self:registerCommand("markdown:internal:citations", function (_, content) + -- We cannot handle multiple citations yet in a single call. + -- See https://github.com/sile-typesetter/sile/issues/2196 + SILE.process(content) + end, "Citations (internal)") + -- B. Fallback commands self:registerCommand("markdown:fallback:blockquote", function (_, content) diff --git a/packages/markdown/utils.lua b/packages/markdown/utils.lua index d824b27..4009a09 100644 --- a/packages/markdown/utils.lua +++ b/packages/markdown/utils.lua @@ -3,6 +3,7 @@ -- require("silex.ast") -- Compatibility layer local createCommand = SU.ast.createCommand +local createStructuredCommand = SU.ast.createStructuredCommand --- Some utility functions. 
-- @section utils @@ -95,8 +96,8 @@ local bsratiocache = {} --- Compute the baseline ratio for the current font. --- This is a ratio of the descender to the theoretical height of the font. ---- @return number Descender ratio -local computeBaselineRatio = function () +---@return number Descender ratio +local function computeBaselineRatio () local fontoptions = SILE.font.loadDefaults({}) local bsratio = bsratiocache[SILE.font._key(fontoptions)] if not bsratio then @@ -108,6 +109,33 @@ local computeBaselineRatio = function () return bsratio end +--- Naive citation reference parser. +--- We only support a very simple syntax for now: "@key[, ]+[locator]" +--- Where the unique locator consists of a name and a value separated by spaces. +---@param str string Citation string (one or more references separated by ";") +---@param pos? table Position object (optional, not all inputters provide one) +---@return table AST command +local function naiveCitations (str, pos) + local refs = pl.stringx.split(str, ";") + pl.tablex.transform(function (ref) + local key, locator = ref:match("^[%s]*@([^%s,]+)[, ]*(.*)$") + if not key or key == "" then + SU.warn("Skipping citation reference '" .. ref .. "'") + return {} + end + if locator and locator ~= "" then + local locnname, locnvalue = locator:match("^([^%s]+)[%s]+(.+)$") + if locnname and locnvalue then + -- Remove trailing periods in the locator name (locnname) if any + locnname = locnname:gsub("%.+$", "") + return createCommand("cite", { key = key, [locnname] = locnvalue }, nil, pos) + end + end + return createCommand("cite", { key = key }, nil, pos) + end, refs) + return createStructuredCommand("markdown:internal:citations", {}, refs, pos) +end + --- @export return { getFileExtension = getFileExtension, @@ -116,4 +144,5 @@ return { hasRawHandler = hasRawHandler, hasEmbedHandler = hasEmbedHandler, computeBaselineRatio = computeBaselineRatio, + naiveCitations = naiveCitations, }