pandoc · Delanii · Oct 13, 2020 · Oct 13, 2020 · Oct 13, 2020 · Nov 6, 2020
@@ -0,0 +1,33 @@
+# Non-breakable space filter
+
+This filter replaces regular spaces with non-breakable spaces according to 
+predefined conditions.
+
+Rules for space replacement are defined for two languages, English and Czech
+(English is the default), in the `prefixes` tables. In addition, non-breakable
+spaces are inserted in front of dashes and in front of numbers. The rules for
+inserting non-breakable spaces in English are not as firm as in the author's
+native language (Czech), but some typographic conventions suggest inserting a
+non-breakable space after the words "I", "the", "The", "a", "A". Any
+suggestions for improving English support in this filter are highly welcome.
+Some extra effort is taken to detect these patterns in strings that are not
+fully parsed (for example, when this filter runs after a macro-replacing
+filter).
+
+In this regard the filter works similarly to the TeX `vlna` preprocessor
+(Czech only) or the LuaTeX `luavlna` package (international).
+
+The default settings can easily be customized in the filter file
+`pandocVlna.lua` by changing the contents of the `prefixes` or `dashes` tables.
+
+Currently supported formats are:
+
+* LaTeX and ConTeXt
+* Open Office Document
+* MS Word
+* HTML
+
+For other formats the filter falls back to inserting the Unicode non-breakable
+space character (written `\u{a0}` in the filter source).
+
+**NOTE**: Using this filter removes possible line-break positions and so puts
+additional strain on line breaking. Whenever possible, consider allowing
+hyphenation.
@@ -0,0 +1,12 @@
+<h1 id="tests">Tests</h1>
+<h2 id="basic-test">Basic test</h2>
+<p>a&nbsp;test i&nbsp;test k&nbsp;test o&nbsp;test s&nbsp;test u&nbsp;test v&nbsp;test z&nbsp;test A&nbsp;test I&nbsp;test K&nbsp;test O&nbsp;test S&nbsp;test U&nbsp;test V&nbsp;test Z&nbsp;test&nbsp;– test&nbsp;– test</p>
+<h2 id="test-with-numbers">Test with numbers</h2>
+<p>Test&nbsp;19 test “19” test</p>
+<h2 id="test-of-double-prefixes.">Test of double prefixes.</h2>
+<p>A&nbsp;i&nbsp;test, i&nbsp;v&nbsp;test, a&nbsp;k&nbsp;test, a&nbsp;v&nbsp;test.</p>
+<h2 id="test-of-block-code">Test of block code</h2>
+<pre><code>a = 5
+k = &quot;test&quot;</code></pre>
+<h2 id="test-of-inline-code">Test of inline code</h2>
+<p>Test <code>a = 5</code> test</p>
@@ -0,0 +1,12 @@
+<h1 id="tests">Tests</h1>
+<h2 id="basic-test">Basic test</h2>
+<p>a&nbsp;test i test A&nbsp;test I&nbsp;test the&nbsp;test The&nbsp;test&nbsp;– test&nbsp;– test</p>
+<h2 id="test-with-numbers">Test with numbers</h2>
+<p>Test&nbsp;19 test “19” test</p>
+<h2 id="test-of-double-prefixes.">Test of double prefixes.</h2>
+<p>A&nbsp;i test, i v test, a&nbsp;k test, a&nbsp;v test.</p>
+<h2 id="test-of-block-code">Test of block code</h2>
+<pre><code>a = 5
+k = &quot;test&quot;</code></pre>
+<h2 id="test-of-inline-code">Test of inline code</h2>
+<p>Test <code>a = 5</code> test</p>
@@ -0,0 +1,6 @@
+# Command used to compare filter output with the expected files; `?=` lets
+# the caller override it from the environment or the command line.
+DIFF ?= diff --strip-trailing-cr -u
+
+# Run the filter on both sample documents and diff each result against its
+# expected HTML file (`-` makes diff read pandoc's output from stdin).
+test:
+ @pandoc --lua-filter=pandocVlna.lua sampleCZ.md | $(DIFF) expectedCZ.html -
+ @pandoc --lua-filter=pandocVlna.lua sampleEN.md | $(DIFF) expectedEN.html -
+.PHONY: test
@@ -0,0 +1,183 @@
+--[[
+pandocVlna.lua - Filter to automatically insert non-breakable spaces in specific
+locations in text.
+
+Currently supports the Czech and English languages, with English as the
+default. PRs or suggestions that improve current features or add support for
+other languages are highly welcome.
+Inspired by similar tools in the TeX toolchain: `luavlna` and `vlna`.
+
+Author: Tomas Krulis (with substantial help from Albert Krewinkel)
+License: MIT - more details in LICENSE file in repository root directory
+--]]
+
+local utils = require 'pandoc.utils'
+local stringify = utils.stringify
+
+--[[
+Prefix words after which the following space is replaced by a non-breakable
+one. Each language table is a set: the word is the key and the value is
+`true`. The word lists below can be edited to suit user requirements.
+--]]
+
+-- Active prefix set; starts empty and is filled in by the `Meta` filter.
+local prefixes = {}
+
+-- Turn a list of words into a lookup set (`set[word] == true`).
+local function to_set(words)
+  local set = {}
+  for _, word in ipairs(words) do
+    set[word] = true
+  end
+  return set
+end
+
+-- English: the pronoun "I" and the articles, in both letter cases.
+local prefixesEN = to_set { 'I', 'a', 'A', 'the', 'The' }
+
+-- Czech: one-letter prepositions and conjunctions, in both letter cases.
+local prefixesCZ = to_set {
+  'a', 'i', 'k', 'o', 's', 'u', 'v', 'z',
+  'A', 'I', 'K', 'O', 'S', 'U', 'V', 'Z'
+}
+
+--[[
+Select the active `prefixes` table from the document's `lang` metadata.
+
+Czech is recognised as the bare code `cs` and as a longer tag that starts
+with `cs-` or `cs_` (for example `cs-CZ`), compared case-insensitively.
+Any other value, and a missing `lang` field, selects the English table.
+The function returns nothing, so the metadata itself is left unchanged.
+--]]
+function Meta(meta)
+  -- `local` keeps the language tag out of the global environment
+  local lang = ''
+  if meta.lang then
+    lang = stringify(meta.lang):lower()
+  end
+
+  if lang == 'cs' or lang:match('^cs[-_]') then
+    prefixes = prefixesCZ
+  else
+    prefixes = prefixesEN -- default to English prefixes
+  end
+end
+
+--[[
+Set of dash strings in front of which the space is made non-breakable.
+Some languages (Czech among them) require a non-breakable space *before* a
+long dash. Both the two-hyphen spelling and the en dash character are listed.
+--]]
+
+local dashes = {
+ ['--'] = true,
+ ['–'] = true
+}
+
+--[[
+Raw markup used for a non-breakable space, keyed by output format family.
+--]]
+
+local nonbreakablespaces = {
+  html = '&nbsp;',
+  latex = '~',
+  context = '~'
+}
+
+--[[
+Build the inline element that replaces a breakable space.
+
+`format` is the output format name (the filter passes the global `FORMAT`).
+The name is matched by substring, so variants such as `html5` or `html4` are
+treated like `html`. HTML gets a raw `&nbsp;` entity, LaTeX and ConTeXt get
+their raw markup from `nonbreakablespaces`, and every other format gets a
+`Str` holding the Unicode non-breakable space character.
+--]]
+
+function insert_nonbreakable_space(format)
+  if format:match 'html' then
+    return pandoc.RawInline('html', nonbreakablespaces.html)
+  elseif format:match 'latex' then
+    return pandoc.RawInline('tex', nonbreakablespaces.latex)
+  elseif format:match 'context' then
+    -- read the ConTeXt entry, so changing it in the table takes effect
+    return pandoc.RawInline('tex', nonbreakablespaces.context)
+  else
+    -- fallback: the Unicode non-breakable space character itself
+    -- pandoc.Str '\xc2\xa0' -- also works
+    return pandoc.Str '\u{a0}'
+  end
+end
+
+--[[
+Core filter function:
+
+* It iterates over all inline elements of a list of inlines.
+* A `Space` or `SoftBreak` that has a neighbour on both sides is replaced by
+  the format-specific non-breakable space element when
+    - the preceding `Str` is one of the active `prefixes`, or
+    - the following `Str` is one of the `dashes`, or
+    - the following `Str` contains at least one digit.
+* A `Str` that itself contains whitespace (which may happen when an earlier
+  filter produced unparsed text) has the whitespace after each prefix word
+  replaced by the Unicode non-breakable space character.
+* Returns the modified list of inlines.
+--]]
+
+-- Replace the whitespace after every prefix word inside `text` with the
+-- Unicode non-breakable space character. An inline element cannot be
+-- embedded in a string, so the plain character is used here.
+local function protect_prefixes(text)
+  local rewritten = text:gsub('(%S+)%s+', function (word)
+    if prefixes[word] then
+      return word .. '\u{a0}'
+    end
+    return nil -- not a prefix: gsub keeps the original match
+  end)
+  return rewritten
+end
+
+function Inlines (inlines)
+
+  -- replacement element for the non-breakable space in the current format
+  local nbsp = insert_nonbreakable_space(FORMAT)
+  local last = #inlines
+
+  for i = 1, last do
+    local current = inlines[i]
+
+    if current.t == 'Space' or current.t == 'SoftBreak' then
+
+      -- only breaks with an element on both sides are candidates, so the
+      -- neighbour lookups below never index outside the list
+      if i > 1 and i < last then
+        local previous = inlines[i-1]
+        local following = inlines[i+1]
+
+        -- the tables map members to `true`, so a plain lookup is the test
+        local after_prefix = previous.t == 'Str' and prefixes[previous.text]
+        local before_dash = following.t == 'Str' and dashes[following.text]
+        -- any digit anywhere in the following string counts as a number
+        local before_number = following.t == 'Str'
+          and following.text:find('%d')
+
+        if after_prefix or before_dash or before_number then
+          inlines[i] = nbsp
+        end
+      end
+
+    elseif current.t == 'Str' and current.text:find('%s') then
+      inlines[i].text = protect_prefixes(current.text)
+    end
+
+  end
+  return inlines
+end
+
+-- Two separate filter tables, intended to make Meta run over the document before Inlines does.
+return {
+ {Meta = Meta},
+ {Inlines = Inlines},
+}
@@ -0,0 +1,29 @@
+---
+lang: cs
+---
+
+# Tests
+
+## Basic test
+
+a test i test k test o test s test u test v test z test A test I test K test O
+test S test U test V test Z test -- test – test
+
+## Test with numbers
+
+Test 19 test "19" test
+
+## Test of double prefixes.
+
+A i test, i v test, a k test, a v test.
+
+## Test of block code
+
+```
+a = 5
+k = "test"
+```
+
+## Test of inline code
+
+Test `a = 5` test
@@ -0,0 +1,24 @@
+# Tests
+
+## Basic test
+
+a test i test A test I test the test The test -- test – test
+
+## Test with numbers
+
+Test 19 test "19" test
+
+## Test of double prefixes.
+
+A i test, i v test, a k test, a v test.
+
+## Test of block code
+
+```
+a = 5
+k = "test"
+```
+
+## Test of inline code
+
+Test `a = 5` test