Skip to content

Commit

Permalink
RST reader: flatten nested inlines, closes #4368 (#4554)
Browse files Browse the repository at this point in the history
nested inlines are not valid RST syntax, so we flatten them following
some readability criteria discussed in #4368.
  • Loading branch information
danse authored and jgm committed Apr 26, 2018
1 parent cfa4eee commit eef1c21
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 10 deletions.
78 changes: 75 additions & 3 deletions src/Text/Pandoc/Writers/RST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST ( writeRST ) where
module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
import Prelude
import Control.Monad.State.Strict
import Data.Char (isSpace, toLower)
Expand Down Expand Up @@ -377,8 +377,10 @@ blockListToRST :: PandocMonad m
blockListToRST = blockListToRST' False

transformInlines :: [Inline] -> [Inline]
transformInlines = stripLeadingTrailingSpace . insertBS
. filter hasContents . removeSpaceAfterDisplayMath
transformInlines = insertBS .
filter hasContents .
removeSpaceAfterDisplayMath .
concatMap (transformNested . flatten)
where -- empty inlines are not valid RST syntax
hasContents :: Inline -> Bool
hasContents (Str "") = False
Expand Down Expand Up @@ -412,6 +414,8 @@ transformInlines = stripLeadingTrailingSpace . insertBS
x : insertBS (y : zs)
insertBS (x:ys) = x : insertBS ys
insertBS [] = []
transformNested :: [Inline] -> [Inline]
transformNested = map (mapNested stripLeadingTrailingSpace)
surroundComplex :: Inline -> Inline -> Bool
surroundComplex (Str s@(_:_)) (Str s'@(_:_)) =
case (last s, head s') of
Expand Down Expand Up @@ -449,6 +453,74 @@ transformInlines = stripLeadingTrailingSpace . insertBS
isComplex (Span _ (x:_)) = isComplex x
isComplex _ = False

-- | Flattens nested inlines. Extracts nested inlines and goes through
-- them either collapsing them in the outer inline container or
-- pulling them out of it
flatten :: Inline -> [Inline]
flatten outer = combineAll $ dropInlineParent outer
where combineAll = foldl combine []

combine :: [Inline] -> Inline -> [Inline]
combine f i =
case (outer, i) of
-- quotes are not rendered using RST inlines, so we can keep
-- them and they will be readable and parsable
(Quoted _ _, _) -> keep f i
(_, Quoted _ _) -> keep f i
-- parent inlines would prevent links from being correctly
-- parsed, in this case we prioritise the content over the
-- style
(_, Link _ _ _) -> emerge f i
-- always give priority to strong text over emphasis
(Emph _, Strong _) -> emerge f i
-- drop all other nested styles
(_, _) -> collapse f i

emerge f i = f <> [i]
keep f i = appendToLast f [i]
collapse f i = appendToLast f $ dropInlineParent i

appendToLast :: [Inline] -> [Inline] -> [Inline]
appendToLast [] toAppend = [setInlineChildren outer toAppend]
appendToLast flattened toAppend
| isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend]
| otherwise = flattened <> [setInlineChildren outer toAppend]
where lastFlat = last flattened
appendTo o i = mapNested (<> i) o
isOuter i = emptyParent i == emptyParent outer
emptyParent i = setInlineChildren i []

mapNested :: ([Inline] -> [Inline]) -> Inline -> Inline
mapNested f i = setInlineChildren i (f (dropInlineParent i))

dropInlineParent :: Inline -> [Inline]
dropInlineParent (Link _ i _) = i
dropInlineParent (Emph i) = i
dropInlineParent (Strong i) = i
dropInlineParent (Strikeout i) = i
dropInlineParent (Superscript i) = i
dropInlineParent (Subscript i) = i
dropInlineParent (SmallCaps i) = i
dropInlineParent (Cite _ i) = i
dropInlineParent (Image _ i _) = i
dropInlineParent (Span _ i) = i
dropInlineParent (Quoted _ i) = i
dropInlineParent i = [i] -- not a parent, like Str or Space

setInlineChildren :: Inline -> [Inline] -> Inline
setInlineChildren (Link a _ t) i = Link a i t
setInlineChildren (Emph _) i = Emph i
setInlineChildren (Strong _) i = Strong i
setInlineChildren (Strikeout _) i = Strikeout i
setInlineChildren (Superscript _) i = Superscript i
setInlineChildren (Subscript _) i = Subscript i
setInlineChildren (SmallCaps _) i = SmallCaps i
setInlineChildren (Quoted q _) i = Quoted q i
setInlineChildren (Cite c _) i = Cite c i
setInlineChildren (Image a _ t) i = Image a i t
setInlineChildren (Span a _) i = Span a i
setInlineChildren leaf _ = leaf

inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc
inlineListToRST = writeInlines . walk transformInlines

Expand Down
24 changes: 24 additions & 0 deletions test/Tests/Writers/RST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ module Tests.Writers.RST (tests) where

import Prelude
import Test.Tasty
import Test.Tasty.HUnit
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
import Text.Pandoc.Writers.RST

infix 4 =:
(=:) :: (ToString a, ToPandoc a)
Expand Down Expand Up @@ -52,6 +54,17 @@ tests = [ testGroup "rubrics"
, ""
, " quoted"]
]
, testGroup "flatten"
[ testCase "emerges nested styles as expected" $
flatten (Emph [Str "1", Strong [Str "2"], Str "3"]) @?=
[Emph [Str "1"], Strong [Str "2"], Emph [Str "3"]]
, testCase "could introduce trailing spaces" $
flatten (Emph [Str "f", Space, Strong [Str "2"]]) @?=
[Emph [Str "f", Space], Strong [Str "2"]]
-- the test above is the reason why we call
-- stripLeadingTrailingSpace through transformNested after
-- flatten
]
, testGroup "inlines"
[ "are removed when empty" =: -- #4434
plain (strong (str "")) =?> ""
Expand All @@ -64,6 +77,17 @@ tests = [ testGroup "rubrics"
strong (space <> str "text" <> space <> space) =?> "**text**"
, "single space stripped" =:
strong space =?> ""
, "give priority to strong style over emphasis" =:
strong (emph (strong (str "s"))) =?> "**s**"
, "links are not elided by outer style" =:
strong (emph (link "loc" "" (str "text"))) =?>
"`text <loc>`__"
, "RST inlines cannot start nor end with spaces" =:
emph (str "f" <> space <> strong (str "d") <> space <> str "l") =?>
"*f*\\ **d**\\ *l*"
, "keeps quotes" =:
strong (str "f" <> doubleQuoted (str "d") <> str "l") =?>
"**f“d”l**"
]
, testGroup "headings"
[ "normal heading" =:
Expand Down
14 changes: 7 additions & 7 deletions test/writer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -615,21 +615,21 @@ This is *emphasized*, and so *is this*.

This is **strong**, and so **is this**.

An *`emphasized link </url>`__*.
An `emphasized link </url>`__.

***This is strong and em.***
**This is strong and em.**

So is ***this*** word.
So is **this** word.

***This is strong and em.***
**This is strong and em.**

So is ***this*** word.
So is **this** word.

This is code: ``>``, ``$``, ``\``, ``\$``, ``<html>``.

[STRIKEOUT:This is *strikeout*.]
[STRIKEOUT:This is strikeout.]

Superscripts: a\ :sup:`bc`\ d a\ :sup:`*hello*` a\ :sup:`hello there`.
Superscripts: a\ :sup:`bc`\ d a\ :sup:`hello` a\ :sup:`hello there`.

Subscripts: H\ :sub:`2`\ O, H\ :sub:`23`\ O, H\ :sub:`many of them`\ O.

Expand Down

0 comments on commit eef1c21

Please sign in to comment.