Skip to content

Commit

Permalink
remove multiple inline styles when writing RST, closes jgm#4368, closes
Browse files Browse the repository at this point in the history
  • Loading branch information
danse committed Mar 7, 2018
1 parent 3776404 commit 226e03f
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 14 deletions.
142 changes: 137 additions & 5 deletions src/Text/Pandoc/Writers/RST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to reStructuredText.
reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST ( writeRST ) where
module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
import Control.Monad.State.Strict
import Data.Char (isSpace, toLower)
import Data.List (isPrefixOf, stripPrefix)
Expand Down Expand Up @@ -376,12 +376,144 @@ blockListToRST :: PandocMonad m
-> RST m Doc
blockListToRST = blockListToRST' False

maybeGetInlines :: Inline -> Maybe [Inline]
maybeGetInlines (Link _ i _) = Just i
maybeGetInlines (Emph i) = Just i
maybeGetInlines (Strong i) = Just i
maybeGetInlines (Strikeout i) = Just i
maybeGetInlines (Superscript i) = Just i
maybeGetInlines (Subscript i) = Just i
maybeGetInlines (SmallCaps i) = Just i
maybeGetInlines (Quoted _ i) = Just i
maybeGetInlines (Cite _ i) = Just i
maybeGetInlines (Image _ i _) = Just i
maybeGetInlines (Span _ i) = Just i
maybeGetInlines _ = Nothing

getInlines :: Inline -> [Inline]
getInlines = maybe [] id . maybeGetInlines

setInlines :: [Inline] -> Inline -> Inline
setInlines i (Link a _ t) = Link a i t
setInlines i (Emph _) = Emph i
setInlines i (Strong _) = Strong i
setInlines i (Strikeout _) = Strikeout i
setInlines i (Superscript _) = Superscript i
setInlines i (Subscript _) = Subscript i
setInlines i (SmallCaps _) = SmallCaps i
setInlines i (Quoted q _) = Quoted q i
setInlines i (Cite c _) = Cite c i
setInlines i (Image a _ t) = Image a i t
setInlines i (Span a _) = Span a i
setInlines _ leaf = leaf

data Flattening = Flattening {
stOuter :: Inline,
stReady :: [Inline]
}

type Fla = State Flattening

applyToOuter :: (Inline -> Inline) -> Fla ()
applyToOuter f = do
outer <- gets stOuter
modify $ \st -> st{ stOuter = f outer }

clearStOuter :: Fla ()
clearStOuter = applyToOuter (setInlines [])

addReady :: Inline -> Fla ()
addReady i = do
ready <- gets stReady
modify $ \st -> st{ stReady = ready <> [i] }

-- | consider contents into `outer` ready and prepare `outer` for new contents
flushOuter :: Fla ()
flushOuter = do
outer <- gets stOuter
addReady outer
clearStOuter

-- | add contents from `inner` into `outer`
collapse :: Inline -> Fla ()
collapse inner = applyToOuter (addContentsTo inner)
where -- | `addContentsTo first second` will move the contents from
-- `second` into `first` stripping some inline style and
-- leaving other inline elements unchanged
addContentsTo :: Inline -> Inline -> Inline
addContentsTo i1 i2 = setInlines (getInlines i2 <> dropStyle i1) i2
dropStyle :: Inline -> [Inline]
dropStyle (Strong i) = i
dropStyle (Emph i) = i
-- other inline constructors correspond to inlines that don't
-- look ugly or confusing to the user when rendered, for
-- example Quoted. even if an RST parser wouldn't recognise
-- them, it seems a good idea to keep them at the moment. more
-- constructors can be matched here in case we will want to
-- drop those as well eventually
dropStyle i = [i]

-- | flush `outer` and `inner`
emerge :: Inline -> Fla ()
emerge inner = do
flushOuter
addReady inner

combine :: Inline -> Fla ()
combine i = do
outer <- gets stOuter -- this inline has been cleared but it does not matter
case (outer, i) of
(Link _ _ _, _) -> collapse i -- preserve outer links
(_, Link _ _ _) -> emerge i -- preserve inner links
(Strong _, Emph _) -> collapse i -- ignore inner emphs
(Emph _, Strong _) -> emerge i -- pull inner strongs out of an emph
(_, _) -> collapse i -- by default strip inner markup

-- | collect a list of inlines combining a containing one with the contents
collectReady :: Fla [Inline]
collectReady = do
outer <- gets stOuter
-- remove contained inlines, they will be either collapsed inside
-- again or pulled out of the nesting
clearStOuter
-- combine will not change the type of the outer, it will just
-- affect its contents
mapM combine (emergeCollapse $ getInlines outer)
-- after we went through all contents we could have some contents in
-- the outer inline, in that case we flush it to the list of ready
-- ones
flushOuter
-- here are our collapsed or emerged inlines
gets stReady

flatten :: Inline -> [Inline]
flatten o = fst $ runState collectReady (ini o)
where ini o' = Flattening { stOuter = o', stReady = [] } -- initial state

-- | emerge some inlines out of the containing one while removing others
emergeCollapse :: [Inline] -> [Inline]
emergeCollapse = concatMap flatten

-- | empty inline style like `****` would be an RST syntax error
dropEmpty :: [Inline] -> [Inline]
dropEmpty = filter hasContents
where hasContents :: Inline -> Bool
-- decide whether we want to render this inline or not. images
-- and links are the only inline containers we want to render
-- even when they don't contain any inline, because they carry
-- some content in the target
hasContents (Image _ _ _) = True
hasContents (Link _ _ _) = True
-- we render non-empty inline containers and all inlines that
-- are not containers
hasContents i = maybe True (not . emptyInlines) $ maybeGetInlines i
emptyInlines i = null i || all (== Str "") i

-- | Convert list of Pandoc inline elements to RST.
inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc
inlineListToRST lst =
mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>=
return . hcat
where -- remove spaces after displaymath, as they screw up indentation:
inlineListToRST lst = mapM inlineToRST (prepareInlines lst) >>= return . hcat
where prepareInlines = removeSpaceAfterDisplayMath . insertBS . dropEmpty . emergeCollapse
-- remove spaces after displaymath, as they screw up indentation:
removeSpaceAfterDisplayMath (Math DisplayMath x : zs) =
Math DisplayMath x : dropWhile (==Space) zs
removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs
Expand Down
31 changes: 31 additions & 0 deletions test/Tests/Writers/RST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
module Tests.Writers.RST (tests) where

import Test.Tasty
import Test.Tasty.HUnit
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
import Text.Pandoc.Writers.RST

infix 4 =:
(=:) :: (ToString a, ToPandoc a)
Expand Down Expand Up @@ -50,6 +52,35 @@ tests = [ testGroup "rubrics"
, ""
, " quoted"]
]
, testGroup "inlines"
[ "gives priority to strong style over emphasis" =: -- issue #4368
emph (str "first" <> strong (str "second")) =?> "*first*\\ **second**",
"filters out empty style inlines" =: -- #4434
strong (str "") =?> ""
]
, testGroup "flatten"
[ testCase "drops all inner styles" $
flatten (Strong [Emph [Strong [Str "s"]]]) @?=
[Strong [Str "s"]]
, testCase "drops outer styles that contain a link" $
flatten (Strong [Emph [Link ("", [], [])[Str "s"] ("", "")]]) @?=
[Strong [],Link ("",[],[]) [Str "s"] ("",""),Strong []]
, testCase "preserves strong text in an emphasis " $
flatten (Emph [Str "f", Str "s", Strong [Str "d"], Str "l"]) @?=
[Emph [Str "f", Str "s"], Strong [Str "d"], Emph [Str "l"]]
, testCase "drops emphasis in a strong inline" $
flatten (Strong [Str "f", Str "s", Emph [Str "d"], Str "l"]) @?=
[Strong [Str "f", Str "s", Str "d", Str "l"]]
, testCase "does not break links" $
flatten (Link ("", [], [])[Str "f", Strong [Str "d"]] ("", "")) @?=
[Link ("", [], [])[Str "f", Str "d"] ("", "")]
, testCase "keeps inlines not matched by dropStyle" $
flatten (Strong [Str "f", Subscript [Str "d"], Str "l"]) @?=
[Strong [Str "f", Subscript [Str "d"], Str "l"]]
, testCase "keeps an image even when it does not contain inlines" $
flatten (Image ("",[],[]) [] ("image5.jpeg","")) @?=
[Image ("",[],[]) [] ("image5.jpeg","")]
]
, testGroup "headings"
[ "normal heading" =:
header 1 (text "foo") =?>
Expand Down
18 changes: 9 additions & 9 deletions test/writer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -615,21 +615,21 @@ This is *emphasized*, and so *is this*.

This is **strong**, and so **is this**.

An *`emphasized link </url>`__*.
An `emphasized link </url>`__.

***This is strong and em.***
**This is strong and em.**

So is ***this*** word.
So is **this** word.

***This is strong and em.***
**This is strong and em.**

So is ***this*** word.
So is **this** word.

This is code: ``>``, ``$``, ``\``, ``\$``, ``<html>``.

[STRIKEOUT:This is *strikeout*.]
[STRIKEOUT:This is strikeout.]

Superscripts: a\ :sup:`bc`\ d a\ :sup:`*hello*` a\ :sup:`hello there`.
Superscripts: a\ :sup:`bc`\ d a\ :sup:`hello` a\ :sup:`hello there`.

Subscripts: H\ :sub:`2`\ O, H\ :sub:`23`\ O, H\ :sub:`many of them`\ O.

Expand All @@ -649,8 +649,8 @@ Smart quotes, ellipses, dashes

‘He said, “I want to go.”’ Were you alive in the 70’s?

Here is some quoted ‘``code``’ and a `quoted
link <http://example.com/?foo=1&bar=2>`__.
Here is some quoted ‘``code``’ and a `quoted
link <http://example.com/?foo=1&bar=2>`__.

Some dashes: one—two — three—four — five.

Expand Down

0 comments on commit 226e03f

Please sign in to comment.