From 217de7ce651b202453e7a895ebb3d715159ac324 Mon Sep 17 00:00:00 2001 From: danse Date: Wed, 14 Feb 2018 19:20:44 +0100 Subject: [PATCH] remove multiple inline styles when writing RST, closes #4368 --- src/Text/Pandoc/Writers/RST.hs | 137 +++++++++++++++++++++++++++++++-- test/Tests/Writers/RST.hs | 29 +++++++ test/writer.rst | 18 ++--- 3 files changed, 170 insertions(+), 14 deletions(-) diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 95cb466435a72..07edc27b8c624 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: -} -module Text.Pandoc.Writers.RST ( writeRST ) where +module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Control.Monad.State.Strict import Data.Char (isSpace, toLower) import Data.List (isPrefixOf, stripPrefix) @@ -376,12 +376,139 @@ blockListToRST :: PandocMonad m -> RST m Doc blockListToRST = blockListToRST' False +maybeGetInlines :: Inline -> Maybe [Inline] +maybeGetInlines (Link _ i _) = Just i +maybeGetInlines (Emph i) = Just i +maybeGetInlines (Strong i) = Just i +maybeGetInlines (Strikeout i) = Just i +maybeGetInlines (Superscript i) = Just i +maybeGetInlines (Subscript i) = Just i +maybeGetInlines (SmallCaps i) = Just i +maybeGetInlines (Quoted _ i) = Just i +maybeGetInlines (Cite _ i) = Just i +maybeGetInlines (Image _ i _) = Just i +maybeGetInlines (Span _ i) = Just i +maybeGetInlines _ = Nothing + +getInlines :: Inline -> [Inline] +getInlines = maybe [] id . maybeGetInlines + +setInlines :: [Inline] -> Inline -> Inline +setInlines i (Link a _ t) = Link a i t +setInlines i (Emph _) = Emph i +setInlines i (Strong _) = Strong i +setInlines i (Strikeout _) = Strikeout i +setInlines i (Superscript _) = Superscript i +setInlines i (Subscript _) = Subscript i +setInlines i (SmallCaps _) = SmallCaps i +setInlines i (Quoted q _) = Quoted q i +setInlines i (Cite c _) = Cite c i +setInlines i (Image a _ t) = Image a i t +setInlines i (Span a _) = Span a i +setInlines _ leaf = leaf + +data Flattening = Flattening { + stOuter :: Inline, + stReady :: [Inline] + } + +type Fla = State Flattening + +applyToOuter :: (Inline -> Inline) -> Fla () +applyToOuter f = do + outer <- gets stOuter + modify $ \st -> st{ stOuter = f outer } + +clearStOuter :: Fla () +clearStOuter = applyToOuter (setInlines []) + +addReady :: Inline -> Fla () +addReady i = do + ready <- gets stReady + modify $ \st -> st{ stReady = ready <> [i] } + +-- | consider contents into `outer` ready and prepare `outer` for new contents +flushOuter :: Fla () +flushOuter = do + outer <- gets stOuter + when (hasContents outer) $ addReady outer + clearStOuter + where hasContents :: Inline -> Bool + -- decide whether we want to render this inline or not. images + -- and links are the only inline containers we want to render + -- even when they don't contain any inline, because they carry + -- some content in the target + hasContents (Image _ _ _) = True + hasContents (Link _ _ _) = True + -- we render non-empty inline containers and all inlines that + -- are not containers + hasContents i = maybe True (not . null) $ maybeGetInlines i + +-- | add contents from `inner` into `outer` +collapse :: Inline -> Fla () +collapse inner = applyToOuter (addContentsTo inner) + where -- | `addContentsTo first second` will move the contents from + -- `second` into `first` stripping some inline style and + -- leaving other inline elements unchanged + addContentsTo :: Inline -> Inline -> Inline + addContentsTo i1 i2 = setInlines (getInlines i2 <> dropStyle i1) i2 + dropStyle :: Inline -> [Inline] + dropStyle (Strong i) = i + dropStyle (Emph i) = i + -- other inline constructors correspond to inlines that don't + -- look ugly or confusing to the user when rendered, for + -- example Quoted. even if an RST parser wouldn't recognise + -- them, it seems a good idea to keep them at the moment. more + -- constructors can be matched here in case we will want to + -- drop those as well eventually + dropStyle i = [i] + +-- | flush `outer` and `inner` +emerge :: Inline -> Fla () +emerge inner = do + flushOuter + addReady inner + +combine :: Inline -> Fla () +combine i = do + outer <- gets stOuter -- this inline has been cleared but it does not matter + case (outer, i) of + (Link _ _ _, _) -> collapse i -- preserve outer links + (_, Link _ _ _) -> emerge i -- preserve inner links + (Strong _, Emph _) -> collapse i -- ignore inner emphs + (Emph _, Strong _) -> emerge i -- pull inner strongs out of an emph + (_, _) -> collapse i -- by default strip inner markup + +-- | collect a list of inlines combining a containing one with the contents +collectReady :: Fla [Inline] +collectReady = do + outer <- gets stOuter + -- remove contained inlines, they will be either collapsed inside + -- again or pulled out of the nesting + clearStOuter + -- combine will not change the type of the outer, it will just + -- affect its contents + mapM combine (emergeCollapse $ getInlines outer) + -- after we went through all contents we could have some contents in + -- the outer inline, in that case we flush it to the list of ready + -- ones + flushOuter + -- here are our collapsed or emerged inlines + gets stReady + +flatten :: Inline -> [Inline] +flatten o = fst $ runState collectReady (ini o) + where ini o' = Flattening { stOuter = o', stReady = [] } -- initial state + +-- | emerge some inlines out of the containing one while removing others +emergeCollapse :: [Inline] -> [Inline] +emergeCollapse = concatMap flatten + -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc -inlineListToRST lst = - mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= - return . hcat - where -- remove spaces after displaymath, as they screw up indentation: +inlineListToRST lst = mapM inlineToRST (prepareInlines lst) >>= return . hcat + where prepareInlines = removeSpaceAfterDisplayMath . insertBS . emergeCollapse + -- remove spaces after displaymath, as they screw up indentation: removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = Math DisplayMath x : dropWhile (==Space) zs removeSpaceAfterDisplayMath (x:xs) = x : removeSpaceAfterDisplayMath xs diff --git a/test/Tests/Writers/RST.hs b/test/Tests/Writers/RST.hs index 4c0a926bb5563..982e9e3817cc2 100644 --- a/test/Tests/Writers/RST.hs +++ b/test/Tests/Writers/RST.hs @@ -2,10 +2,12 @@ module Tests.Writers.RST (tests) where import Test.Tasty +import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder +import Text.Pandoc.Writers.RST infix 4 =: (=:) :: (ToString a, ToPandoc a) @@ -50,6 +52,33 @@ tests = [ testGroup "rubrics" , "" , " quoted"] ] + , testGroup "inlines" + [ "gives priority to strong style over emphasis" =: -- issue #4368 + emph (str "first" <> strong (str "second")) =?> "*first*\\ **second**" + ] + , testGroup "flatten" + [ testCase "drops all inner styles" $ + flatten (Strong [Emph [Strong [Str "s"]]]) @?= + [Strong [Str "s"]] + , testCase "drops outer styles that contain a link" $ + flatten (Strong [Emph [Link ("", [], [])[Str "s"] ("", "")]]) @?= + [Link ("", [], []) [Str "s"] ("", "")] + , testCase "preserves strong text in an emphasis " $ + flatten (Emph [Str "f", Str "s", Strong [Str "d"], Str "l"]) @?= + [Emph [Str "f", Str "s"], Strong [Str "d"], Emph [Str "l"]] + , testCase "drops emphasis in a strong inline" $ + flatten (Strong [Str "f", Str "s", Emph [Str "d"], Str "l"]) @?= + [Strong [Str "f", Str "s", Str "d", Str "l"]] + , testCase "does not break links" $ + flatten (Link ("", [], [])[Str "f", Strong [Str "d"]] ("", "")) @?= + [Link ("", [], [])[Str "f", Str "d"] ("", "")] + , testCase "keeps inlines not matched by dropStyle" $ + flatten (Strong [Str "f", Subscript [Str "d"], Str "l"]) @?= + [Strong [Str "f", Subscript [Str "d"], Str "l"]] + , testCase "keeps an image even when it does not contain inlines" $ + flatten (Image ("",[],[]) [] ("image5.jpeg","")) @?= + [Image ("",[],[]) [] ("image5.jpeg","")] + ] , testGroup "headings" [ "normal heading" =: header 1 (text "foo") =?> diff --git a/test/writer.rst b/test/writer.rst index 93158f0c3292d..118e2e174bc55 100644 --- a/test/writer.rst +++ b/test/writer.rst @@ -615,21 +615,21 @@ This is *emphasized*, and so *is this*. This is **strong**, and so **is this**. -An *`emphasized link `__*. +An `emphasized link `__. -***This is strong and em.*** +**This is strong and em.** -So is ***this*** word. +So is **this** word. -***This is strong and em.*** +**This is strong and em.** -So is ***this*** word. +So is **this** word. This is code: ``>``, ``$``, ``\``, ``\$``, ````. -[STRIKEOUT:This is *strikeout*.] +[STRIKEOUT:This is strikeout.] -Superscripts: a\ :sup:`bc`\ d a\ :sup:`*hello*` a\ :sup:`hello there`. +Superscripts: a\ :sup:`bc`\ d a\ :sup:`hello` a\ :sup:`hello there`. Subscripts: H\ :sub:`2`\ O, H\ :sub:`23`\ O, H\ :sub:`many of them`\ O. @@ -649,8 +649,8 @@ Smart quotes, ellipses, dashes ‘He said, “I want to go.”’ Were you alive in the 70’s? -Here is some quoted ‘``code``’ and a “`quoted -link `__”. +Here is some quoted ‘``code``’ and a `quoted +link `__. Some dashes: one—two — three—four — five.