From 12ffe7848e7d95814be7613011b691bcdeff76b2 Mon Sep 17 00:00:00 2001 From: danse Date: Fri, 13 Apr 2018 10:52:41 +0200 Subject: [PATCH] RST reader: flatten nested inlines, closes #4368 nested inlines are not valid RST syntax, so we flatten them following some readability criteria discussed in #4368. --- src/Text/Pandoc/Writers/RST.hs | 72 ++++++++++++++++++++++++++++++++-- test/Tests/Writers/RST.hs | 24 ++++++++++++ test/writer.rst | 14 +++---- 3 files changed, 100 insertions(+), 10 deletions(-) diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 74fc4dca4090b..5ccfb5ccb3b5f 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to reStructuredText. reStructuredText: -} -module Text.Pandoc.Writers.RST ( writeRST ) where +module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Prelude import Control.Monad.State.Strict import Data.Char (isSpace, toLower) @@ -380,8 +380,10 @@ blockListToRST :: PandocMonad m blockListToRST = blockListToRST' False transformInlines :: [Inline] -> [Inline] -transformInlines = stripLeadingTrailingSpace . insertBS - . filter hasContents . removeSpaceAfterDisplayMath +transformInlines = insertBS . + filter hasContents . + removeSpaceAfterDisplayMath . + concatMap (transformNested . flatten) where -- empty inlines are not valid RST syntax hasContents :: Inline -> Bool hasContents (Str "") = False @@ -415,6 +417,8 @@ transformInlines = stripLeadingTrailingSpace . insertBS x : insertBS (y : zs) insertBS (x:ys) = x : insertBS ys insertBS [] = [] + transformNested :: [Inline] -> [Inline] + transformNested = map (mapNested stripLeadingTrailingSpace) surroundComplex :: Inline -> Inline -> Bool surroundComplex (Str s@(_:_)) (Str s'@(_:_)) = case (last s, head s') of @@ -452,6 +456,68 @@ transformInlines = stripLeadingTrailingSpace . insertBS isComplex (Span _ (x:_)) = isComplex x isComplex _ = False +-- | Flattens nested inlines. Extracts nested inlines and goes through +-- them either collapsing them in the outer inline container or +-- pulling them out of it +flatten :: Inline -> [Inline] +flatten outer = combineAll $ dropInlineParent outer + where combineAll = foldl combine [] + + combine :: [Inline] -> Inline -> [Inline] + combine f i = + case (outer, i) of + (Quoted _ _, Link _ _ _) -> keep f i -- quoted links still work + (_, Link _ _ _) -> emerge f i -- preserve inner links + (Strong _, Emph _) -> collapse f i -- ignore inner emphs + (Emph _, Strong _) -> emerge f i -- strong over emph + (_, Quoted _ _) -> keep f i -- inner quotes are readable + (_, _) -> collapse f i -- drop other nested styles + + emerge f i = f <> [i] + keep f i = appendToLast f [i] + collapse f i = appendToLast f $ dropInlineParent i + + appendToLast :: [Inline] -> [Inline] -> [Inline] + appendToLast [] toAppend = [setInlineChildren outer toAppend] + appendToLast flattened toAppend + | isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend] + | otherwise = flattened <> [setInlineChildren outer toAppend] + where lastFlat = last flattened + appendTo o i = mapNested (<> i) o + isOuter i = emptyParent i == emptyParent outer + emptyParent i = setInlineChildren i [] + +mapNested :: ([Inline] -> [Inline]) -> Inline -> Inline +mapNested f i = setInlineChildren i (f (dropInlineParent i)) + +dropInlineParent :: Inline -> [Inline] +dropInlineParent (Link _ i _) = i +dropInlineParent (Emph i) = i +dropInlineParent (Strong i) = i +dropInlineParent (Strikeout i) = i +dropInlineParent (Superscript i) = i +dropInlineParent (Subscript i) = i +dropInlineParent (SmallCaps i) = i +dropInlineParent (Cite _ i) = i +dropInlineParent (Image _ i _) = i +dropInlineParent (Span _ i) = i +dropInlineParent (Quoted _ i) = i +dropInlineParent i = [i] -- not a parent, like Str or Space + +setInlineChildren :: Inline -> [Inline] -> Inline +setInlineChildren (Link a _ t) i = Link a i t +setInlineChildren (Emph _) i = Emph i +setInlineChildren (Strong _) i = Strong i +setInlineChildren (Strikeout _) i = Strikeout i +setInlineChildren (Superscript _) i = Superscript i +setInlineChildren (Subscript _) i = Subscript i +setInlineChildren (SmallCaps _) i = SmallCaps i +setInlineChildren (Quoted q _) i = Quoted q i +setInlineChildren (Cite c _) i = Cite c i +setInlineChildren (Image a _ t) i = Image a i t +setInlineChildren (Span a _) i = Span a i +setInlineChildren leaf _ = leaf + inlineListToRST :: PandocMonad m => [Inline] -> RST m Doc inlineListToRST = writeInlines . walk transformInlines diff --git a/test/Tests/Writers/RST.hs b/test/Tests/Writers/RST.hs index 64367a108ed3a..08c7d75ee5c36 100644 --- a/test/Tests/Writers/RST.hs +++ b/test/Tests/Writers/RST.hs @@ -4,10 +4,12 @@ module Tests.Writers.RST (tests) where import Prelude import Test.Tasty +import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder +import Text.Pandoc.Writers.RST infix 4 =: (=:) :: (ToString a, ToPandoc a) @@ -52,6 +54,17 @@ tests = [ testGroup "rubrics" , "" , " quoted"] ] + , testGroup "flatten" + [ testCase "emerges nested styles as expected" $ + flatten (Emph [Str "1", Strong [Str "2"], Str "3"]) @?= + [Emph [Str "1"], Strong [Str "2"], Emph [Str "3"]] + , testCase "could introduce trailing spaces" $ + flatten (Emph [Str "f", Space, Strong [Str "2"]]) @?= + [Emph [Str "f", Space], Strong [Str "2"]] + -- the test above is the reason why we call + -- stripLeadingTrailingSpace through transformNested after + -- flatten + ] , testGroup "inlines" [ "are removed when empty" =: -- #4434 plain (strong (str "")) =?> "" @@ -64,6 +77,17 @@ tests = [ testGroup "rubrics" strong (space <> str "text" <> space <> space) =?> "**text**" , "single space stripped" =: strong space =?> "" + , "give priority to strong style over emphasis" =: + strong (emph (strong (str "s"))) =?> "**s**" + , "links are not elided by outer style" =: + strong (emph (link "loc" "" (str "text"))) =?> + "`text `__" + , "RST inlines cannot start nor end with spaces" =: + emph (str "f" <> space <> strong (str "d") <> space <> str "l") =?> + "*f*\\ **d**\\ *l*" + , "keeps nested inlines that are not confusing" =: + strong (str "f" <> doubleQuoted (str "d") <> str "l") =?> + "**f“d”l**" ] , testGroup "headings" [ "normal heading" =: diff --git a/test/writer.rst b/test/writer.rst index 93158f0c3292d..0a399bcb74fd8 100644 --- a/test/writer.rst +++ b/test/writer.rst @@ -615,21 +615,21 @@ This is *emphasized*, and so *is this*. This is **strong**, and so **is this**. -An *`emphasized link `__*. +An `emphasized link `__. -***This is strong and em.*** +**This is strong and em.** -So is ***this*** word. +So is **this** word. -***This is strong and em.*** +**This is strong and em.** -So is ***this*** word. +So is **this** word. This is code: ``>``, ``$``, ``\``, ``\$``, ````. -[STRIKEOUT:This is *strikeout*.] +[STRIKEOUT:This is strikeout.] -Superscripts: a\ :sup:`bc`\ d a\ :sup:`*hello*` a\ :sup:`hello there`. +Superscripts: a\ :sup:`bc`\ d a\ :sup:`hello` a\ :sup:`hello there`. Subscripts: H\ :sub:`2`\ O, H\ :sub:`23`\ O, H\ :sub:`many of them`\ O.