From 0515c209c29d239a2b5560614b5f44de373a655e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 3 Oct 2024 18:07:00 -0700 Subject: [PATCH] Docx reader: refactor BodyPart. + Remove ListItem constructor from BodyPart. + Changed numbered field of ParagraphStyle to a Maybe Number. + Add Number type to store numbering information. This makes sense because headings can have numbering information, and we sometimes need to know what it is (#10258). --- src/Text/Pandoc/Readers/Docx.hs | 63 +++++++++++++-------------- src/Text/Pandoc/Readers/Docx/Parse.hs | 45 +++++++------------ 2 files changed, 46 insertions(+), 62 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4130a749f7d6..17d5b0809f6f 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -78,7 +78,7 @@ import Data.List (delete, intersect, foldl') import Data.Char (isSpace) import qualified Data.Map as M import qualified Data.Text as T -import Data.Maybe (isJust, fromMaybe, mapMaybe) +import Data.Maybe (isJust, isNothing, fromMaybe, mapMaybe) import Data.Sequence (ViewL (..), viewl) import qualified Data.Sequence as Seq import qualified Data.Set as Set @@ -634,7 +634,7 @@ extraAttr s = ("", [], [("custom-style", fromStyleName $ getStyleName s)]) paragraphStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) paragraphStyleToTransform pPr = - let transform = if relativeIndent pPr > 0 && not (numbered pPr) && + let transform = if relativeIndent pPr > 0 && isNothing (numbered pPr) && not (any ((`elem` listParagraphStyles) . getStyleName) (pStyle pPr)) then blockQuote else id @@ -688,11 +688,35 @@ bodyPartToBlocks (Paragraph pPr parparts) $ getStyleNames (pStyle pPr) hasNumbering <- gets docxNumberedHeadings - let addNum = if hasNumbering && not (numbered pPr) + let addNum = if hasNumbering && isNothing (numbered pPr) then (++ ["unnumbered"]) else id makeHeaderAnchor $ headerWith ("", addNum classes, []) n ils + | Just Number{ numberNumId = numId + , numberLvl = lvl + , numberLvlInfo = levelInfo } <- numbered pPr = do + -- We check whether this current numId has previously been used, + -- since Docx expects us to pick up where we left off. + listState <- gets docxListState + let startFromState = M.lookup (numId, lvl) listState + Level _ fmt txt startFromLevelInfo = levelInfo + start = case startFromState of + Just n -> n + 1 + Nothing -> fromMaybe 1 startFromLevelInfo + kvs = [ ("level", lvl) + , ("num-id", numId) + , ("format", fmt) + , ("text", txt) + , ("start", tshow start) + ] + modify $ \st -> st{ docxListState = + -- expire all the continuation data for lists of level > this one: + -- a new level 1 list item resets continuation for level 2+ + let notExpired (_, lvl') _ = lvl' <= lvl + in M.insert (numId, lvl) start (M.filterWithKey notExpired listState) } + blks <- bodyPartToBlocks (Paragraph pPr{ numbered = Nothing } parparts) + return $ divWith ("", ["list-item"], kvs) blks | otherwise = do ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts prevParaIls <- gets docxPrevPara @@ -740,32 +764,6 @@ bodyPartToBlocks (Paragraph pPr parparts) return $ transform $ paraOrPlain $ ils'' <> insertMark _ -> handleInsertion -bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do - -- We check whether this current numId has previously been used, - -- since Docx expects us to pick up where we left off. - listState <- gets docxListState - let startFromState = M.lookup (numId, lvl) listState - Level _ fmt txt startFromLevelInfo = levelInfo - start = case startFromState of - Just n -> n + 1 - Nothing -> fromMaybe 1 startFromLevelInfo - kvs = [ ("level", lvl) - , ("num-id", numId) - , ("format", fmt) - , ("text", txt) - , ("start", tshow start) - ] - modify $ \st -> st{ docxListState = - -- expire all the continuation data for lists of level > this one: - -- a new level 1 list item resets continuation for level 2+ - let notExpired (_, lvl') _ = lvl' <= lvl - in M.insert (numId, lvl) start (M.filterWithKey notExpired listState) } - blks <- bodyPartToBlocks (Paragraph pPr parparts) - return $ divWith ("", ["list-item"], kvs) blks -bodyPartToBlocks (ListItem pPr _ _ _ parparts) = - let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr} - in - bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Captioned parstyle parparts bpart) = do bs <- bodyPartToBlocks bpart captContents <- bodyPartToBlocks (Paragraph parstyle parparts) @@ -844,9 +842,10 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block]) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps - let isNumberedPara (Paragraph pPr _) = numbered pPr - isNumberedPara _ = False - modify (\s -> s { docxNumberedHeadings = any isNumberedPara blkbps }) + let isNumberedHeading (Paragraph pPr _) = isJust (numbered pPr) + && isJust (pHeading pPr) + isNumberedHeading _ = False + modify (\s -> s { docxNumberedHeadings = any isNumberedHeading blkbps }) blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks blks'' <- removeOrphanAnchors blks' diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 3c493022ee7c..112ed5d0fefb 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -18,6 +18,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , Document(..) , Body(..) , BodyPart(..) + , Number(..) , TblLook(..) , Extent , ParPart(..) @@ -262,7 +263,7 @@ data Justification = JustifyBoth | JustifyLeft | JustifyRight | JustifyCenter data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] , indentation :: Maybe ParIndentation , justification :: Maybe Justification - , numbered :: Bool + , numbered :: Maybe Number , dropCap :: Bool , pChange :: Maybe TrackedChange , pBidi :: Maybe Bool @@ -274,16 +275,19 @@ defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , justification = Nothing - , numbered = False + , numbered = Nothing , dropCap = False , pChange = Nothing , pBidi = Just False , pKeepNext = False } +data Number = Number{ numberNumId :: T.Text + , numberLvl :: T.Text + , numberLvlInfo :: Level } + deriving Show data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] | Tbl T.Text TblGrid TblLook [Row] | Captioned ParagraphStyle [ParPart] BodyPart | HRule @@ -751,14 +755,6 @@ testBitMask bitMaskS n = pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle -pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) -pNumInfo = getParStyleField numInfo . pStyle - -mkListItem :: ParagraphStyle -> Text -> Text -> [ParPart] -> D BodyPart -mkListItem parstyle numId lvl parparts = do - lvlInfo <- lookupLevel numId lvl <$> asks envNumbering - return $ ListItem parstyle numId lvl lvlInfo parparts - pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation pStyleIndentation style = (getParStyleField indent . pStyle) style @@ -788,16 +784,6 @@ elemToBodyPart ns element <$> asks envParStyles <*> asks envNumbering return $ Paragraph parstyle [OMathPara expsLst] -elemToBodyPart ns element - | isElem ns "w" "p" element - , Just (numId, lvl) <- getNumInfo ns element = do - parstyle <- elemToParagraphStyle ns element - <$> asks envParStyles - <*> asks envNumbering - parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element) - case pHeading parstyle of - Nothing -> mkListItem parstyle numId lvl parparts - Just _ -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "p" element , [Elem ppr] <- elContent element @@ -830,13 +816,8 @@ elemToBodyPart ns element parparts' <- mconcat <$> mapD (elemToParPart ns) children fldCharState <- gets stateFldCharState modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState} - -- Word uses list enumeration for numbered headings, so we only - -- want to infer a list from the styles if it is NOT a heading. let parparts = parparts' ++ openFldCharsToParParts fldCharState - case pHeading parstyle of - Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do - mkListItem parstyle numId lvl parparts - _ -> return $ Paragraph parstyle parparts + return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -1235,9 +1216,13 @@ elemToParagraphStyle ns element sty numbering pStyle' = mapMaybe (`M.lookup` sty) style in ParagraphStyle {pStyle = pStyle' - , numbered = case getNumInfo ns element of - Just (numId, lvl) -> isJust $ lookupLevel numId lvl numbering - Nothing -> isJust $ getParStyleField numInfo pStyle' + , numbered = case getNumInfo ns element <|> getParStyleField numInfo pStyle' of + Just (numId, lvl) -> case lookupLevel numId lvl numbering of + Nothing -> Nothing + Just levinfo -> Just Number{ numberNumId = numId + , numberLvl = lvl + , numberLvlInfo = levinfo } + Nothing -> Nothing , justification = case findChildByName ns "w" "jc" pPr >>= findAttrByName ns "w" "val" of Nothing -> Nothing