Skip to content

Commit

Permalink
Docx reader: refactor BodyPart.
Browse files Browse the repository at this point in the history
+ Remove ListItem constructor from BodyPart.
+ Changed numbered field of ParagraphStyle to a Maybe Number.
+ Add Number type to store numbering information.

This makes sense because headings can have numbering information,
and we sometimes need to know what it is (#10258).
  • Loading branch information
jgm committed Oct 4, 2024
1 parent a2dfda0 commit 0515c20
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 62 deletions.
63 changes: 31 additions & 32 deletions src/Text/Pandoc/Readers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ import Data.List (delete, intersect, foldl')
import Data.Char (isSpace)
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Maybe (isJust, fromMaybe, mapMaybe)
import Data.Maybe (isJust, isNothing, fromMaybe, mapMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
Expand Down Expand Up @@ -634,7 +634,7 @@ extraAttr s = ("", [], [("custom-style", fromStyleName $ getStyleName s)])

paragraphStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
paragraphStyleToTransform pPr =
let transform = if relativeIndent pPr > 0 && not (numbered pPr) &&
let transform = if relativeIndent pPr > 0 && isNothing (numbered pPr) &&
not (any ((`elem` listParagraphStyles) . getStyleName) (pStyle pPr))
then blockQuote
else id
Expand Down Expand Up @@ -688,11 +688,35 @@ bodyPartToBlocks (Paragraph pPr parparts)
$ getStyleNames (pStyle pPr)

hasNumbering <- gets docxNumberedHeadings
let addNum = if hasNumbering && not (numbered pPr)
let addNum = if hasNumbering && isNothing (numbered pPr)
then (++ ["unnumbered"])
else id
makeHeaderAnchor $
headerWith ("", addNum classes, []) n ils
| Just Number{ numberNumId = numId
, numberLvl = lvl
, numberLvlInfo = levelInfo } <- numbered pPr = do
-- We check whether this current numId has previously been used,
-- since Docx expects us to pick up where we left off.
listState <- gets docxListState
let startFromState = M.lookup (numId, lvl) listState
Level _ fmt txt startFromLevelInfo = levelInfo
start = case startFromState of
Just n -> n + 1
Nothing -> fromMaybe 1 startFromLevelInfo
kvs = [ ("level", lvl)
, ("num-id", numId)
, ("format", fmt)
, ("text", txt)
, ("start", tshow start)
]
modify $ \st -> st{ docxListState =
-- expire all the continuation data for lists of level > this one:
-- a new level 1 list item resets continuation for level 2+
let notExpired (_, lvl') _ = lvl' <= lvl
in M.insert (numId, lvl) start (M.filterWithKey notExpired listState) }
blks <- bodyPartToBlocks (Paragraph pPr{ numbered = Nothing } parparts)
return $ divWith ("", ["list-item"], kvs) blks
| otherwise = do
ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
Expand Down Expand Up @@ -740,32 +764,6 @@ bodyPartToBlocks (Paragraph pPr parparts)
return $ transform $
paraOrPlain $ ils'' <> insertMark
_ -> handleInsertion
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
-- We check whether this current numId has previously been used,
-- since Docx expects us to pick up where we left off.
listState <- gets docxListState
let startFromState = M.lookup (numId, lvl) listState
Level _ fmt txt startFromLevelInfo = levelInfo
start = case startFromState of
Just n -> n + 1
Nothing -> fromMaybe 1 startFromLevelInfo
kvs = [ ("level", lvl)
, ("num-id", numId)
, ("format", fmt)
, ("text", txt)
, ("start", tshow start)
]
modify $ \st -> st{ docxListState =
-- expire all the continuation data for lists of level > this one:
-- a new level 1 list item resets continuation for level 2+
let notExpired (_, lvl') _ = lvl' <= lvl
in M.insert (numId, lvl) start (M.filterWithKey notExpired listState) }
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Captioned parstyle parparts bpart) = do
bs <- bodyPartToBlocks bpart
captContents <- bodyPartToBlocks (Paragraph parstyle parparts)
Expand Down Expand Up @@ -844,9 +842,10 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
let isNumberedPara (Paragraph pPr _) = numbered pPr
isNumberedPara _ = False
modify (\s -> s { docxNumberedHeadings = any isNumberedPara blkbps })
let isNumberedHeading (Paragraph pPr _) = isJust (numbered pPr)
&& isJust (pHeading pPr)
isNumberedHeading _ = False
modify (\s -> s { docxNumberedHeadings = any isNumberedHeading blkbps })
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
blks'' <- removeOrphanAnchors blks'
Expand Down
45 changes: 15 additions & 30 deletions src/Text/Pandoc/Readers/Docx/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, Document(..)
, Body(..)
, BodyPart(..)
, Number(..)
, TblLook(..)
, Extent
, ParPart(..)
Expand Down Expand Up @@ -262,7 +263,7 @@ data Justification = JustifyBoth | JustifyLeft | JustifyRight | JustifyCenter
data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle]
, indentation :: Maybe ParIndentation
, justification :: Maybe Justification
, numbered :: Bool
, numbered :: Maybe Number
, dropCap :: Bool
, pChange :: Maybe TrackedChange
, pBidi :: Maybe Bool
Expand All @@ -274,16 +275,19 @@ defaultParagraphStyle :: ParagraphStyle
defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, justification = Nothing
, numbered = False
, numbered = Nothing
, dropCap = False
, pChange = Nothing
, pBidi = Just False
, pKeepNext = False
}

data Number = Number{ numberNumId :: T.Text
, numberLvl :: T.Text
, numberLvlInfo :: Level }
deriving Show

data BodyPart = Paragraph ParagraphStyle [ParPart]
| ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
| Tbl T.Text TblGrid TblLook [Row]
| Captioned ParagraphStyle [ParPart] BodyPart
| HRule
Expand Down Expand Up @@ -751,14 +755,6 @@ testBitMask bitMaskS n =
pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
pHeading = getParStyleField headingLev . pStyle

pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
pNumInfo = getParStyleField numInfo . pStyle

mkListItem :: ParagraphStyle -> Text -> Text -> [ParPart] -> D BodyPart
mkListItem parstyle numId lvl parparts = do
lvlInfo <- lookupLevel numId lvl <$> asks envNumbering
return $ ListItem parstyle numId lvl lvlInfo parparts

pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation
pStyleIndentation style = (getParStyleField indent . pStyle) style

Expand Down Expand Up @@ -788,16 +784,6 @@ elemToBodyPart ns element
<$> asks envParStyles
<*> asks envNumbering
return $ Paragraph parstyle [OMathPara expsLst]
elemToBodyPart ns element
| isElem ns "w" "p" element
, Just (numId, lvl) <- getNumInfo ns element = do
parstyle <- elemToParagraphStyle ns element
<$> asks envParStyles
<*> asks envNumbering
parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
case pHeading parstyle of
Nothing -> mkListItem parstyle numId lvl parparts
Just _ -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "p" element
, [Elem ppr] <- elContent element
Expand Down Expand Up @@ -830,13 +816,8 @@ elemToBodyPart ns element
parparts' <- mconcat <$> mapD (elemToParPart ns) children
fldCharState <- gets stateFldCharState
modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
-- Word uses list enumeration for numbered headings, so we only
-- want to infer a list from the styles if it is NOT a heading.
let parparts = parparts' ++ openFldCharsToParParts fldCharState
case pHeading parstyle of
Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
mkListItem parstyle numId lvl parparts
_ -> return $ Paragraph parstyle parparts
return $ Paragraph parstyle parparts

elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
Expand Down Expand Up @@ -1235,9 +1216,13 @@ elemToParagraphStyle ns element sty numbering
pStyle' = mapMaybe (`M.lookup` sty) style
in ParagraphStyle
{pStyle = pStyle'
, numbered = case getNumInfo ns element of
Just (numId, lvl) -> isJust $ lookupLevel numId lvl numbering
Nothing -> isJust $ getParStyleField numInfo pStyle'
, numbered = case getNumInfo ns element <|> getParStyleField numInfo pStyle' of
Just (numId, lvl) -> case lookupLevel numId lvl numbering of
Nothing -> Nothing
Just levinfo -> Just Number{ numberNumId = numId
, numberLvl = lvl
, numberLvlInfo = levinfo }
Nothing -> Nothing
, justification =
case findChildByName ns "w" "jc" pPr >>= findAttrByName ns "w" "val" of
Nothing -> Nothing
Expand Down

0 comments on commit 0515c20

Please sign in to comment.