diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 5ceb6e22afef..73dd8fd1f09b 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -2144,6 +2145,8 @@ parseAligns = try $ do toColWidth _ = ColWidthDefault toSpec (x, y, z) = (x, toColWidth y, z) +-- N.B. this parser returns a Row that may have erroneous empty cells +-- in it. See the note above fixTableHead for details. parseTableRow :: PandocMonad m => Text -- ^ table environment name -> [([Tok], [Tok])] -- ^ pref/suffixes @@ -2168,9 +2171,7 @@ parseTableRow envname prefsufs = do cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells setInput oldInput spaces - -- Because of table normalization performed by Text.Pandoc.Builder.table, - -- we need to remove empty cells - return $ Row nullAttr $ filter (\c -> c /= emptyCell) cells + return $ Row nullAttr cells parseTableCell :: PandocMonad m => LP m Cell parseTableCell = do @@ -2246,6 +2247,80 @@ multicolumnCell = controlSeq "multicolumn" >> do parseSimpleCell :: PandocMonad m => LP m Cell parseSimpleCell = simpleCell <$> (plainify <$> blocks) +-- LaTeX tables are stored with empty cells underneath multirow cells +-- denoting the grid spaces taken up by them. More specifically, if a +-- cell spans m rows, then it will overwrite all the cells in the +-- columns it spans for (m-1) rows underneath it, requiring padding +-- cells in these places. These padding cells need to be removed for +-- proper table reading. See #6603. +-- +-- These fixTable functions do not otherwise fix up malformed +-- input tables: that is left to the table builder. 
+fixTableHead :: TableHead -> TableHead +fixTableHead (TableHead attr rows) = TableHead attr rows' + where + rows' = fixTableRows rows + +fixTableBody :: TableBody -> TableBody +fixTableBody (TableBody attr rhc th tb) + = TableBody attr rhc th' tb' + where + th' = fixTableRows th + tb' = fixTableRows tb + +fixTableRows :: [Row] -> [Row] +fixTableRows = fixTableRows' $ repeat Nothing + where + fixTableRows' oldHang (Row attr cells : rs) + = let (newHang, cells') = fixTableRow oldHang cells + rs' = fixTableRows' newHang rs + in Row attr cells' : rs' + fixTableRows' _ [] = [] + +-- The overhang is represented as Just (relative cell dimensions) or +-- Nothing for an empty grid space. +fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) +fixTableRow oldHang cells + -- If there's overhang, drop cells until their total width meets the + -- width of the occupied grid spaces (or we run out) + | (n, prefHang, restHang) <- splitHang oldHang + , n > 0 + = let cells' = dropToWidth getCellW n cells + (restHang', cells'') = fixTableRow restHang cells' + in (prefHang restHang', cells'') + -- Otherwise record the overhang of a pending cell and fix the rest + -- of the row + | c@(Cell _ _ h w _):cells' <- cells + = let h' = max 1 h + w' = max 1 w + oldHang' = dropToWidth getHangW w' oldHang + (newHang, cells'') = fixTableRow oldHang' cells' + in (toHang w' h' <> newHang, c : cells'') + | otherwise + = (oldHang, []) + where + getCellW (Cell _ _ _ w _) = w + getHangW = maybe 1 fst + getCS (ColSpan n) = n + + toHang c r + | r > 1 = [Just (c, r)] + | otherwise = replicate (getCS c) Nothing + + -- Take the prefix of the overhang list representing filled grid + -- spaces. Also return the remainder and the total width of this prefix. + splitHang = splitHang' 0 id + + splitHang' !n l (Just (c, r):xs) + = splitHang' (n + c) (l . 
(toHang c (r-1) ++)) xs + splitHang' n l xs = (n, l, xs) + + -- Drop list items until the total width of the dropped items + -- meets or exceeds the passed width. + dropToWidth _ n l | n < 1 = l + dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs + dropToWidth _ _ [] = [] + simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks simpTable envname hasWidthParameter = try $ do when hasWidthParameter $ () <$ (spaces >> tok) @@ -2273,11 +2348,10 @@ simpTable envname hasWidthParameter = try $ do optional lbreak spaces lookAhead $ controlSeq "end" -- make sure we're at end - return $ table emptyCaption - (zip aligns widths) - (TableHead nullAttr header') - [TableBody nullAttr 0 [] rows] - (TableFoot nullAttr []) + let th = fixTableHead $ TableHead nullAttr header' + let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] + let tf = TableFoot nullAttr [] + return $ table emptyCaption (zip aligns widths) th tbs tf addTableCaption :: PandocMonad m => Blocks -> LP m Blocks addTableCaption = walkM go diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index 87074e990f81..c50c91ca118c 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -174,20 +174,21 @@ tests = [ testGroup "tokenization" , Row nullAttr [ simpleCell (plain "Two") ] ] , "Table with nested multirow/multicolumn item" =: - T.unlines [ "\\begin{tabular}{c c c}" - , "\\multicolumn{2}{c}{\\multirow{2}{5em}{One}}&Two\\\\" - , "& & Three\\\\" - , "Four&Five&Six\\\\" + T.unlines [ "\\begin{tabular}{c c c c}" + , "\\multicolumn{3}{c}{\\multirow{2}{5em}{One}}&Two\\\\" + , "\\multicolumn{2}{c}{} & & Three\\\\" + , "Four&Five&Six&Seven\\\\" , "\\end{tabular}" ] =?> - table' [AlignCenter, AlignCenter, AlignCenter] - [ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 2) (plain "One") + table' [AlignCenter, AlignCenter, AlignCenter, AlignCenter] + [ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 3) (plain "One") , simpleCell (plain "Two") ] , Row nullAttr [ 
simpleCell (plain "Three") ] , Row nullAttr [ simpleCell (plain "Four") , simpleCell (plain "Five") , simpleCell (plain "Six") + , simpleCell (plain "Seven") ] ] , "Table with multicolumn header" =: @@ -205,6 +206,25 @@ tests = [ testGroup "tokenization" ] ] (TableFoot nullAttr []) + , "Table with normal empty cells" =: + T.unlines [ "\\begin{tabular}{|r|r|r|}" + , "A & & B \\\\" + , " & C &" + , "\\end{tabular}" + ] =?> + table emptyCaption + (replicate 3 (AlignRight, ColWidthDefault)) + (TableHead nullAttr []) + [TableBody nullAttr 0 [] + [Row nullAttr [ simpleCell (plain "A") + , emptyCell + , simpleCell (plain "B") + ] + ,Row nullAttr [ emptyCell + , simpleCell (plain "C") + , emptyCell + ]]] + (TableFoot nullAttr []) ] , testGroup "citations"