Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LaTeX reader: fix improper empty cell filtering #6689

Merged
merged 1 commit into from
Sep 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 82 additions & 8 deletions src/Text/Pandoc/Readers/LaTeX.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
Expand Down Expand Up @@ -2144,6 +2145,8 @@ parseAligns = try $ do
toColWidth _ = ColWidthDefault
toSpec (x, y, z) = (x, toColWidth y, z)

-- N.B. this parser returns a Row that may have erroneous empty cells
-- in it. See the note above fixTableHead for details.
parseTableRow :: PandocMonad m
=> Text -- ^ table environment name
-> [([Tok], [Tok])] -- ^ pref/suffixes
Expand All @@ -2168,9 +2171,7 @@ parseTableRow envname prefsufs = do
cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
setInput oldInput
spaces
-- Because of table normalization performed by Text.Pandoc.Builder.table,
-- we need to remove empty cells
return $ Row nullAttr $ filter (\c -> c /= emptyCell) cells
return $ Row nullAttr cells

parseTableCell :: PandocMonad m => LP m Cell
parseTableCell = do
Expand Down Expand Up @@ -2246,6 +2247,80 @@ multicolumnCell = controlSeq "multicolumn" >> do
-- | Parse @blocks@, pass the result through 'plainify', and wrap it
-- with 'simpleCell'.
parseSimpleCell :: PandocMonad m => LP m Cell
parseSimpleCell = simpleCell . plainify <$> blocks

-- LaTeX tables are stored with empty cells underneath multirow cells,
-- denoting the grid spaces those cells take up. More specifically, a
-- cell that spans m rows overwrites every cell in the columns it spans
-- for the (m-1) rows underneath it, so padding cells are required in
-- those places. The padding cells need to be removed for proper table
-- reading. See #6603.
--
-- The fixTable functions do not otherwise fix up malformed input
-- tables: that is left to the table builder.

-- | Apply 'fixTableRows' to the rows of a 'TableHead', leaving its
-- attributes unchanged.
fixTableHead :: TableHead -> TableHead
fixTableHead (TableHead attr rows) = TableHead attr (fixTableRows rows)

-- | Apply 'fixTableRows' separately to each of the two row lists held
-- by a 'TableBody'. The attributes and the @rhc@ field are passed
-- through unchanged.
fixTableBody :: TableBody -> TableBody
fixTableBody (TableBody attr rhc th tb) =
  TableBody attr rhc (fixTableRows th) (fixTableRows tb)

-- | Run 'fixTableRow' over a list of rows from top to bottom. The
-- first row starts with no overhang (an endless supply of empty grid
-- spaces); each later row receives the overhang produced by the row
-- before it.
fixTableRows :: [Row] -> [Row]
fixTableRows = go (repeat Nothing)
  where
    go _ [] = []
    go hang (Row attr cells : rest) = Row attr fixedCells : go nextHang rest
      where
        (nextHang, fixedCells) = fixTableRow hang cells

-- Each overhang entry is either Just (relative cell dimensions) for
-- grid spaces occupied by a cell hanging down from an earlier row, or
-- Nothing for an empty grid space. The result pairs the overhang to
-- hand to the next row with the cells kept from this row.
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
fixTableRow hang cells =
  case splitHang hang of
    -- The current position is covered by overhang: drop cells until
    -- their total width meets the width of the occupied grid spaces
    -- (or we run out), then continue after the covered stretch.
    (covered, rebuildHang, hangRest)
      | covered > 0 ->
          let uncovered = dropToWidth cellWidth covered cells
              (hangRest', kept) = fixTableRow hangRest uncovered
          in (rebuildHang hangRest', kept)
    _ -> case cells of
      -- Otherwise keep the pending cell, record the overhang it
      -- creates, and fix the rest of the row.
      cell@(Cell _ _ rowSpan colSpan _) : others ->
        let rs = max 1 rowSpan
            cs = max 1 colSpan
            hangBeyond = dropToWidth hangWidth cs hang
            (hangAfter, kept) = fixTableRow hangBeyond others
        in (toHang cs rs <> hangAfter, cell : kept)
      -- No cells left: the remaining overhang is returned untouched.
      [] -> (hang, [])
  where
    cellWidth (Cell _ _ _ w _) = w
    -- An empty grid space counts as one column wide.
    hangWidth = maybe 1 fst
    unColSpan (ColSpan k) = k

    -- Overhang contributed by a cell of the given column and row span:
    -- a single Just entry while more than one row remains, otherwise
    -- one empty grid space per column.
    toHang c r
      | r > 1     = [Just (c, r)]
      | otherwise = replicate (unColSpan c) Nothing

    -- Take the prefix of the overhang list representing filled grid
    -- spaces. Returns the total width of that prefix, a function that
    -- prepends the prefix's overhang for the next row (each row span
    -- reduced by one) onto a list, and the remainder of the overhang.
    splitHang = collect 0 id

    collect !width build (Just (c, r) : rest)
      = collect (width + c) (build . (toHang c (r - 1) ++)) rest
    collect width build rest = (width, build, rest)

    -- Drop list items until the total width of the dropped items
    -- meets or exceeds the passed width (or the list is exhausted).
    dropToWidth widthOf budget items
      | budget < 1 = items
      | otherwise  = case items of
          []     -> []
          x : xs -> dropToWidth widthOf (budget - widthOf x) xs

simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
simpTable envname hasWidthParameter = try $ do
when hasWidthParameter $ () <$ (spaces >> tok)
Expand Down Expand Up @@ -2273,11 +2348,10 @@ simpTable envname hasWidthParameter = try $ do
optional lbreak
spaces
lookAhead $ controlSeq "end" -- make sure we're at end
return $ table emptyCaption
(zip aligns widths)
(TableHead nullAttr header')
[TableBody nullAttr 0 [] rows]
(TableFoot nullAttr [])
let th = fixTableHead $ TableHead nullAttr header'
let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
let tf = TableFoot nullAttr []
return $ table emptyCaption (zip aligns widths) th tbs tf

addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
Expand Down
32 changes: 26 additions & 6 deletions test/Tests/Readers/LaTeX.hs
Original file line number Diff line number Diff line change
Expand Up @@ -174,20 +174,21 @@ tests = [ testGroup "tokenization"
, Row nullAttr [ simpleCell (plain "Two") ]
]
, "Table with nested multirow/multicolumn item" =:
T.unlines [ "\\begin{tabular}{c c c}"
, "\\multicolumn{2}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
, "& & Three\\\\"
, "Four&Five&Six\\\\"
T.unlines [ "\\begin{tabular}{c c c c}"
, "\\multicolumn{3}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
, "\\multicolumn{2}{c}{} & & Three\\\\"
, "Four&Five&Six&Seven\\\\"
, "\\end{tabular}"
] =?>
table' [AlignCenter, AlignCenter, AlignCenter]
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 2) (plain "One")
table' [AlignCenter, AlignCenter, AlignCenter, AlignCenter]
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 3) (plain "One")
, simpleCell (plain "Two")
]
, Row nullAttr [ simpleCell (plain "Three") ]
, Row nullAttr [ simpleCell (plain "Four")
, simpleCell (plain "Five")
, simpleCell (plain "Six")
, simpleCell (plain "Seven")
]
]
, "Table with multicolumn header" =:
Expand All @@ -205,6 +206,25 @@ tests = [ testGroup "tokenization"
]
]
(TableFoot nullAttr [])
, "Table with normal empty cells" =:
T.unlines [ "\\begin{tabular}{|r|r|r|}"
, "A & & B \\\\"
, " & C &"
, "\\end{tabular}"
] =?>
table emptyCaption
(replicate 3 (AlignRight, ColWidthDefault))
(TableHead nullAttr [])
[TableBody nullAttr 0 []
[Row nullAttr [ simpleCell (plain "A")
, emptyCell
, simpleCell (plain "B")
]
,Row nullAttr [ emptyCell
, simpleCell (plain "C")
, emptyCell
]]]
(TableFoot nullAttr [])
]

, testGroup "citations"
Expand Down