Skip to content

Commit

Permalink
JATS writer: support advanced table features
Browse files Browse the repository at this point in the history
  • Loading branch information
tarleb committed Nov 19, 2020
1 parent 0c8ab8a commit d286242
Show file tree
Hide file tree
Showing 10 changed files with 775 additions and 379 deletions.
31 changes: 21 additions & 10 deletions .github/workflows/format-validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ on:
- 'test/writer.jats_publishing'
- 'test/writer.jats_archiving'
- 'test/tables.jats_archiving'
- 'test/tables/nordics.jats_archiving'
- 'test/tables/planets.jats_archiving'
- 'test/tables/students.jats_archiving'
pull_request:
branches:
- '*'
Expand All @@ -19,6 +22,9 @@ on:
- 'test/writer.jats_publishing'
- 'test/writer.jats_archiving'
- 'test/tables.jats_archiving'
- 'test/tables/nordics.jats_archiving'
- 'test/tables/planets.jats_archiving'
- 'test/tables/students.jats_archiving'

jobs:
jats:
Expand Down Expand Up @@ -65,13 +71,18 @@ jobs:
EOF
)"
jats_file="$(mktemp jats-tables.XXXXX)"
printf "$tmpl" "$(cat test/tables.jats_archiving)" > "$jats_file"
json="$(curl --form "xml=@${jats_file}" --silent "$VALIDATOR_URL")"
err_count="$(printf "%s" "$json" | jq '.errors | length')"
if [ "$err_count" -eq 0 ]; then
printf "Table output is valid when used as body content.\n"
exit 0
else
printf "Validator report:\n%s" "$json"
exit 1
fi
exit_code=0
for f in tables tables/nordics tables/planets tables/students; do
filename=test/$f.jats_archiving
printf "Validating %s...\n" "$filename"
printf "$tmpl" "$(cat $filename)" > "$jats_file"
json="$(curl --form "xml=@${jats_file}" --silent "$VALIDATOR_URL")"
err_count="$(printf "%s" "$json" | jq '.errors | length')"
if [ "$err_count" -eq 0 ]; then
printf "Table output is valid when used as body content.\n"
else
printf "Validator report:\n%s" "$json"
exit_code=1
fi
done
exit "$exit_code"
5 changes: 3 additions & 2 deletions src/Text/Pandoc/Writers/JATS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
import Text.Pandoc.XML
import Text.TeXMath
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
import qualified Text.XML.Light as Xml

-- | Convert a @'Pandoc'@ document to JATS (Archiving and Interchange
Expand Down Expand Up @@ -349,8 +350,8 @@ blockToJATS _ b@(RawBlock f str)
report $ BlockNotRendered b
return empty
blockToJATS _ HorizontalRule = return empty -- not semantic
blockToJATS opts (Table attr blkCapt specs th tb tf) =
tableToJATS opts attr blkCapt specs th tb tf
blockToJATS opts (Table attr caption colspecs thead tbody tfoot) =
tableToJATS opts (Ann.toTable attr caption colspecs thead tbody tfoot)

-- | Convert a list of inline elements to JATS.
inlinesToJATS :: PandocMonad m => WriterOptions -> [Inline] -> JATS m (Doc Text)
Expand Down
262 changes: 214 additions & 48 deletions src/Text/Pandoc/Writers/JATS/Table.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{- |
Module : Text.Pandoc.Writers.JATS.Table
Copyright : © 2020 Albert Krewinkel
Expand All @@ -14,69 +16,233 @@ module Text.Pandoc.Writers.JATS.Table
( tableToJATS
) where
import Control.Monad.Reader (asks)
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text (Text)
import Text.DocLayout (Doc, empty, vcat, ($$))
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions)
import Text.DocLayout (Doc, empty, vcat, ($$))
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Writers.JATS.Types
import Text.Pandoc.Writers.Shared (toLegacyTable)
import Text.Pandoc.XML (inTags, inTagsIndented, selfClosingTag)

import qualified Data.Text as T
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann

tableToJATS :: PandocMonad m
=> WriterOptions
-> Attr -> Caption -> [ColSpec] -> TableHead
-> [TableBody] -> TableFoot
-> Ann.Table
-> JATS m (Doc Text)
tableToJATS opts _attr blkCapt specs th tb tf = do
blockToJATS <- asks jatsBlockWriter
let (caption, aligns, widths, headers, rows) =
toLegacyTable blkCapt specs th tb tf
captionDoc <- if null caption
then return mempty
else inTagsIndented "caption" <$> blockToJATS opts (Para caption)
tbl <- captionlessTable aligns widths headers rows
tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do
let (Caption _maybeShortCaption captionBlocks) = caption
tbl <- captionlessTable opts attr colspecs thead tbodies tfoot
captionDoc <- if null captionBlocks
then return empty
else do
blockToJATS <- asks jatsBlockWriter
inTagsIndented "caption" . vcat <$>
mapM (blockToJATS opts) captionBlocks
return $ inTags True "table-wrap" [] $ captionDoc $$ tbl

captionlessTable :: PandocMonad m
=> WriterOptions
-> Attr
-> [ColSpec]
-> Ann.TableHead
-> [Ann.TableBody]
-> Ann.TableFoot
-> JATS m (Doc Text)
captionlessTable opts attr colspecs thead tbodies tfoot = do
head' <- tableHeadToJats opts thead
bodies <- mapM (tableBodyToJats opts) tbodies
foot' <- tableFootToJats opts tfoot
let validAttribs = [ "border", "cellpadding", "cellspacing", "content-type"
, "frame", "rules", "specific-use", "style", "summary"
, "width"
]
let attribs = toAttribs attr validAttribs
return $ inTags True "table" attribs $ vcat
[ colSpecListToJATS colspecs
, head'
, foot'
, vcat bodies
]

validTablePartAttribs :: [Text]
validTablePartAttribs =
[ "align", "char", "charoff", "content-type", "style", "valign" ]

tableBodyToJats :: PandocMonad m
=> WriterOptions
-> Ann.TableBody
-> JATS m (Doc Text)
tableBodyToJats opts (Ann.TableBody attr _rowHeadCols inthead rows) = do
let attribs = toAttribs attr validTablePartAttribs
intermediateHead <- if null inthead
then return mempty
else headerRowsToJats opts Thead inthead
bodyRows <- bodyRowsToJats opts rows
return $ inTags True "tbody" attribs $ intermediateHead $$ bodyRows

tableHeadToJats :: PandocMonad m
=> WriterOptions
-> Ann.TableHead
-> JATS m (Doc Text)
tableHeadToJats opts (Ann.TableHead attr rows) =
tablePartToJats opts Thead attr rows

tableFootToJats :: PandocMonad m
=> WriterOptions
-> Ann.TableFoot
-> JATS m (Doc Text)
tableFootToJats opts (Ann.TableFoot attr rows) =
tablePartToJats opts Tfoot attr rows

tablePartToJats :: PandocMonad m
=> WriterOptions
-> TablePart
-> Attr
-> [Ann.HeaderRow]
-> JATS m (Doc Text)
tablePartToJats opts tblpart attr rows =
if null rows || all isEmptyRow rows
then return mempty
else do
let tag' = case tblpart of
Thead -> "thead"
Tfoot -> "tfoot"
Tbody -> "tbody" -- this would be unexpected
let attribs = toAttribs attr validTablePartAttribs
inTags True tag' attribs <$> headerRowsToJats opts tblpart rows
where
captionlessTable aligns widths headers rows = do
let percent w = tshow (truncate (100*w) :: Integer) <> "*"
let coltags = vcat $ zipWith (\w al -> selfClosingTag "col"
([("width", percent w) | w > 0] ++
[("align", alignmentToText al)])) widths aligns
thead <- if all null headers
then return empty
else inTagsIndented "thead" <$> tableRowToJATS opts True headers
tbody <- inTagsIndented "tbody" . vcat <$>
mapM (tableRowToJATS opts False) rows
return $ inTags True "table" [] $ coltags $$ thead $$ tbody

alignmentToText :: Alignment -> Text
alignmentToText alignment = case alignment of
AlignLeft -> "left"
AlignRight -> "right"
AlignCenter -> "center"
AlignDefault -> "left"

tableRowToJATS :: PandocMonad m
isEmptyRow (Ann.HeaderRow _attr _rownum cells) = all isEmptyCell cells
isEmptyCell (Ann.Cell _colspecs _colnum cell) =
cell == Cell nullAttr AlignDefault (RowSpan 1) (ColSpan 1) []

-- | The part of a table; header, footer, or body.
data TablePart = Thead | Tfoot | Tbody
deriving (Eq)

data CellType = HeaderCell | BodyCell

data TableRow = TableRow TablePart Attr Ann.RowNumber Ann.RowHead Ann.RowBody

headerRowsToJats :: PandocMonad m
=> WriterOptions
-> TablePart
-> [Ann.HeaderRow]
-> JATS m (Doc Text)
headerRowsToJats opts tablepart =
rowListToJats opts . map toTableRow
where
toTableRow (Ann.HeaderRow attr rownum rowbody) =
TableRow tablepart attr rownum [] rowbody

bodyRowsToJats :: PandocMonad m
=> WriterOptions
-> [Ann.BodyRow]
-> JATS m (Doc Text)
bodyRowsToJats opts =
rowListToJats opts . zipWith toTableRow [1..]
where
toTableRow rownum (Ann.BodyRow attr _rownum rowhead rowbody) =
TableRow Tbody attr rownum rowhead rowbody

rowListToJats :: PandocMonad m
=> WriterOptions
-> [TableRow]
-> JATS m (Doc Text)
rowListToJats opts = fmap vcat . mapM (tableRowToJats opts)

colSpecListToJATS :: [ColSpec] -> Doc Text
colSpecListToJATS colspecs =
let hasDefaultWidth (_, ColWidthDefault) = True
hasDefaultWidth _ = False

percent w = tshow (round (100*w) :: Integer) <> "%"

col :: ColWidth -> Doc Text
col = selfClosingTag "col" . \case
ColWidthDefault -> mempty
ColWidth w -> [("width", percent w)]

in if all hasDefaultWidth colspecs
then mempty
else inTags True "colgroup" [] $ vcat $ map (col . snd) colspecs

tableRowToJats :: PandocMonad m
=> WriterOptions
-> Bool
-> [[Block]]
-> TableRow
-> JATS m (Doc Text)
tableRowToJATS opts isHeader cols =
inTagsIndented "tr" . vcat <$> mapM (tableItemToJATS opts isHeader) cols
tableRowToJats opts (TableRow tblpart attr _rownum rowhead rowbody) = do
let validAttribs = [ "align", "char", "charoff", "content-type"
, "style", "valign"
]
let attr' = toAttribs attr validAttribs
let celltype = case tblpart of
Thead -> HeaderCell
_ -> BodyCell
headcells <- mapM (cellToJats opts HeaderCell) rowhead
bodycells <- mapM (cellToJats opts celltype) rowbody
return $ inTags True "tr" attr' $ mconcat
[ vcat headcells
, vcat bodycells
]

alignmentAttrib :: Alignment -> Maybe (Text, Text)
alignmentAttrib = fmap ("align",) . \case
AlignLeft -> Just "left"
AlignRight -> Just "right"
AlignCenter -> Just "center"
AlignDefault -> Nothing

colspanAttrib :: ColSpan -> Maybe (Text, Text)
colspanAttrib = \case
ColSpan 1 -> Nothing
ColSpan n -> Just ("colspan", tshow n)

rowspanAttrib :: RowSpan -> Maybe (Text, Text)
rowspanAttrib = \case
RowSpan 1 -> Nothing
RowSpan n -> Just ("rowspan", tshow n)

cellToJats :: PandocMonad m
=> WriterOptions
-> CellType
-> Ann.Cell
-> JATS m (Doc Text)
cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) =
let align = fst colspec
in tableCellToJats opts celltype align cell

toAttribs :: Attr -> [Text] -> [(Text, Text)]
toAttribs (ident, _classes, kvs) knownAttribs =
(if T.null ident then id else (("id", ident) :)) $
filter ((`elem` knownAttribs) . fst) kvs

tableItemToJATS :: PandocMonad m
tableCellToJats :: PandocMonad m
=> WriterOptions
-> Bool
-> [Block]
-> CellType
-> Alignment
-> Cell
-> JATS m (Doc Text)
tableItemToJATS opts isHeader [Plain item] = do
inlinesToJATS <- asks jatsInlinesWriter
inTags False (if isHeader then "th" else "td") [] <$>
inlinesToJATS opts item
tableItemToJATS opts isHeader item = do
blockToJATS <- asks jatsBlockWriter
inTags False (if isHeader then "th" else "td") [] . vcat <$>
mapM (blockToJATS opts) item
tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
blockToJats <- asks jatsBlockWriter
inlinesToJats <- asks jatsInlinesWriter
let cellContents = \case
[Plain inlines] -> inlinesToJats opts inlines
blocks -> vcat <$> mapM (blockToJats opts) blocks
let tag' = case ctype of
BodyCell -> "td"
HeaderCell -> "th"
let align' = case align of
AlignDefault -> colAlign
_ -> align
let maybeCons = maybe id (:)
let validAttribs = [ "abbr", "align", "axis", "char", "charoff"
, "content-type", "headers", "scope", "style", "valign"
]
let attribs = maybeCons (alignmentAttrib align')
. maybeCons (rowspanAttrib rowspan)
. maybeCons (colspanAttrib colspan)
$ toAttribs attr validAttribs
inTags False tag' attribs <$> cellContents item
2 changes: 1 addition & 1 deletion test/Tests/Old.hs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ tests pandocPath =
, testGroup "jats"
[ testGroup "writer"
[ testGroup "jats_archiving" $
writerTests' "jats_archiving"
extWriterTests' "jats_archiving"
, testGroup "jats_articleauthoring" $
writerTests' "jats_articleauthoring"
, testGroup "jats_publishing" $
Expand Down
Loading

0 comments on commit d286242

Please sign in to comment.