Skip to content

Commit

Permalink
Use the Figure Block constructor.
Browse files Browse the repository at this point in the history
* It provides a specific representation for figures in the pandoc's AST.
* It uses the `SimpleFigure` pattern synonym to replace the previous
  construction:

  [Para [Image ("",[],[]) [Str "CAP2"] ("../media/rId25.jpg","fig:")]]
  • Loading branch information
argent0 committed Sep 9, 2021
1 parent 1f7c230 commit dd4fa65
Show file tree
Hide file tree
Showing 58 changed files with 727 additions and 157 deletions.
11 changes: 11 additions & 0 deletions MANUAL.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3171,6 +3171,17 @@ In the `context` output format this enables the use of [Natural Tables
Natural tables allow more fine-grained global customization but come
at a performance penalty compared to extreme tables.

#### Extension: `native_figures` ####

Use pandoc's native `Figure` element for content inside `<figure>` tags, in the
case of HTML, or `figure` environments, in case of LaTeX. This, in turn, allows
some writers to produce more accurate representations of figures. It also
allows the use of the `Figure` element in filters, for custom figure output.

This extension can be enabled/disabled for the following formats:

input formats
: `latex` `html`

# Pandoc's Markdown

Expand Down
16 changes: 16 additions & 0 deletions data/pandoc.lua
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,22 @@ M.Div = M.Block:create_constructor(
{{attr = {"identifier", "classes", "attributes"}}, "content"}
)


--- Creates a figure element.
-- @function Figure
-- @tparam {Block,...} content figure block contents
-- @tparam Caption caption figure caption
-- @tparam[opt] Attr attr element attributes
-- @treturn Block figure element
M.Figure = M.Block:create_constructor(
"Figure",
function(content, caption, attr)
return {c = {ensureAttr(attr), caption, ensureList(content)}}
end,
{{attr = {"identifier", "classes", "attributes"}}, "caption", "content"}
)


--- Creates a header element.
-- @function Header
-- @tparam int level header level
Expand Down
1 change: 1 addition & 0 deletions pandoc.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ extra-source-files:
test/bodybg.gif
test/*.native
test/command/*.md
test/command/figures/*.md
test/command/*.csl
test/command/biblio.bib
test/command/averroes.bib
Expand Down
3 changes: 3 additions & 0 deletions src/Text/Pandoc/Extensions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ data Extension =
| Ext_mmd_title_block -- ^ Multimarkdown metadata block
| Ext_multiline_tables -- ^ Pandoc-style multiline tables
| Ext_native_divs -- ^ Use Div blocks for contents of <div> tags
| Ext_native_figures -- ^ Use Figure blocks for contenst of <figure> tags.
| Ext_native_spans -- ^ Use Span inlines for contents of <span>
| Ext_native_numbering -- ^ Use output format's native numbering for figures and tables
| Ext_ntb -- ^ ConTeXt Natural Tables
Expand Down Expand Up @@ -527,6 +528,7 @@ getAllExtensions f = universalExtensions <> getAll f
getAll "html" = autoIdExtensions <>
extensionsFromList
[ Ext_native_divs
, Ext_native_figures
, Ext_line_blocks
, Ext_native_spans
, Ext_empty_paragraphs
Expand All @@ -552,6 +554,7 @@ getAllExtensions f = universalExtensions <> getAll f
, Ext_raw_tex
, Ext_task_lists
, Ext_literate_haskell
, Ext_native_figures
]
getAll "beamer" = getAll "latex"
getAll "context" = autoIdExtensions <>
Expand Down
3 changes: 3 additions & 0 deletions src/Text/Pandoc/Lua/Marshaling/AST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ pushBlock = \case
CodeBlock attr code -> pushViaConstructor "CodeBlock" code (LuaAttr attr)
DefinitionList items -> pushViaConstructor "DefinitionList" items
Div attr blcks -> pushViaConstructor "Div" blcks (LuaAttr attr)
Figure attr capt blcks -> pushViaConstructor "Figure" blcks capt (LuaAttr attr)
Header lvl attr inlns -> pushViaConstructor "Header" lvl inlns (LuaAttr attr)
HorizontalRule -> pushViaConstructor "HorizontalRule"
LineBlock blcks -> pushViaConstructor "LineBlock" blcks
Expand All @@ -182,6 +183,8 @@ peekBlock idx = defineHowTo "get Block value" $! do
"CodeBlock" -> withAttr CodeBlock <$!> elementContent
"DefinitionList" -> DefinitionList <$!> elementContent
"Div" -> withAttr Div <$!> elementContent
"Figure" -> (\(LuaAttr attr, capt, bs) -> Figure attr capt bs)
<$!> elementContent
"Header" -> (\(lvl, LuaAttr attr, lst) -> Header lvl attr lst)
<$!> elementContent
"HorizontalRule" -> return HorizontalRule
Expand Down
59 changes: 40 additions & 19 deletions src/Text/Pandoc/Readers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import Data.List.Split (splitWhen)
import Data.List (foldl')
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Either (partitionEithers)
import Data.Monoid (First (..))
import qualified Data.Set as Set
import Data.Text (Text)
Expand All @@ -57,7 +58,8 @@ import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex,
Ext_native_figures),
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
Expand Down Expand Up @@ -535,24 +537,43 @@ pPara = do
<|> return (B.para contents)

pFigure :: PandocMonad m => TagParser m Blocks
pFigure = try $ do
TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
skipMany pBlank
let pImg = (\x -> (Just x, Nothing)) <$>
(pInTag TagsOmittable "p" pImage <* skipMany pBlank)
pCapt = (\x -> (Nothing, Just x)) <$> do
bs <- pInTags "figcaption" block
return $ blocksToInlines' $ B.toList bs
pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure")
res <- many (pImg <|> pCapt <|> pSkip)
let mbimg = msum $ map fst res
let mbcap = msum $ map snd res
TagClose _ <- pSatisfy (matchTagClose "figure")
let caption = fromMaybe mempty mbcap
case B.toList <$> mbimg of
Just [Image attr _ (url, tit)] ->
return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption
_ -> mzero
pFigure = do
has_native_figures <-
extensionEnabled Ext_native_figures <$> getOption readerExtensions
if has_native_figures
then pNativeFigure
else try $ do
TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
skipMany pBlank
let pImg = (\x -> (Just x, Nothing)) <$>
(pInTag TagsOmittable "p" pImage <* skipMany pBlank)
pCapt = (\x -> (Nothing, Just x)) <$> do
bs <- pInTags "figcaption" block
return $ blocksToInlines' $ B.toList bs
pSkip = (Nothing, Nothing) <$ pSatisfy (not . matchTagClose "figure")
-- res :: [(Maybe Inlines, Maybe Inlines)]
-- [(Just img, Nothing), (Nothing, Just caption), ...]
res <- many (pImg <|> pCapt <|> pSkip)
-- Takes the first image and the first caption, if any, drop the rest.
let mbimg = msum $ map fst res
let mbcap = msum $ map snd res -- mbcap :: Maybe Inlines
TagClose _ <- pSatisfy (matchTagClose "figure")
let caption = fromMaybe mempty mbcap
-- only process one image
case B.toList <$> mbimg of
Just [Image attr _ (url, tit)] ->
return $ B.simpleFigureWith attr caption url tit
_ -> mzero

pNativeFigure :: PandocMonad m => TagParser m Blocks
pNativeFigure = try $ do
TagOpen tag attrList <- lookAhead $ pSatisfy (matchTagOpen "figure" [])
--let (ident, classes, kvs) = toAttr attr
contents <- pInTags tag (many $ Left <$> pInTags "figcaption" block <|> (Right <$> block))

let (captions, rest) = partitionEithers contents
-- I should capture the caption
return $ B.figureWith (toAttr attrList) (Caption Nothing (B.toList (mconcat captions))) $ mconcat rest

pCodeBlock :: PandocMonad m => TagParser m Blocks
pCodeBlock = try $ do
Expand Down
66 changes: 46 additions & 20 deletions src/Text/Pandoc/Readers/LaTeX.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import Data.Maybe (fromMaybe, maybeToList)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Data.Either (partitionEithers)
import Skylighting (defaultSyntaxMap)
import System.FilePath (addExtension, replaceExtension, takeExtension)
import Text.Collate.Lang (renderLang)
Expand Down Expand Up @@ -935,8 +936,8 @@ environments = M.union (tableEnvironments blocks inline) $
, ("letter", env "letter" letterContents)
, ("minipage", env "minipage" $
skipopts *> spaces *> optional braced *> spaces *> blocks)
, ("figure", env "figure" $ skipopts *> figure)
, ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
, ("figure", env "figure" $ skipopts *> Text.Pandoc.Readers.LaTeX.figure)
, ("subfigure", env "subfigure" $ skipopts *> tok *> Text.Pandoc.Readers.LaTeX.figure)
, ("center", divWith ("", ["center"], []) <$> env "center" blocks)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
Expand Down Expand Up @@ -1088,30 +1089,55 @@ letterContents = do
return $ addr <> bs -- sig added by \closing

figure :: PandocMonad m => LP m Blocks
figure = try $ do
figure = do
has_native_figures <-
extensionEnabled Ext_native_figures <$> getOption readerExtensions
if has_native_figures
then nativeFigure
else try $ do
resetCaption
blocks >>= addImageCaption

nativeFigure :: PandocMonad m => LP m Blocks
nativeFigure = try $ do
resetCaption
blocks >>= addImageCaption
innerContent <- many $ try (Left <$> label) <|> (Right <$> block)
let content = walk go $ mconcat $ snd $ partitionEithers innerContent
labelResult <- sLastLabel <$> getState
let attr = case labelResult of
Just lab -> (lab, [], [])
_ -> nullAttr
captResult <- sCaption <$> getState
case captResult of
Nothing -> return $ B.figureWith attr (Caption Nothing []) content
Just capt -> return $ B.figureWith attr (B.caption Nothing $ B.plain capt) content

where
-- Remove the `Image` caption b.c. it's on the `Figure`
go (Para [Image attr _ target]) = Plain [Image attr [] target]
go x = x

addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
where go (Image attr@(_, cls, kvs) alt (src,tit))
where go p@(Para [Image attr@(_, cls, kvs) _ (src, tit)])
| not ("fig:" `T.isPrefixOf` tit) = do
st <- getState
let (alt', tit') = case sCaption st of
Just ils -> (toList ils, "fig:" <> tit)
Nothing -> (alt, tit)
attr' = case sLastLabel st of
Just lab -> (lab, cls, kvs)
Nothing -> attr
case attr' of
("", _, _) -> return ()
(ident, _, _) -> do
num <- getNextNumber sLastFigureNum
setState
st{ sLastFigureNum = num
, sLabels = M.insert ident
[Str (renderDottedNum num)] (sLabels st) }
return $ Image attr' alt' (src, tit')
case sCaption st of
Nothing -> return p
Just figureCaption -> do
let attr' = case sLastLabel st of
Just lab -> (lab, cls, kvs)
Nothing -> attr
case attr' of
("", _, _) -> return ()
(ident, _, _) -> do
num <- getNextNumber sLastFigureNum
setState
st{ sLastFigureNum = num
, sLabels = M.insert ident
[Str (renderDottedNum num)] (sLabels st) }

return $ SimpleFigure attr' (B.toList figureCaption) (src, tit)
go x = return x

coloredBlock :: PandocMonad m => Text -> LP m Blocks
Expand Down
27 changes: 13 additions & 14 deletions src/Text/Pandoc/Readers/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1013,19 +1013,18 @@ normalDefinitionList = do
para :: PandocMonad m => MarkdownParser m (F Blocks)
para = try $ do
exts <- getOption readerExtensions
let implicitFigures x
| extensionEnabled Ext_implicit_figures exts = do
x' <- x
case B.toList x' of
[Image attr alt (src,tit)]
| not (null alt) ->
-- the fig: at beginning of title indicates a figure
return $ B.singleton
$ Image attr alt (src, "fig:" <> tit)
_ -> return x'
| otherwise = x
result <- implicitFigures . trimInlinesF <$> inlines1
option (B.plain <$> result)

result <- trimInlinesF <$> inlines1
let figureOr constr inlns =
case B.toList inlns of
[Image attr figCaption (src, tit)]
| extensionEnabled Ext_implicit_figures exts
, not (null figCaption) -> do
B.simpleFigureWith attr (B.fromList figCaption) src tit

_ -> constr inlns

option (figureOr B.plain <$> result)
$ try $ do
newline
(mempty <$ blanklines)
Expand All @@ -1047,7 +1046,7 @@ para = try $ do
if divLevel > 0
then lookAhead divFenceEnd
else mzero
return $ B.para <$> result
return $ figureOr B.para <$> result

plain :: PandocMonad m => MarkdownParser m (F Blocks)
plain = fmap B.plain . trimInlinesF <$> inlines1
Expand Down
9 changes: 7 additions & 2 deletions src/Text/Pandoc/Readers/MediaWiki.hs
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,12 @@ para = do
contents <- trimInlines . mconcat <$> many1 inline
if F.all (==Space) contents
then return mempty
else return $ B.para contents
else case B.toList contents of
-- For the MediaWiki format all images are considered figures
[Image attr figureCaption (src, title)] ->
return $ B.simpleFigureWith
attr (B.fromList figureCaption) src title
_ -> return $ B.para contents

table :: PandocMonad m => MWParser m Blocks
table = do
Expand Down Expand Up @@ -631,7 +636,7 @@ image = try $ do
let attr = ("", [], kvs)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
return $ B.imageWith attr fname ("fig:" <> stringify caption) caption
return $ B.imageWith attr fname (stringify caption) caption

imageOption :: PandocMonad m => MWParser m Text
imageOption = try $ char '|' *> opt
Expand Down
19 changes: 10 additions & 9 deletions src/Text/Pandoc/Readers/Org/Blocks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -474,15 +474,16 @@ figure = try $ do
figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
figKeyVals = blockAttrKeyValues figAttrs
attr = (figLabel, mempty, figKeyVals)
figTitle = (if isFigure then withFigPrefix else id) figName
in
B.para . B.imageWith attr imgSrc figTitle <$> figCaption

withFigPrefix :: Text -> Text
withFigPrefix cs =
if "fig:" `T.isPrefixOf` cs
then cs
else "fig:" <> cs
in if isFigure
then (\c ->
B.simpleFigureWith
attr c imgSrc (unstackFig figName)) <$> figCaption
else B.para . B.imageWith attr imgSrc figName <$> figCaption
unstackFig :: Text -> Text
unstackFig figName =
if "fig:" `T.isPrefixOf` figName
then T.drop 4 figName
else figName

-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
Expand Down
4 changes: 2 additions & 2 deletions src/Text/Pandoc/Readers/RST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -725,8 +725,8 @@ directive' = do
"figure" -> do
(caption, legend) <- parseFromString' extractCaption body'
let src = escapeURI $ trim top
return $ B.para (B.imageWith (imgAttr "figclass") src "fig:"
caption) <> legend
return $ B.simpleFigureWith
(imgAttr "figclass") caption src "" <> legend
"image" -> do
let src = escapeURI $ trim top
let alt = B.str $ maybe "image" trim $ lookup "alt" fields
Expand Down
1 change: 1 addition & 0 deletions src/Text/Pandoc/Shared.hs
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,7 @@ blockToInlines (Table _ _ _ (TableHead _ hbd) bodies (TableFoot _ fbd)) =
unTableBodies = concatMap unTableBody
blockToInlines (Div _ blks) = blocksToInlines' blks
blockToInlines Null = mempty
blockToInlines (Figure _ _ body) = blocksToInlines' body

blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines
blocksToInlinesWithSep sep =
Expand Down
5 changes: 2 additions & 3 deletions src/Text/Pandoc/Writers/AsciiDoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,8 @@ blockToAsciiDoc opts (Div (id',"section":_,_)
blockToAsciiDoc opts (Plain inlines) = do
contents <- inlineListToAsciiDoc opts inlines
return $ contents <> blankline
blockToAsciiDoc opts (Para [Image attr alternate (src,tgt)])
blockToAsciiDoc opts (SimpleFigure attr alternate (src, tit))
-- image::images/logo.png[Company logo, title="blah"]
| Just tit <- T.stripPrefix "fig:" tgt
= (\args -> "image::" <> args <> blankline) <$>
imageArguments opts attr alternate src tit
blockToAsciiDoc opts (Para inlines) = do
Expand Down Expand Up @@ -187,7 +186,7 @@ blockToAsciiDoc opts (Header level (ident,_,_) inlines) = do
return $ identifier $$
nowrap (text (replicate (level + 1) '=') <> space <> contents) <>
blankline

blockToAsciiDoc opts (Figure attr _ body) = blockToAsciiDoc opts $ Div attr body
blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ flush (
if null classes
then "...." $$ literal str $$ "...."
Expand Down
Loading

0 comments on commit dd4fa65

Please sign in to comment.