From 7a888e8603f70351094b0725fba76c4111e72dac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 8 Feb 2022 00:01:17 -0800 Subject: [PATCH] Fix parsing of epub footnotes. Closes #7884. --- src/Text/Pandoc/Readers/HTML.hs | 81 +++++++++++++++++++-------- src/Text/Pandoc/Readers/HTML/Types.hs | 1 + test/epub/wasteland.native | 2 +- 3 files changed, 61 insertions(+), 23 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d3b91c370e44..47b6af1932c4 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -89,7 +89,7 @@ readHtml opts inp = do result <- flip runReaderT def $ runParserT parseDoc (HTMLState def{ stateOptions = opts } - [] Nothing Set.empty [] M.empty opts) + [] Nothing Set.empty [] M.empty opts False) "source" tags case result of Right doc -> return doc @@ -106,8 +106,8 @@ stripPrefix x = x replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do - st <- getState - walkM (replaceNotes' (noteTable st)) bs + notes <- noteTable <$> getState + walkM (replaceNotes' notes) bs replaceNotes' :: PandocMonad m => [(Text, Blocks)] -> Inline -> TagParser m Inline @@ -178,6 +178,9 @@ block = ((do , epubExts , "chapter" `T.isInfixOf` type' -> eSection + _ | epubExts + , type' `elem` ["footnotes", "rearnotes"] + -> eFootnotes _ | epubExts , type' `elem` ["footnote", "rearnote"] -> mempty <$ eFootnote @@ -256,19 +259,39 @@ eCase = do Nothing -> Nothing <$ manyTill pAny (pSatisfy (matchTagClose "case")) eFootnote :: PandocMonad m => TagParser m () -eFootnote = try $ do - let notes = ["footnote", "rearnote"] +eFootnote = do guardEnabled Ext_epub_html_exts - (TagOpen tag attr') <- lookAhead pAny + TagOpen tag attr' <- lookAhead $ pSatisfy + (\case + TagOpen _ attr' + -> case lookup "type" attr' <|> lookup "epub:type" attr' of + Just "footnote" -> True + Just "rearnote" -> True + _ -> False + _ -> False) let attr = toStringAttr attr' - guard $ maybe False (`elem` notes) - (lookup "type" attr <|> lookup "epub:type" attr) let ident = fromMaybe "" (lookup "id" attr) content <- pInTags tag block - addNote ident content + updateState $ \s -> + s {noteTable = (ident, content) : noteTable s} -addNote :: PandocMonad m => Text -> Blocks -> TagParser m () -addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s}) +eFootnotes :: PandocMonad m => TagParser m Blocks +eFootnotes = try $ do + let notes = ["footnotes", "rearnotes"] + guardEnabled Ext_epub_html_exts + (TagOpen tag attr') <- lookAhead pAny + let attr = toStringAttr attr' + guard $ maybe False (`elem` notes) + (lookup "type" attr <|> lookup "epub:type" attr) + updateState $ \s -> s{ inFootnotes = True } + result <- pInTags tag block + updateState $ \s -> s{ inFootnotes = False } + if null result + -- if it just contains notes, we don't need the container: + then return result + -- but there might be content other than notes, in which case + -- we want a div: + else return $ B.divWith (toAttr attr') result eNoteref :: PandocMonad m => TagParser m Inlines eNoteref = try $ do @@ -337,6 +360,7 @@ parseTypeAttr _ = DefaultStyle pOrderedList :: PandocMonad m => TagParser m Blocks pOrderedList = try $ do TagOpen _ attribs' <- pSatisfy (matchTagOpen "ol" []) + isNoteList <- inFootnotes <$> getState let attribs = toStringAttr attribs' let start = fromMaybe 1 $ lookup "start" attribs >>= safeRead let style = fromMaybe DefaultStyle @@ -352,8 +376,14 @@ pOrderedList = try $ do -- note: if they have an
    or