Skip to content

Commit

Permalink
HTML reader: better handle non-li elements in ul and ol.
Browse files Browse the repository at this point in the history
For example, a `p` after a closed `li` will be incorporated into
the previous `li`. This mirrors what browsers do with this
invalid HTML.

Closes #9809.
  • Loading branch information
jgm committed May 28, 2024
1 parent 04dba0d commit 29fa97a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 16 deletions.
39 changes: 23 additions & 16 deletions src/Text/Pandoc/Readers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -328,28 +328,32 @@ eTOC = try $ do
-- NOTE(review): this span reads as a diff hunk whose +/- markers and
-- indentation were lost, so statements from the version before and the
-- version after the change are interleaved. The "before"/"after" labels
-- below are inferred from which names are in scope ('nonItem' is only a
-- parameter of the two-argument 'pListItem' signature); confirm against
-- the commit itself.
--
-- pBulletList: consumes an opening <ul> tag, collects list items until
-- 'pCloses "ul"' succeeds, and returns them as a bullet list after
-- applying 'fixPlains True' to every item.
pBulletList :: PandocMonad m => TagParser m Blocks
pBulletList = try $ do
pSatisfy (matchTagOpen "ul" [])
-- (before, presumably) 'nonItem' accepts one token that is neither an
-- opening li/ol/ul/dl tag nor a closing ul tag.
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (matchTagClose "ul" t))
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
-- (before, presumably) skip leading non-item tokens, then parse the items
-- with the old 'pListItem nonItem' and return them unchanged.
skipMany nonItem
items <- manyTill (pListItem nonItem) (pCloses "ul")
return $ B.bulletList $ map (fixPlains True) items

-- (before, presumably) old 'pListItem' signature; its argument is the
-- parser for tokens to skip after an item.
pListItem :: PandocMonad m => TagParser m a -> TagParser m Blocks
pListItem nonItem = setInListItem $ do
-- (after, presumably) the new body of pBulletList continues here: skip
-- blanks, then gather "orphan" blocks that precede the first <li>. Each
-- block is attempted only when the next token is neither an opening <li>
-- nor a closing tag.
skipMany pBlank
orphans <- many (do notFollowedBy (pSatisfy (matchTagOpen "li" []))
notFollowedBy (pSatisfy isTagClose)
block) -- e.g. <ul>, see #9187
items <- manyTill pListItem (pCloses "ul")
-- Leading orphan blocks, if any, are concatenated and become the first
-- list item; with no orphans the item list is used as is.
let items' = case orphans of
[] -> items
xs -> mconcat xs : items
return $ B.bulletList $ map (fixPlains True) items'

-- (after, presumably) pListItem: parses one <li> element together with any
-- non-<li> blocks that follow it, so that e.g. a <p> after a closed <li>
-- ends up inside that item.
pListItem :: PandocMonad m => TagParser m Blocks
pListItem = setInListItem $ do
-- Peek at the opening <li> tag to read its attributes; 'lookAhead' leaves
-- the tag in the input for 'pInTags' below.
TagOpen _ attr' <- lookAhead $ pSatisfy (matchTagOpen "li" [])
let attr = toStringAttr attr'
-- addId attaches an identifier to the item: if the first block is a Plain,
-- its inlines are wrapped in a Span carrying the id; otherwise all blocks
-- are wrapped in a Div carrying the id.
let addId ident bs = case B.toList bs of
(Plain ils:xs) -> B.fromList (Plain
[Span (ident, [], []) ils] : xs)
_ -> B.divWith (ident, [], []) bs
-- Presumably parses the contents of the <li> element as blocks.
item <- pInTags "li" block
-- (before, presumably) leftover line from the old body: 'nonItem' is not
-- bound by the zero-argument 'pListItem' signature above.
skipMany nonItem
skipMany pBlank
-- Blocks following the item are collected as orphans; each one is attempted
-- only when the next token is neither an opening <li> nor a closing tag.
orphans <- many (do notFollowedBy (pSatisfy (matchTagOpen "li" []))
notFollowedBy (pSatisfy isTagClose)
block) -- e.g. <ul>, see #9187
skipMany pBlank
-- Orphans are appended to the item; 'addId' is applied only when the <li>
-- carried an "id" attribute.
return $ maybe id addId (lookup "id" attr) $ item <> mconcat orphans

pCheckbox :: PandocMonad m => TagParser m Inlines
Expand Down Expand Up @@ -391,20 +395,23 @@ pOrderedList = try $ do
where
pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]

let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (matchTagClose "ol" t))
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
skipMany pBlank
orphans <- many (do notFollowedBy (pSatisfy (matchTagOpen "li" []))
notFollowedBy (pSatisfy isTagClose)
block) -- e.g. <ul>, see #9187
if isNoteList
then do
_ <- manyTill (eFootnote <|> pBlank) (pCloses "ol")
return mempty
else do
items <- manyTill (pListItem nonItem) (pCloses "ol")
items <- manyTill pListItem (pCloses "ol")
let items' = case orphans of
[] -> items
xs -> mconcat xs : items
return $ B.orderedListWith (start, style, DefaultDelim) $
map (fixPlains True) items
map (fixPlains True) items'

pDefinitionList :: PandocMonad m => TagParser m Blocks
pDefinitionList = try $ do
Expand Down
14 changes: 14 additions & 0 deletions test/command/9809.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
```
% pandoc -f html -t native
<ul>
<li>Bullet point.</li>
<p>Nested line.</p>
</ul>
^D
[ BulletList
[ [ Para [ Str "Bullet" , Space , Str "point." ]
, Para [ Str "Nested" , Space , Str "line." ]
]
]
]
```

0 comments on commit 29fa97a

Please sign in to comment.