Skip to content

Commit

Permalink
Merge pull request #547 from flairNLP/fix-mirror
Browse files (browse the repository at this point in the history)
Fix The Mirror
  • Loading branch information
addie9800 authored Jul 1, 2024
2 parents 9fe80b0 + 4896def commit bd39339
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/fundus/publishers/uk/the_mirror.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,8 +15,13 @@

class TheMirrorParser(ParserProxy):
class V1(BaseParser):
_paragraph_selector = XPath("/html/body/main/article/div[2]/p[text()]")
_summary_selector = XPath("/html/body/main/article/div[1]/p")
_paragraph_selector = XPath(
"/html/body/main/article/div[@class='article-body']/p[text()] | //div[@class='article-body']//div[@class='live-event-lead-entry']/p[text()] | //div[@class='article-body']//div[@class='entry-content']/p[text()]"
)
_summary_selector = XPath("/html/body/main/article/div[@class='lead-content']/p")
_subheadline_selector = XPath(
"//div[@class='article-body']/h3 | //div[@class='article-body']//div[@class='entry-content']/h3"
)
_datetime_selector = XPath("//li/span[contains(@class, 'time-container')]")

@attribute
Expand All @@ -25,6 +30,7 @@ def body(self) -> ArticleBody:
self.precomputed.doc,
summary_selector=self._summary_selector,
paragraph_selector=self._paragraph_selector,
subheadline_selector=self._subheadline_selector,
)
return body

Expand Down

0 comments on commit bd39339

Please sign in to comment.