From d51416717064d61678bd07a40f271bbf0d971ff4 Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Wed, 1 Jun 2022 09:55:07 +0300 Subject: [PATCH 1/3] [EconomistWorldInBriefBridge] Add bridge --- bridges/EconomistWorldInBriefBridge.php | 144 ++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 bridges/EconomistWorldInBriefBridge.php diff --git a/bridges/EconomistWorldInBriefBridge.php b/bridges/EconomistWorldInBriefBridge.php new file mode 100644 index 00000000000..3a3b817432a --- /dev/null +++ b/bridges/EconomistWorldInBriefBridge.php @@ -0,0 +1,144 @@ + array( + 'splitGobbets' => array( + 'name' => 'Split the short stories', + 'required' => true, + 'type' => 'checkbox', + 'defaultValue' => false, + 'title' => 'Whether to split the short stories into separate entries' + ), + 'limit' => array( + 'name' => 'Truncate headers for the short stories', + 'type' => 'number', + 'defaultValue' => 100 + ), + 'agenda' => array( + 'name' => 'Add agenda for the day', + 'required' => true, + 'type' => 'checkbox', + 'defaultValue' => 'checked' + ), + 'agendaPictures' => array( + 'name' => 'Include pictures to the agenda', + 'required' => true, + 'type' => 'checkbox', + 'defaultValue' => 'checked' + ), + 'quote' => array( + 'name' => 'Include the quote of the day', + 'required' => true, + 'type' => 'checkbox' + ) + ) + ); + + public function collectData() + { + $html = getSimpleHTMLDOM(self::URI); + $gobbets = $html->find('._gobbets', 0); + if ($this->getInput('splitGobbets') == 1) { + $this->splitGobbets($gobbets); + } else { + $this->mergeGobbets($gobbets); + }; + if ($this->getInput('agenda') == 1) { + $articles = $html->find('._articles', 0); + $this->collectArticles($articles); + } + if ($this->getInput('quote') == 1) { + $quote = $html->find('._quote-container', 0); + $this->addQuote($quote); + } + } + + private function splitGobbets($gobbets) + { + $today = new Datetime(); + $today->setTime(0, 0, 0, 0); + $limit = $this->getInput('limit'); + foreach ($gobbets->find('._gobbet') as $gobbet) { + $title = $gobbet->plaintext; + $match = preg_match('/[\.,]/', $title, $matches, PREG_OFFSET_CAPTURE); + if ($match > 0) { + $point = $matches[0][1]; + $title = substr($title, 0, $point); + } + if ($limit && strlen($title) > $limit) { + $title = substr($title, 0, $limit) . '...'; + } + $item = array( + 'uri' => self::URI, + 'title' => $title, + 'content' => $gobbet->innertext, + 'timestamp' => $today->format('U'), + 'uid' => md5($gobbet->plaintext) + ); + $this->items[] = $item; + } + } + + private function mergeGobbets($gobbets) + { + $today = new Datetime(); + $today->setTime(0, 0, 0, 0); + $contents = ''; + foreach ($gobbets->find('._gobbet') as $gobbet) { + $contents .= "

{$gobbet->innertext}"; + } + $this->items[] = array( + 'uri' => self::URI, + 'title' => 'World in brief at ' . $today->format('Y.m.d'), + 'content' => $contents, + 'timestamp' => $today->format('U'), + 'uid' => 'world-at-frief-' . $today->format('U') + ); + } + + private function collectArticles($articles) + { + $i = 0; + $today = new Datetime(); + $today->setTime(0, 0, 0, 0); + foreach ($articles->find('._article') as $article) { + $title = $article->find('._headline', 0)->plaintext; + $image = $article->find('._main-image', 0); + $content = $article->find('._content', 0); + + $res_content = ''; + if ($image != null && $this->getInput('agendaPictures') == 1) { + $res_content .= $image->outertext; + } + $res_content .= $content->innertext; + $this->items[] = array( + 'uri' => self::URI, + 'title' => $title, + 'content' => $res_content, + 'timestamp' => $today->format('U'), + 'uid' => 'story-' . $today->format('U') . "{$i}", + ); + $i++; + } + } + + private function addQuote($quote) { + $today = new Datetime(); + $today->setTime(0, 0, 0, 0); + $this->items[] = array( + 'uri' => self::URI, + 'title' => 'Quote of the day ' . $today->format('Y.m.d'), + 'content' => $quote->innertext, + 'timestamp' => $today->format('U'), + 'uid' => 'quote-' . $today->format('U') + ); + } +} From 2dbce80c6842339a8266a152fea6bccc14b3c6f7 Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Sun, 5 Jun 2022 17:10:48 +0300 Subject: [PATCH 2/3] [EconomistWorldInBriefBridge] Fix PR --- bridges/EconomistWorldInBriefBridge.php | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/bridges/EconomistWorldInBriefBridge.php b/bridges/EconomistWorldInBriefBridge.php index 3a3b817432a..91d8e1d6b6c 100644 --- a/bridges/EconomistWorldInBriefBridge.php +++ b/bridges/EconomistWorldInBriefBridge.php @@ -12,7 +12,6 @@ class EconomistWorldInBriefBridge extends BridgeAbstract '' => array( 'splitGobbets' => array( 'name' => 'Split the short stories', - 'required' => true, 'type' => 'checkbox', 'defaultValue' => false, 'title' => 'Whether to split the short stories into separate entries' @@ -24,19 +23,16 @@ class EconomistWorldInBriefBridge extends BridgeAbstract ), 'agenda' => array( 'name' => 'Add agenda for the day', - 'required' => true, 'type' => 'checkbox', 'defaultValue' => 'checked' ), 'agendaPictures' => array( 'name' => 'Include pictures to the agenda', - 'required' => true, 'type' => 'checkbox', 'defaultValue' => 'checked' ), 'quote' => array( 'name' => 'Include the quote of the day', - 'required' => true, 'type' => 'checkbox' ) ) @@ -71,10 +67,10 @@ private function splitGobbets($gobbets) $match = preg_match('/[\.,]/', $title, $matches, PREG_OFFSET_CAPTURE); if ($match > 0) { $point = $matches[0][1]; - $title = substr($title, 0, $point); + $title = mb_substr($title, 0, $point); } - if ($limit && strlen($title) > $limit) { - $title = substr($title, 0, $limit) . '...'; + if ($limit && mb_strlen($title) > $limit) { + $title = mb_substr($title, 0, $limit) . '...'; } $item = array( 'uri' => self::URI, @@ -116,7 +112,8 @@ private function collectArticles($articles) $res_content = ''; if ($image != null && $this->getInput('agendaPictures') == 1) { - $res_content .= $image->outertext; + $img = $image->find('img', 0); + $res_content .= ''; } $res_content .= $content->innertext; $this->items[] = array( From 9f31d62788d90e410213da5ee52a8e1cbca91b93 Mon Sep 17 00:00:00 2001 From: SqrtMinusOne Date: Sun, 5 Jun 2022 17:23:06 +0300 Subject: [PATCH 3/3] [EconomistWorldInBriefBridge] Fix typo in UID --- bridges/EconomistWorldInBriefBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/EconomistWorldInBriefBridge.php b/bridges/EconomistWorldInBriefBridge.php index 91d8e1d6b6c..02407cf9563 100644 --- a/bridges/EconomistWorldInBriefBridge.php +++ b/bridges/EconomistWorldInBriefBridge.php @@ -96,7 +96,7 @@ private function mergeGobbets($gobbets) 'title' => 'World in brief at ' . $today->format('Y.m.d'), 'content' => $contents, 'timestamp' => $today->format('U'), - 'uid' => 'world-at-frief-' . $today->format('U') + 'uid' => 'world-in-brief-' . $today->format('U') ); }