From f7c71f64ee302a4855d2393c2e93a19ec2959fc2 Mon Sep 17 00:00:00 2001 From: Tobias Alexander Franke Date: Fri, 15 Mar 2019 14:18:53 +0100 Subject: [PATCH 1/5] [EconomistBridge] Added new bridge --- bridges/EconomistBridge.php | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 bridges/EconomistBridge.php diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php new file mode 100644 index 00000000000..dfbd84d5785 --- /dev/null +++ b/bridges/EconomistBridge.php @@ -0,0 +1,64 @@ +find('article') as $element) { + if($limit >= 10) + break; + + $a = $element->find('a', 0); + $href = self::URI . $a->href; + $full = getSimpleHTMLDOM($href); + $article = $full->find('article', 0); + + $header = $article->find('h1', 0); + $author = $article->find('span[itemprop="author"]', 0); + $time = $article->find('time[itemprop="dateCreated"]', 0); + $content = $article->find('div[itemprop="description"]', 0); + + // Remove newsletter subscription box + $newsletter = $content->find('div[class="newsletter-form__message"]', 0); + if ($newsletter) + $newsletter->outertext = ''; + + $newsletterForm = $content->find('form', 0); + if ($newsletterForm) + $newsletterForm->outertext = ''; + + // Remove next and previous article URLs at the bottom + $nextprev = $content->find('div[class="blog-post__next-previous-wrapper"]', 0); + if ($nextprev) + $nextprev->outertext = ''; + + $full->save(); + + $section = [ $article->find('h3[itemprop="articleSection"]', 0)->plaintext ]; + + $item = array(); + $item['title'] = $header->find('span', 0)->innertext . ': ' + . $header->find('span', 1)->innertext; + + $item['uri'] = $href; + $item['timestamp'] = strtotime($time->datetime); + $item['author'] = $author->innertext; + $item['categories'] = $section; + + $item['content'] = '' . $content->innertext; + + $limit++; + + $this->items[] = $item; + } + } +} From 61b01c681d5d4504c39877bd1c7beccbeb4ed6f1 Mon Sep 17 00:00:00 2001 From: Tobias Alexander Franke Date: Fri, 15 Mar 2019 14:28:30 +0100 Subject: [PATCH 2/5] [EconomistBridge] Use cached fetch for articles --- bridges/EconomistBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index dfbd84d5785..4ea5f1b6657 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -18,7 +18,7 @@ public function collectData() { $a = $element->find('a', 0); $href = self::URI . $a->href; - $full = getSimpleHTMLDOM($href); + $full = getSimpleHTMLDOMCached($href); $article = $full->find('article', 0); $header = $article->find('h1', 0); From ba71fe5613c41e8c24f0ab57c81aa7d09ecf2376 Mon Sep 17 00:00:00 2001 From: Tobias Alexander Franke Date: Sun, 17 Mar 2019 14:33:01 +0100 Subject: [PATCH 3/5] [EconomistBridge] Fix TravisCI issues --- bridges/EconomistBridge.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 4ea5f1b6657..38338583982 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -4,7 +4,7 @@ class EconomistBridge extends BridgeAbstract { const URI = 'https://www.economist.com'; const DESCRIPTION = 'Fetches the latest updates from the Economist.'; const MAINTAINER = 'thefranke'; - const CACHE_TIMEOUT = 60*60; // 1h + const CACHE_TIMEOUT = 60 * 60; // 1h public function collectData() { $html = getSimpleHTMLDOM(self::URI . '/latest/') @@ -13,7 +13,7 @@ public function collectData() { $limit = 0; foreach($html->find('article') as $element) { - if($limit >= 10) + if($limit >= 10) break; $a = $element->find('a', 0); @@ -25,12 +25,12 @@ public function collectData() { $author = $article->find('span[itemprop="author"]', 0); $time = $article->find('time[itemprop="dateCreated"]', 0); $content = $article->find('div[itemprop="description"]', 0); - + // Remove newsletter subscription box $newsletter = $content->find('div[class="newsletter-form__message"]', 0); if ($newsletter) $newsletter->outertext = ''; - + $newsletterForm = $content->find('form', 0); if ($newsletterForm) $newsletterForm->outertext = ''; @@ -43,7 +43,7 @@ public function collectData() { $full->save(); $section = [ $article->find('h3[itemprop="articleSection"]', 0)->plaintext ]; - + $item = array(); $item['title'] = $header->find('span', 0)->innertext . ': ' . $header->find('span', 1)->innertext; @@ -52,10 +52,10 @@ public function collectData() { $item['timestamp'] = strtotime($time->datetime); $item['author'] = $author->innertext; $item['categories'] = $section; - + $item['content'] = '' . $content->innertext; - + $limit++; $this->items[] = $item; From ddb79a860210da2d23993b8706a14565f948fd5b Mon Sep 17 00:00:00 2001 From: Tobias Alexander Franke Date: Sun, 17 Mar 2019 21:35:52 +0100 Subject: [PATCH 4/5] [EconomistBridge] Added PR suggestions --- bridges/EconomistBridge.php | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 38338583982..38427255847 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -4,17 +4,17 @@ class EconomistBridge extends BridgeAbstract { const URI = 'https://www.economist.com'; const DESCRIPTION = 'Fetches the latest updates from the Economist.'; const MAINTAINER = 'thefranke'; - const CACHE_TIMEOUT = 60 * 60; // 1h + const CACHE_TIMEOUT = 3600; // 1h + + public function getIcon() { + return 'https://www.economist.com/sites/default/files/econfinal_favicon.ico'; + } public function collectData() { $html = getSimpleHTMLDOM(self::URI . '/latest/') or returnServerError('Could not fetch latest updates form The Economist.'); - $limit = 0; - foreach($html->find('article') as $element) { - if($limit >= 10) - break; $a = $element->find('a', 0); $href = self::URI . $a->href; @@ -40,8 +40,6 @@ public function collectData() { if ($nextprev) $nextprev->outertext = ''; - $full->save(); - $section = [ $article->find('h3[itemprop="articleSection"]', 0)->plaintext ]; $item = array(); @@ -56,7 +54,8 @@ public function collectData() { $item['content'] = '' . $content->innertext; - $limit++; + if (count($this->items) >= 10) + break; $this->items[] = $item; } From b37d9bd5c39e38ae8b94a1938cf218440de6cbfd Mon Sep 17 00:00:00 2001 From: Tobias Alexander Franke Date: Fri, 29 Mar 2019 13:43:31 +0100 Subject: [PATCH 5/5] [EconomistBridge] Fix count of items --- bridges/EconomistBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 38427255847..1256be45504 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -54,10 +54,10 @@ public function collectData() { $item['content'] = '' . $content->innertext; + $this->items[] = $item; + if (count($this->items) >= 10) break; - - $this->items[] = $item; } } }