forked from RSS-Bridge/rss-bridge
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[GuardianBridge] - New bridge for the Guardian (RSS-Bridge#1249)
* [GuardianBridge] - New bridge for the Guardian
- Loading branch information
Showing
1 changed file
with
96 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
<?php
/**
 * Bridge for The Guardian.
 *
 * Expands one of The Guardian's topic feeds and replaces each item's content
 * with markup taken from the linked article page (lead figure plus article
 * body), after stripping a few unwanted elements.
 */
class TheGuardianBridge extends FeedExpander {
    const MAINTAINER = 'IceWreck';
    const NAME = 'The Guardian Bridge';
    const URI = 'https://www.theguardian.com/';
    const CACHE_TIMEOUT = 600; // This is a news site, so don't cache for more than 10 mins
    const DESCRIPTION = 'RSS feed for The Guardian';

    /*
     * Topic-wise links
     *
     * You can find the base feed for any topic by appending /rss to the URL.
     * Example:
     *   https://feeds.theguardian.com/theguardian/uk-news/rss
     *   https://feeds.theguardian.com/theguardian/us-news/rss
     * Or simply
     *   https://www.theguardian.com/world/rss
     * Just add that topic as a value in the 'values' list below.
     *
     * The values are kept exactly as originally published (some with a
     * leading slash, one without) so existing bridge URLs keep working;
     * collectData() normalises the slash when building the feed URL.
     */
    const PARAMETERS = array( array(
        'feed' => array(
            'name' => 'Feed',
            'type' => 'list',
            'values' => array(
                'World News' => 'world/rss',
                'US News' => '/us-news/rss',
                'UK News' => '/uk-news/rss',
                'Europe News' => '/world/europe-news/rss',
                'Asia News' => '/world/asia/rss',
                'Tech' => '/uk/technology/rss',
                'Business News' => '/uk/business/rss',
                'Opinion' => '/uk/commentisfree/rss',
                'Lifestyle' => '/uk/lifeandstyle/rss',
                'Culture' => '/uk/culture/rss',
                'Sports' => '/uk/sport/rss'
            )
        )
    ));

    /**
     * Builds the feed URL from the selected 'feed' input and expands it,
     * passing 10 as the second argument to collectExpandableDatas().
     */
    public function collectData(){
        // The base URL already ends with '/', while most list values start
        // with one. Strip it so the request never contains '//'.
        $feed = ltrim((string)$this->getInput('feed'), '/');
        $feedURL = 'https://feeds.theguardian.com/theguardian/' . $feed;
        $this->collectExpandableDatas($feedURL, 10);
    }

    /**
     * Parses one feed entry and swaps its content for the article markup.
     *
     * @param object $newsItem Feed entry; its `link` is read as the article URL.
     * @return array Item from parent::parseItem(); 'content' is replaced only
     *               when something could be extracted from the article page.
     */
    protected function parseItem($newsItem){
        $item = parent::parseItem($newsItem);

        // --- Recovering the article ---

        $articleUrl = trim((string)$newsItem->link);
        if($articleUrl === '') {
            // Nothing to fetch; keep the item as the parent parsed it.
            return $item;
        }

        // $articlePage holds the entire page's contents
        $articlePage = getSimpleHTMLDOM($articleUrl);
        if(!$articlePage) {
            // Defensive: avoid calling find() on a non-object should the
            // helper hand back a falsy value instead of a DOM.
            return $item;
        }

        // Build the content as a plain string from the start, so that
        // $item['content'] never ends up holding a DOM node or null.
        $article = '';

        // The first <figure> contains the main article image
        $figure = $articlePage->find('figure', 0);
        if($figure) {
            $article .= $figure;
        }

        // .content__article-body has the actual article
        foreach($articlePage->find('.content__article-body') as $bodyPart) {
            $article .= $bodyPart;
        }

        // --- Cleaning up unwanted elements ---

        // Replace the image viewer wrapper with the image itself
        foreach($articlePage->find('a.article__img-container') as $container) {
            $mainImg = $container->find('img', 0);
            // A wrapper without an <img> is simply dropped; the explicit ''
            // avoids passing null to str_replace() (deprecated in PHP 8.1).
            $replacement = $mainImg ? (string)$mainImg : '';
            $article = str_replace((string)$container, $replacement, $article);
        }

        // Selectors of elements that should not appear in the feed content
        $uselessElements = array(
            '#show-caption',
            '.element-atom',
            '.submeta',
            // NOTE(review): no leading '.', so this matches a tag named
            // <youtube-media-atom>, not a class. Confirm which is intended.
            'youtube-media-atom',
            'svg'
        );

        // Remove every occurrence of the listed elements
        foreach($uselessElements as $selector) {
            foreach($articlePage->find($selector) as $unwanted) {
                $article = str_replace((string)$unwanted, '', $article);
            }
        }

        // Only override the content when the page yielded something.
        // Otherwise keep whatever the parent parser produced.
        if($article !== '') {
            $item['content'] = $article;
        }

        return $item;
    }
}