Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EuronewsBridge] Add bridge #2786

Merged
merged 3 commits into from
Jun 7, 2022
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions bridges/EuronewsBridge.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
<?php
/**
 * Bridge for the Euronews "Just In" timeline.
 *
 * The list of articles is read from the `/api/timeline.json` endpoint of the
 * selected language edition. Every article page is then fetched to build the
 * item content, author and enclosures.
 */
class EuronewsBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'Euronews Bridge';
    const URI = 'https://www.euronews.com/';
    const CACHE_TIMEOUT = 600; // 10 minutes
    const DESCRIPTION = 'Return articles from the "Just In" feed of Euronews.';

    // Bounds for the 'limit' parameter, matching the parameter title below.
    const DEFAULT_LIMIT = 10;
    const MAX_LIMIT = 50;

    const PARAMETERS = array(
        '' => array(
            'lang' => array(
                'name' => 'Language',
                'type' => 'list',
                'defaultValue' => 'euronews.com',
                'values' => array(
                    'English' => 'euronews.com',
                    'French' => 'fr.euronews.com',
                    'German' => 'de.euronews.com',
                    'Italian' => 'it.euronews.com',
                    'Spanish' => 'es.euronews.com',
                    'Portuguese' => 'pt.euronews.com',
                    'Russian' => 'ru.euronews.com',
                    'Turkish' => 'tr.euronews.com',
                    'Greek' => 'gr.euronews.com',
                    'Hungarian' => 'hu.euronews.com',
                    'Persian' => 'per.euronews.com',
                    'Arabic' => 'arabic.euronews.com',
                    /* These versions don't have timeline.json */
                    // 'Albanian' => 'euronews.al',
                    // 'Romanian' => 'euronews.ro',
                    // 'Georgian' => 'euronewsgeorgia.com',
                    // 'Bulgarian' => 'euronewsbulgaria.com'
                    // 'Serbian' => 'euronews.rs'
                )
            ),
            'limit' => array(
                'name' => 'Limit of items per feed',
                'required' => true,
                'type' => 'number',
                'defaultValue' => 10,
                'title' => 'Maximum number of returned feed items. Maximum 50, default 10'
            ),
        )
    );

    /**
     * Fetch the timeline of the selected edition and append one feed item
     * per timeline entry to $this->items.
     *
     * @throws Exception when the timeline response is not a JSON array/object
     */
    public function collectData()
    {
        // Enforce the bounds advertised in the parameter title.
        $limit = (int) $this->getInput('limit');
        if ($limit < 1) {
            $limit = self::DEFAULT_LIMIT;
        }
        $limit = min($limit, self::MAX_LIMIT);

        $root_url = 'https://' . $this->getInput('lang');
        $url = $root_url . '/api/timeline.json?limit=' . $limit;
        $json = getContents($url);
        $data = json_decode($json, true);
        if (!is_array($data)) {
            // json_decode() yields null on malformed input; fail loudly
            // instead of iterating over a non-array.
            throw new Exception('Unable to decode the Euronews timeline: ' . $url);
        }

        foreach ($data as $datum) {
            // Without a URL there is no article page to fetch or link to.
            if (!is_array($datum) || !isset($datum['fullUrl'])) {
                continue;
            }

            $datum_uri = $root_url . $datum['fullUrl'];
            $url_datum = $this->getContent($datum_uri);

            $categories = array();
            if (isset($datum['program']['title'])) {
                $categories[] = $datum['program']['title'];
            }
            if (isset($datum['themes']) && is_array($datum['themes'])) {
                foreach ($datum['themes'] as $theme) {
                    if (isset($theme['title'])) {
                        $categories[] = $theme['title'];
                    }
                }
            }

            $this->items[] = array(
                'uri' => $datum_uri,
                'title' => $datum['title'],
                'uid' => strval($datum['id']),
                'timestamp' => $datum['publishedAt'],
                'content' => $url_datum['content'],
                'author' => $url_datum['author'],
                'enclosures' => $url_datum['enclosures'],
                // array_unique() keeps the original keys; reindex to a list.
                'categories' => array_values(array_unique($categories))
            );
        }
    }

    /**
     * Fetch an article page and extract its author, HTML content and
     * enclosures.
     *
     * @param string $url Absolute URL of the article page
     * @return array Array with the keys 'author' (string or null),
     *               'content' (HTML string) and 'enclosures' (list of URLs).
     *               When the page cannot be loaded, the author is null and
     *               the other two values are empty.
     */
    private function getContent($url)
    {
        $empty = array('author' => null, 'content' => '', 'enclosures' => array());

        try {
            $html = getSimpleHTMLDOMCached($url);
        } catch (Exception $e) {
            // Every once in a while it fails with too many redirects
            return $empty;
        }
        if (!$html) {
            return $empty;
        }

        $metadata = $this->extractMetadata($html);
        $content = $metadata['content'];

        $article_content = $html->find('.c-article-content', 0);
        if ($article_content) {
            // Normal article
            $content .= $this->renderArticleBody($article_content);
        } else {
            // Video article
            $content .= $this->renderVideoBody($html);
        }

        return array(
            'author' => $metadata['author'],
            'content' => $content,
            'enclosures' => $metadata['enclosures']
        );
    }

    /**
     * Read the author, lead image and video URL from the NewsArticle entries
     * of the page's JSON-LD "@graph".
     *
     * @param object $html Parsed article page
     * @return array Array with the keys 'author', 'content' and 'enclosures'
     */
    private function extractMetadata($html)
    {
        $author = null;
        $content = '';
        $enclosures = array();

        $script = $html->find('script[type="application/ld+json"]', 0);
        $json = $script ? json_decode($script->innertext, true) : null;

        if (is_array($json) && isset($json['@graph']) && is_array($json['@graph'])) {
            foreach ($json['@graph'] as $item) {
                if (!is_array($item) || !isset($item['@type']) || $item['@type'] !== 'NewsArticle') {
                    continue;
                }
                if (isset($item['author']['name'])) {
                    $author = $item['author']['name'];
                }
                if (isset($item['image']['url'])) {
                    $content .= '<figure>';
                    $content .= '<img src="' . $item['image']['url'] . '">';
                    if (isset($item['image']['caption'])) {
                        $content .= '<figcaption>' . $item['image']['caption'] . '</figcaption>';
                    }
                    $content .= '</figure><br>';
                }
                if (isset($item['video']['contentUrl'])) {
                    $enclosures[] = $item['video']['contentUrl'];
                }
            }
        }

        return array(
            'author' => $author,
            'content' => $content,
            'enclosures' => $enclosures
        );
    }

    /**
     * Convert the children of the `.c-article-content` element into HTML.
     * Paragraphs, headers, image widgets and quotation widgets are kept;
     * every other child is dropped.
     *
     * @param object $article_content The `.c-article-content` node
     * @return string HTML fragment
     */
    private function renderArticleBody($article_content)
    {
        $content = '';

        // Usually the .c-article-content is the root of the
        // content, but once in a blue moon the root is the second
        // div
        if (
            count($article_content->children()) === 2
            && $article_content->children(1)->tag === 'div'
        ) {
            $article_content = $article_content->children(1);
        }

        // The content is interspersed with links and stuff, so we
        // iterate over the children
        foreach ($article_content->children() as $element) {
            $tag = $element->tag;

            if ($tag === 'p') {
                $scribble_live = $element->find('#scribblelive-items', 0);
                if (is_null($scribble_live)) {
                    // A normal paragraph
                    $content .= '<p>' . $element->innertext . '</p>';
                } else {
                    // LIVE mode
                    foreach ($scribble_live->children() as $child) {
                        if ($child->tag === 'div') {
                            $content .= '<div>' . $child->innertext . '</div>';
                        }
                    }
                }
            } elseif (preg_match('/^h[1-6]$/', $tag)) {
                // Header
                $content .= '<' . $tag . '>' . $element->innertext . '</' . $tag . '>';
            } elseif ($tag === 'div') {
                $class = (string) $element->class;

                if (strpos($class, 'widget--type-image') !== false) {
                    // Image
                    $image = $element->find('img', 0);
                    if ($image) {
                        $content .= '<figure>';
                        $content .= '<img src="' . $image->src . '">';
                        $caption = $element->find('figcaption', 0);
                        if ($caption) {
                            // Use the caption node itself, not the whole widget.
                            $content .= '<figcaption>' . $caption->plaintext . '</figcaption>';
                        }
                        $content .= '</figure><br>';
                    }
                } elseif (strpos($class, 'widget--type-quotation') !== false) {
                    // Quotation. The quoted person is kept in its own
                    // variable so it cannot be confused with the article
                    // author.
                    $quote = $element->find('.widget__quoteText', 0);
                    $quote_author = $element->find('.widget__author', 0);
                    if ($quote) {
                        $content .= '<figure>';
                        $content .= '<blockquote>' . $quote->plaintext . '</blockquote>';
                        if ($quote_author) {
                            $content .= '<figcaption>' . $quote_author->plaintext . '</figcaption>';
                        }
                        $content .= '</figure><br>';
                    }
                }
            }
        }

        return $content;
    }

    /**
     * Build the HTML for a page without `.c-article-content`: the media
     * image, the description and a link to the hosted video.
     *
     * @param object $html Parsed article page
     * @return string HTML fragment
     */
    private function renderVideoBody($html)
    {
        $content = '';

        $image = $html->find('.c-article-media__img', 0);
        if ($image) {
            $content .= '<figure>';
            $content .= '<img src="' . $image->src . '">';
            $content .= '</figure><br>';
        }

        $description = $html->find('.m-object__description', 0);
        if ($description) {
            // In some editions the description is a link to the
            // current page, so only its text is kept
            $content .= '<div>' . $description->plaintext . '</div>';
        }

        // Euronews usually hosts videos on dailymotion...
        $player_div = $html->find('.dmPlayer', 0);
        if ($player_div) {
            $video_id = $player_div->getAttribute('data-video-id');
            $video_url = 'https://www.dailymotion.com/video/' . $video_id;
            $content .= '<a href="' . $video_url . '">' . $video_url . '</a>';
        }

        // ...or on YouTube
        $player_div = $html->find('.js-player-pfp', 0);
        if ($player_div) {
            $video_id = $player_div->getAttribute('data-video-id');
            $video_url = 'https://www.youtube.com/watch?v=' . $video_id;
            $content .= '<a href="' . $video_url . '">' . $video_url . '</a>';
        }

        return $content;
    }
}