From 598ee5b51eaba62dc672f9e9f6f96ac628e56263 Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 14 Feb 2024 16:02:54 +0100 Subject: [PATCH 01/97] fix(pinterest): set enclosure so it emits mrss media:content prop (#3980) --- bridges/PinterestBridge.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index fc5b1c19795..8338fb25bf3 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -39,6 +39,9 @@ private function fixLowRes() $pattern = '/https\:\/\/i\.pinimg\.com\/[a-zA-Z0-9]*x\//'; foreach ($this->items as $item) { $item['content'] = preg_replace($pattern, 'https://i.pinimg.com/originals/', $item['content']); + $item['enclosures'] = [ + $item['uri'], + ]; $newitems[] = $item; } $this->items = $newitems; From 4d15ffd2cf44807210cb5d2783ce3bc5c8476275 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Fri, 16 Feb 2024 03:58:15 +0100 Subject: [PATCH 02/97] [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] (#3982) Exclude thread results Some categories showed some thread in the middle of the deals : now only the deals are handled Updated the "no results" text to follow the sites changes --- bridges/DealabsBridge.php | 2 +- bridges/HotUKDealsBridge.php | 2 +- bridges/MydealsBridge.php | 2 +- bridges/PepperBridgeAbstract.php | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bridges/DealabsBridge.php b/bridges/DealabsBridge.php index a5a3771b3f8..c65f0c75296 100644 --- a/bridges/DealabsBridge.php +++ b/bridges/DealabsBridge.php @@ -1913,7 +1913,7 @@ class DealabsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/bons-plans?merchant-id=', 'request-error' => 'Impossible de joindre Dealabs', 'thread-error' => 'Impossible de déterminer l\'ID de la discussion. 
Vérifiez l\'URL que vous avez entré', - 'no-results' => 'Il n'y a rien à afficher pour le moment :(', + 'no-results' => 'Aucun résultat', 'currency' => '€', 'relative-date-indicator' => [ 'il y a', diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index 44da417a1c8..1f059123b41 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -3277,7 +3277,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/deals?merchant-id=', 'request-error' => 'Could not request HotUKDeals', 'thread-error' => 'Unable to determine the thread ID. Check the URL you entered', - 'no-results' => 'Ooops, looks like we could', + 'no-results' => 'no results', 'currency' => '£', 'relative-date-indicator' => [ 'ago', diff --git a/bridges/MydealsBridge.php b/bridges/MydealsBridge.php index dda3d2a9076..08e32a0c370 100644 --- a/bridges/MydealsBridge.php +++ b/bridges/MydealsBridge.php @@ -2024,7 +2024,7 @@ class MydealsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/gutscheine?merchant-id=', 'request-error' => 'Could not request mydeals', 'thread-error' => 'Die ID der Diskussion kann nicht ermittelt werden. Überprüfen Sie die eingegebene URL', - 'no-results' => 'Ups, wir konnten nichts', + 'no-results' => 'keine Ergebnisse', 'currency' => '€', 'relative-date-indicator' => [ 'vor', diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 2516fc1ee33..d0e152384c7 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -44,7 +44,7 @@ protected function collectDataKeywords() protected function collectDeals($url) { $html = getSimpleHTMLDOM($url); - $list = $html->find('article[id]'); + $list = $html->find('article[id][class*=thread--deal]]'); // Deal Image Link CSS Selector $selectorImageLink = implode( @@ -109,9 +109,9 @@ protected function collectDeals($url) $item['content'] = '
' . $this->getImage($deal) - . '"/>' + . '' . $this->getHTMLTitle($item) . $this->getPrice($jsonDealData) . $this->getDiscount($jsonDealData) @@ -430,7 +430,7 @@ private function getImage($deal) { // Get thread Image JSON content $content = Json::decode($deal->find('div[class*=threadGrid-image]', 0)->find('div[class=js-vue2]', 0)->getAttribute('data-vue2')); - return $content['props']['threadImageUrl']; + return ''; } /** From 7813f4564e9c4f6054c827cead26fe6a6b2605c5 Mon Sep 17 00:00:00 2001 From: July Date: Thu, 15 Feb 2024 22:14:17 -0500 Subject: [PATCH 03/97] AO3Bridge: add options to fetch chapter contents and list titles (#3981) * AO3Bridge: add options to fetch chapter contents and titles for list feeds and add downloads for each fic to enclosures * AO3Bridge: fix list default value * AO3Bridge: fix erroneous dynamic property usage * AO3Bridge: fix unit test failure for getURI --- bridges/AO3Bridge.php | 110 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 91 insertions(+), 19 deletions(-) diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index e30c6b70c96..32bbb0a2418 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -12,8 +12,20 @@ class AO3Bridge extends BridgeAbstract 'url' => [ 'name' => 'url', 'required' => true, - // Example: F/F tag, complete works only - 'exampleValue' => 'https://archiveofourown.org/works?work_search[complete]=T&tag_id=F*s*F', + // Example: F/F tag + 'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works', + ], + 'range' => [ + 'name' => 'Chapter Content', + 'title' => 'Chapter(s) to include in each work\'s feed entry', + 'defaultValue' => null, + 'type' => 'list', + 'values' => [ + 'None' => null, + 'First' => 'first', + 'Latest' => 'last', + 'Entire work' => 'all', + ], ], ], 'Bookmarks' => [ @@ -39,18 +51,13 @@ public function collectData() { switch ($this->queriedContext) { case 'Bookmarks': - $user = $this->getInput('user'); - $this->title = $user; - $url = self::URI - . '/users/' . 
$user - . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; - $this->collectList($url); + $this->collectList($this->getURI()); break; case 'List': - $this->collectList($this->getInput('url')); + $this->collectList($this->getURI()); break; case 'Work': - $this->collectWork($this->getInput('id')); + $this->collectWork($this->getURI()); break; } } @@ -61,9 +68,21 @@ public function collectData() */ private function collectList($url) { - $html = getSimpleHTMLDOM($url); + $httpClient = RssBridge::getHttpClient(); + $version = 'v0.0.1'; + $agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"]; + + $response = $httpClient->request($url, $agent); + $html = \str_get_html($response->getBody()); $html = defaultLinkTo($html, self::URI); + // Get list title. Will include page range + count in some cases + $heading = ($html->find('#main > h2', 0)); + if ($heading->find('a.tag')) { + $heading = $heading->find('a.tag', 0); + } + $this->title = $heading->plaintext; + foreach ($html->find('.index.group > li') as $element) { $item = []; @@ -83,6 +102,36 @@ private function collectList($url) $chapters = (isset($chapters) ? $chapters->plaintext : 0); $item['uid'] = $item['uri'] . 
"/$strdate/$chapters"; + // Fetch workskin of desired chapter(s) in list + if ($this->getInput('range')) { + $url = $item['uri']; + switch ($this->getInput('range')) { + case ('all'): + $url .= '?view_full_work=true'; + break; + case ('first'): + break; + case ('last'): + // only way to get this is using the navigate page unfortunately + $url .= '/navigate'; + $response = $httpClient->request($url, $agent); + $html = \str_get_html($response->getBody()); + $html = defaultLinkTo($html, self::URI); + $url = $html->find('ol.index.group > li > a', -1)->href; + break; + } + $response = $httpClient->request($url, $agent); + $html = \str_get_html($response->getBody()); + $html = defaultLinkTo($html, self::URI); + $item['content'] .= $html->find('#workskin', 0); + } + + // Use predictability of download links to generate enclosures + $wid = explode('/', $item['uri'])[4]; + foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) { + $item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext; + } + $this->items[] = $item; } } @@ -90,26 +139,29 @@ private function collectList($url) /** * Feed for recent chapters of a specific work. */ - private function collectWork($id) + private function collectWork($url) { - $url = self::URI . "/works/$id/navigate"; $httpClient = RssBridge::getHttpClient(); - $version = 'v0.0.1'; - $response = $httpClient->request($url, [ - 'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)", - ]); + $agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"]; + $response = $httpClient->request($url . '/navigate', $agent); $html = \str_get_html($response->getBody()); $html = defaultLinkTo($html, self::URI); + $response = $httpClient->request($url . 
'?view_full_work=true', $agent); + $workhtml = \str_get_html($response->getBody()); + $workhtml = defaultLinkTo($workhtml, self::URI); + $this->title = $html->find('h2 a', 0)->plaintext; - foreach ($html->find('ol.index.group > li') as $element) { + $nav = $html->find('ol.index.group > li'); + for ($i = 0; $i < count($nav); $i++) { $item = []; + $element = $nav[$i]; $item['title'] = $element->find('a', 0)->plaintext; - $item['content'] = $element; + $item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0); $item['uri'] = $element->find('a', 0)->href; $strdate = $element->find('span.datetime', 0)->plaintext; @@ -138,4 +190,24 @@ public function getIcon() { return self::URI . '/favicon.ico'; } + + public function getURI() + { + $url = parent::getURI(); + switch ($this->queriedContext) { + case 'Bookmarks': + $user = $this->getInput('user'); + $url = self::URI + . '/users/' . $user + . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; + break; + case 'List': + $url = $this->getInput('url'); + break; + case 'Work': + $url = self::URI . '/works/' . 
$this->getInput('id'); + break; + } + return $url; + } } From e65155f440b9d66771fd6096f4c13b77323a6217 Mon Sep 17 00:00:00 2001 From: Korytov Pavel Date: Sat, 17 Feb 2024 00:24:13 +0300 Subject: [PATCH 04/97] [OpenCVEBridge] Add bridge (#3978) * [OpenCVEBridge] Add bridge * [OpenCVEBridge] Fix tests * [OpenCVEBridge] Fix description of the filter parameter --- bridges/OpenCVEBridge.php | 427 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 427 insertions(+) create mode 100644 bridges/OpenCVEBridge.php diff --git a/bridges/OpenCVEBridge.php b/bridges/OpenCVEBridge.php new file mode 100644 index 00000000000..594bb9ece3c --- /dev/null +++ b/bridges/OpenCVEBridge.php @@ -0,0 +1,427 @@ + [ + 'instance' => [ + 'name' => 'OpenCVE Instance', + 'required' => true, + 'defaultValue' => 'https://www.opencve.io', + 'exampleValue' => 'https://www.opencve.io' + ], + 'login' => [ + 'name' => 'Login', + 'type' => 'text', + 'required' => true + ], + 'password' => [ + 'name' => 'Password', + 'type' => 'text', + 'required' => true + ], + 'pages' => [ + 'name' => 'Number of pages', + 'type' => 'number', + 'required' => false, + 'exampleValue' => 1, + 'defaultValue' => 1 + ], + 'filter' => [ + 'name' => 'Filter', + 'type' => 'text', + 'required' => false, + 'exampleValue' => 'search:jenkins;product:gitlab,cvss:critical', + 'title' => 'Syntax: param1:value1,param2:value2;param1query2:param2query2. See https://docs.opencve.io/api/cve/ for parameters' + ], + 'upd_timestamp' => [ + 'name' => 'Use updated_at instead of created_at as timestamp', + 'type' => 'checkbox' + ], + 'trunc_summary' => [ + 'name' => 'Truncate summary for header', + 'type' => 'number', + 'defaultValue' => 100 + ], + 'fetch_contents' => [ + 'name' => 'Fetch detailed contents for CVEs', + 'defaultValue' => 'checked', + 'type' => 'checkbox' + ] + ] + ]; + + const CSS = ' + '; + + public function collectData() + { + $creds = $this->getInput('login') . ':' . 
$this->getInput('password'); + $authHeader = 'Authorization: Basic ' . base64_encode($creds); + $instance = $this->getInput('instance'); + + $queries = []; + $filter = $this->getInput('filter'); + $filterValues = []; + if ($filter && mb_strlen($filter) > 0) { + $filterValues = explode(';', $filter); + } else { + $queries[''] = []; + } + foreach ($filterValues as $filterValue) { + $params = explode(',', $filterValue); + $queryName = $filterValue; + $query = []; + foreach ($params as $param) { + [$key, $value] = explode(':', $param); + if ($key == 'title') { + $queryName = $value; + } else { + $query[$key] = $value; + } + } + $queries[$queryName] = $query; + } + + $fetchedIds = []; + + foreach ($queries as $queryName => $query) { + for ($i = 1; $i <= $this->getInput('pages'); $i++) { + $queryPaginated = array_merge($query, ['page' => $i]); + $url = $instance . '/api/cve?' . http_build_query($queryPaginated); + $response = getContents( + $url, + [$authHeader] + ); + $titlePrefix = ''; + if (count($queries) > 1) { + $titlePrefix = '[' . $queryName . '] '; + } + + foreach (json_decode($response) as $cveItem) { + if (array_key_exists($cveItem->id, $fetchedIds)) { + continue; + } + $fetchedIds[$cveItem->id] = true; + $item = [ + 'uri' => $instance . '/cve/' . $cveItem->id, + 'uid' => $cveItem->id, + ]; + if ($this->getInput('upd_timestamp') == 1) { + $item['timestamp'] = strtotime($cveItem->updated_at); + } else { + $item['timestamp'] = strtotime($cveItem->created_at); + } + if ($this->getInput('fetch_contents')) { + [$content, $title] = $this->fetchContents( + $cveItem, + $titlePrefix, + $instance, + $authHeader + ); + $item['content'] = $content; + $item['title'] = $title; + } else { + $item['content'] = $cveItem->summary . 
$this->getLinks($cveItem->id); + $item['title'] = $this->getTitle($titlePrefix, $cveItem); + } + $this->items[] = $item; + } + } + } + usort($this->items, function ($a, $b) { + return $b['timestamp'] - $a['timestamp']; + }); + } + + private function getTitle($titlePrefix, $cveItem) + { + $summary = $cveItem->summary; + $limit = $this->getInput('limit'); + if ($limit && mb_strlen($summary) > 100) { + $summary = mb_substr($summary, 0, $limit) + '...'; + } + return $titlePrefix . $cveItem->id . '. ' . $summary; + } + + private function fetchContents($cveItem, $titlePrefix, $instance, $authHeader) + { + $url = $instance . '/api/cve/' . $cveItem->id; + $response = getContents( + $url, + [$authHeader] + ); + $datum = json_decode($response); + + $title = $this->getTitleFromDatum($datum, $titlePrefix); + + $result = self::CSS; + $result .= '

' . $cveItem->id . '

'; + $result .= $this->getCVSSLabels($datum); + $result .= '

' . $datum->summary . '

'; + $result .= <<Information: +

+

    +
  • Publication date: {$datum->raw_nvd_data->published} +
  • Last modified: {$datum->raw_nvd_data->lastModified} +
  • Last modified: {$datum->raw_nvd_data->lastModified} +
+

+ EOD; + + $result .= $this->getV3Table($datum); + $result .= $this->getV2Table($datum); + + $result .= $this->getLinks($datum->id); + $result .= $this->getReferences($datum); + + $result .= $this->getVendors($datum); + + return [$result, $title]; + } + + private function getTitleFromDatum($datum, $titlePrefix) + { + $title = $titlePrefix; + if ($datum->cvss->v3) { + $title .= "[v3: {$datum->cvss->v3}] "; + } + if ($datum->cvss->v2) { + $title .= "[v2: {$datum->cvss->v2}] "; + } + $title .= $datum->id . '. '; + $titlePostfix = $datum->summary; + $limit = $this->getInput('limit'); + if ($limit && mb_strlen($titlePostfix) > 100) { + $titlePostfix = mb_substr($titlePostfix, 0, $limit) + '...'; + } + $title .= $titlePostfix; + return $title; + } + + private function getCVSSLabels($datum) + { + $CVSSv2Text = 'n/a'; + $CVSSv2Class = 'cvss-na-color'; + if ($datum->cvss->v2) { + $importance = ''; + if ($datum->cvss->v2 >= 7) { + $importance = 'HIGH'; + $CVSSv2Class = 'cvss-high-color'; + } else if ($datum->cvss->v2 >= 4) { + $importance = 'MEDIUM'; + $CVSSv2Class = 'cvss-medium-color'; + } else { + $importance = 'LOW'; + $CVSSv2Class = 'cvss-low-color'; + } + $CVSSv2Text = sprintf('[%s] %.1f', $importance, $datum->cvss->v2); + } + $CVSSv2Item = "
CVSS v2:
{$CVSSv2Text}
"; + + $CVSSv3Text = 'n/a'; + $CVSSv3Class = 'cvss-na-color'; + if ($datum->cvss->v3) { + $importance = ''; + if ($datum->cvss->v3 >= 9) { + $importance = 'CRITICAL'; + $CVSSv3Class = 'cvss-crit-color'; + } else if ($datum->cvss->v3 >= 7) { + $importance = 'HIGH'; + $CVSSv3Class = 'cvss-high-color'; + } else if ($datum->cvss->v3 >= 4) { + $importance = 'MEDIUM'; + $CVSSv3Class = 'cvss-medium-color'; + } else { + $importance = 'LOW'; + $CVSSv3Class = 'cvss-low-color'; + } + $CVSSv3Text = sprintf('[%s] %.1f', $importance, $datum->cvss->v3); + } + $CVSSv3Item = "
CVSS v3:
{$CVSSv3Text}
"; + return '
' . $CVSSv3Item . $CVSSv2Item . '
'; + } + + private function getReferences($datum) + { + if (count($datum->raw_nvd_data->references) == 0) { + return ''; + } + $res = '

References:

    '; + foreach ($datum->raw_nvd_data->references as $ref) { + $item = '
  • '; + if (isset($ref->tags) && count($ref->tags) > 0) { + $item .= '[' . implode(', ', $ref->tags) . '] '; + } + $item .= "url}\">{$ref->url}"; + $item .= '
  • '; + $res .= $item; + } + $res .= '

'; + return $res; + } + + private function getLinks($id) + { + return <<Links +

+

+

+ EOD; + } + + private function getV3Table($datum) + { + $metrics = $datum->raw_nvd_data->metrics; + if (!isset($metrics->cvssMetricV31) || count($metrics->cvssMetricV31) == 0) { + return ''; + } + $v3 = $metrics->cvssMetricV31[0]; + $data = $v3->cvssData; + return << +

CVSS v3 details

+ + + + + + + + + + + + + + + + + + + + + +
Impact score{$v3->impactScore}Exploitability score{$v3->exploitabilityScore}
Attack vector{$data->attackVector}Confidentiality Impact{$data->confidentialityImpact}
Attack complexity{$data->attackComplexity}Integrity Impact{$data->integrityImpact}
Privileges Required{$data->privilegesRequired}Availability Impact{$data->availabilityImpact}
User Interaction{$data->userInteraction}Scope{$data->scope}
+ + EOD; + } + + private function getV2Table($datum) + { + $metrics = $datum->raw_nvd_data->metrics; + if (!isset($metrics->cvssMetricV2) || count($metrics->cvssMetricV2) == 0) { + return ''; + } + $v2 = $metrics->cvssMetricV2[0]; + $data = $v2->cvssData; + return << +

CVSS v2 details

+ + + + + + + + + + + + + + + + + + +
Impact score{$v2->impactScore}Exploitability score{$v2->exploitabilityScore}
Access Vector{$data->accessVector}Confidentiality Impact{$data->confidentialityImpact}
Access Complexity{$data->accessComplexity}Integrity Impact{$data->integrityImpact}
Authentication{$data->authentication}Availability Impact{$data->availabilityImpact}
+ + EOD; + } + + private function getVendors($datum) + { + if (count((array)$datum->vendors) == 0) { + return ''; + } + $res = '

Affected products

    '; + foreach ($datum->vendors as $vendor => $products) { + $res .= "
  • {$vendor}"; + if (count($products) > 0) { + $res .= '
      '; + foreach ($products as $product) { + $res .= '
    • ' . $product . '
    • '; + } + $res .= '
    '; + } + $res .= '
  • '; + } + $res .= '

'; + } +} From 932f20d4341344807d6ae25d53ba811ba1e4280e Mon Sep 17 00:00:00 2001 From: hleskien <34342248+hleskien@users.noreply.github.com> Date: Sun, 18 Feb 2024 19:19:33 +0100 Subject: [PATCH 05/97] fixed date with time in LuftfahrtBundesAmtBridge (#3987) --- bridges/LuftfahrtBundesAmtBridge.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bridges/LuftfahrtBundesAmtBridge.php b/bridges/LuftfahrtBundesAmtBridge.php index 2b0384a21c2..406d2476450 100644 --- a/bridges/LuftfahrtBundesAmtBridge.php +++ b/bridges/LuftfahrtBundesAmtBridge.php @@ -26,8 +26,13 @@ protected function provideFeedIcon(\DOMXPath $xpath) protected function formatItemTimestamp($value) { $value = trim($value); - $dti = DateTimeImmutable::createFromFormat('d.m.Y', $value); - $dti = $dti->setTime(0, 0, 0); + if (strpos($value, 'Uhr') !== false) { + $value = str_replace(' Uhr', '', $value); + $dti = DateTimeImmutable::createFromFormat('d.m.Y G:i', $value); + } else { + $dti = DateTimeImmutable::createFromFormat('d.m.Y', $value); + $dti = $dti->setTime(0, 0); + } return $dti->getTimestamp(); } From 35f6e62e458c88f47bc857a3a186cf24edcae502 Mon Sep 17 00:00:00 2001 From: xduugu Date: Tue, 20 Feb 2024 07:03:04 +0000 Subject: [PATCH 06/97] docker: Use pre-built curl-impersonate library from github releases (#3984) The docker image is only available for `amd64` architecture and therefore cannot be used for arm images. Fixes #3983 --- Dockerfile | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2f1f4f3d93a..1326dba0dbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,3 @@ -FROM lwthiker/curl-impersonate:0.5-ff-slim-buster AS curlimpersonate - FROM debian:12-slim AS rssbridge LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one." 
@@ -7,7 +5,8 @@ LABEL repository="https://github.com/RSS-Bridge/rss-bridge" LABEL website="https://github.com/RSS-Bridge/rss-bridge" ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get update && \ +RUN set -xe && \ + apt-get update && \ apt-get install --yes --no-install-recommends \ ca-certificates \ nginx \ @@ -24,18 +23,44 @@ RUN apt-get update && \ php-xml \ php-zip \ # php-zlib is enabled by default with PHP 8.2 in Debian 12 + # for downloading libcurl-impersonate + curl \ && \ + # install curl-impersonate library + curlimpersonate_version=0.6.0 && \ + { \ + { \ + [ $(arch) = 'aarch64' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \ + sha512sum="d04b1eabe71f3af06aa1ce99b39a49c5e1d33b636acedcd9fad163bc58156af5c3eb3f75aa706f335515791f7b9c7a6c40ffdfa47430796483ecef929abd905d" \ + ; } \ + || { \ + [ $(arch) = 'armv7l' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \ + sha512sum="05906b4efa1a6ed8f3b716fd83d476b6eea6bfc68e3dbc5212d65a2962dcaa7bd1f938c9096a7535252b11d1d08fb93adccc633585ff8cb8cec5e58bfe969bc9" \ + ; } \ + || { \ + [ $(arch) = 'x86_64' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \ + sha512sum="480bbe9452cd9aff2c0daaaf91f1057b3a96385f79011628a9237223757a9b0d090c59cb5982dc54ea0d07191657299ea91ca170a25ced3d7d410fcdff130ace" \ + ; } \ + } && \ + curl -LO "https://github.com/lwthiker/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \ + echo "$sha512sum $archive" | sha512sum -c - && \ + mkdir -p /usr/local/lib/curl-impersonate && \ + tar xaf "$archive" -C /usr/local/lib/curl-impersonate --wildcards 'libcurl-impersonate-ff.so*' && \ + rm "$archive" && \ + apt-get purge --assume-yes curl && \ rm -rf /var/lib/apt/lists/* +ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so +ENV CURL_IMPERSONATE ff91esr + # logs should go to stdout / stderr RUN ln -sfT /dev/stderr 
/var/log/nginx/error.log; \ ln -sfT /dev/stdout /var/log/nginx/access.log; \ chown -R --no-dereference www-data:adm /var/log/nginx/ -COPY --from=curlimpersonate /usr/local/lib/libcurl-impersonate-ff.so /usr/local/lib/curl-impersonate/ -ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so -ENV CURL_IMPERSONATE ff91esr - COPY ./config/nginx.conf /etc/nginx/sites-available/default COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini From 4c355ba3083ded7a2d621baa42c1233a46cd51dd Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 20 Feb 2024 19:32:31 +0100 Subject: [PATCH 07/97] fix(FilterBridge): trim title so that regex filter works as expected (#3989) The fix is in FeedParser, so this fixes all usages of FeedParser where title is now trimmed. fix #3985 --- lib/FeedParser.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 37d3005bbca..b774cc14e23 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -92,7 +92,7 @@ public function parseAtomItem(\SimpleXMLElement $feedItem): array $item['uri'] = (string)$feedItem->id; } if (isset($feedItem->title)) { - $item['title'] = html_entity_decode((string)$feedItem->title); + $item['title'] = trim(html_entity_decode((string)$feedItem->title)); } if (isset($feedItem->updated)) { $item['timestamp'] = strtotime((string)$feedItem->updated); @@ -154,7 +154,7 @@ public function parseRss2Item(\SimpleXMLElement $feedItem): array $item['uri'] = (string)$feedItem->link; } if (isset($feedItem->title)) { - $item['title'] = html_entity_decode((string)$feedItem->title); + $item['title'] = trim(html_entity_decode((string)$feedItem->title)); } if (isset($feedItem->description)) { $item['content'] = (string)$feedItem->description; From 683c968d646dbaf9719a1ef7b59797240fec7617 Mon Sep 17 00:00:00 2001 From: D5k H3h <85834680+dhuschde@users.noreply.github.com> Date: Fri, 1 Mar 2024 
20:24:14 +0100 Subject: [PATCH 08/97] [Rooster Teeth] Add Camp Camp channel (#3992) --- bridges/RoosterTeethBridge.php | 1 + 1 file changed, 1 insertion(+) diff --git a/bridges/RoosterTeethBridge.php b/bridges/RoosterTeethBridge.php index 21bac4fec50..464c83a8c9b 100644 --- a/bridges/RoosterTeethBridge.php +++ b/bridges/RoosterTeethBridge.php @@ -17,6 +17,7 @@ class RoosterTeethBridge extends BridgeAbstract 'values' => [ 'All channels' => 'all', 'Achievement Hunter' => 'achievement-hunter', + 'Camp Camp' => 'camp-camp', 'Cow Chop' => 'cow-chop', 'Death Battle' => 'death-battle', 'Friends of RT' => 'friends-of-rt', From 7a7f8d5050177dfc4eac763feb257745ce39c238 Mon Sep 17 00:00:00 2001 From: July Date: Tue, 5 Mar 2024 19:28:24 -0500 Subject: [PATCH 09/97] AnnasArchiveBridge: correctly handling partial matches and file links (#3997) --- bridges/AnnasArchiveBridge.php | 40 +++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/bridges/AnnasArchiveBridge.php b/bridges/AnnasArchiveBridge.php index acb943b497b..b857fadfc80 100644 --- a/bridges/AnnasArchiveBridge.php +++ b/bridges/AnnasArchiveBridge.php @@ -126,30 +126,36 @@ public function collectData() return; } - $elements = $list->find('.w-full > .mb-4 > div > a'); + $elements = $list->find('.w-full > .mb-4 > div'); foreach ($elements as $element) { - $item = []; - $item['title'] = $element->find('h3', 0)->plaintext; - $item['author'] = $element->find('div.italic', 0)->plaintext; - $item['uri'] = $element->href; - $item['content'] = $element->plaintext; - $item['uid'] = $item['uri']; + // stop added entries once partial match list starts + if (str_contains($element->innertext, 'partial match')) { + break; + } + if ($element = $element->find('a', 0)) { + $item = []; + $item['title'] = $element->find('h3', 0)->plaintext; + $item['author'] = $element->find('div.italic', 0)->plaintext; + $item['uri'] = $element->href; + $item['content'] = $element->plaintext; + $item['uid'] = 
$item['uri']; - $item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20); - if ($item_html) { - $item_html = defaultLinkTo($item_html, self::URI); - $item['content'] .= $item_html->find('main img', 0); - $item['content'] .= $item_html->find('main .mt-4', 0); // Summary - if ($links = $item_html->find('main ul.mb-4', -1)) { - foreach ($links->find('li > a.js-download-link') as $file) { - $item['enclosures'][] = $file->href; + $item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20); + if ($item_html) { + $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] .= $item_html->find('main img', 0); + $item['content'] .= $item_html->find('main .mt-4', 0); // Summary + foreach ($item_html->find('main ul.mb-4 > li > a.js-download-link') as $file) { + if (!str_contains($file->href, 'fast_download')) { + $item['enclosures'][] = $file->href; + } } // Remove bulk torrents from enclosures list $item['enclosures'] = array_diff($item['enclosures'], [self::URI . 'datasets']); } - } - $this->items[] = $item; + $this->items[] = $item; + } } } From f7c1b7193961b957dfa0fe192b4a95d09ad9b1c6 Mon Sep 17 00:00:00 2001 From: July Date: Wed, 6 Mar 2024 13:40:59 -0500 Subject: [PATCH 10/97] NyaaTorrentsBridge: add torrent to enclosures and generate better feed name (#3996) * NyaaTorrentsBridge: add torrent to enclosures and generate better feed name * NyaaTorrentsBridge: fix accidental () in bridge name --- bridges/NyaaTorrentsBridge.php | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index fcf2b1975c6..36708411410 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -66,22 +66,20 @@ public function collectData() $feed = $feedParser->parseFeed(getContents($this->getURI())); foreach ($feed['items'] as $item) { - $item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']); - $item['uri'] = 
str_replace('/download/', '/view/', $item['uri']); + $item['enclosures'] = [$item['uri']]; $item['uri'] = str_replace('.torrent', '', $item['uri']); + $item['uri'] = str_replace('/download/', '/view/', $item['uri']); + $item['id'] = str_replace('https://nyaa.si/view/', '', $item['uri']); $dom = getSimpleHTMLDOMCached($item['uri']); if ($dom) { $description = $dom->find('#torrent-description', 0)->innertext ?? ''; - $itemDom = str_get_html(markdownToHtml(html_entity_decode($description))); - $item_image = $this->getURI() . 'static/img/avatar/default.png'; - foreach ($itemDom->find('img') as $img) { - if (strpos($img->src, 'prez') === false) { - $item_image = $img->src; - break; - } - } - $item['enclosures'] = [$item_image]; - $item['content'] = (string) $itemDom; + $item['content'] = markdownToHtml(html_entity_decode($description)); + + $magnet = $dom->find('div.panel-footer.clearfix > a', 1)->href; + // can't put raw magnet link in enclosure, this gives information on + // magnet contents and works a way to sent magnet value + $magnet = 'https://torrent.parts/#' . html_entity_decode($magnet); + array_push($item['enclosures'], $magnet); } $this->items[] = $item; if (count($this->items) >= 10) { @@ -90,6 +88,15 @@ public function collectData() } } + public function getName() + { + $name = parent::getName(); + $name .= $this->getInput('u') ? ' - ' . $this->getInput('u') : ''; + $name .= $this->getInput('q') ? ' - ' . $this->getInput('q') : ''; + $name .= $this->getInput('c') ? ' (' . $this->getKey('c') . ')' : ''; + return $name; + } + public function getIcon() { return self::URI . 
'static/favicon.png'; From 79699131e87e831fc5fa4fb785c210f5396e2bbd Mon Sep 17 00:00:00 2001 From: tillcash Date: Fri, 8 Mar 2024 17:16:32 +0530 Subject: [PATCH 11/97] [MaalaimalarBridge] New Bridge (#4001) --- bridges/MaalaimalarBridge.php | 117 ++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 bridges/MaalaimalarBridge.php diff --git a/bridges/MaalaimalarBridge.php b/bridges/MaalaimalarBridge.php new file mode 100644 index 00000000000..87f85694cd0 --- /dev/null +++ b/bridges/MaalaimalarBridge.php @@ -0,0 +1,117 @@ + [ + 'name' => 'topic', + 'type' => 'list', + 'values' => [ + 'news' => [ + 'tamilnadu' => 'news/state', + 'puducherry' => 'puducherry', + 'india' => 'news/national', + 'world' => 'news/world', + ], + 'district' => [ + 'chennai' => 'chennai', + 'ariyalur' => 'ariyalur', + 'chengalpattu' => 'chengalpattu', + 'coimbatore' => 'coimbatore', + 'cuddalore' => 'cuddalore', + 'dharmapuri' => 'dharmapuri', + 'dindugal' => 'dindugal', + 'erode' => 'erode', + 'kaanchepuram' => 'kaanchepuram', + 'kallakurichi' => 'kallakurichi', + 'kanyakumari' => 'kanyakumari', + 'karur' => 'karur', + 'krishnagiri' => 'krishnagiri', + 'madurai' => 'madurai', + 'mayiladuthurai' => 'mayiladuthurai', + 'nagapattinam' => 'nagapattinam', + 'namakal' => 'namakal', + 'nilgiris' => 'nilgiris', + 'perambalur' => 'perambalur', + 'pudukottai' => 'pudukottai', + 'ramanathapuram' => 'ramanathapuram', + 'ranipettai' => 'ranipettai', + 'salem' => 'salem', + 'sivagangai' => 'sivagangai', + 'tanjore' => 'tanjore', + 'theni' => 'theni', + 'thenkasi' => 'thenkasi', + 'thiruchirapalli' => 'thiruchirapalli', + 'thirunelveli' => 'thirunelveli', + 'thirupathur' => 'thirupathur', + 'thiruvarur' => 'thiruvarur', + 'thoothukudi' => 'thoothukudi', + 'tirupur' => 'tirupur', + 'tiruvallur' => 'tiruvallur', + 'tiruvannamalai' => 'tiruvannamalai', + 'vellore' => 'vellore', + 'villupuram' => 'villupuram', + 'virudhunagar' => 'virudhunagar', + ], + 'cinema' => [ + 'news' => 
'cinema/cinemanews', + 'gossip' => 'cinema/gossip', + ], + ], + ], + ], + ]; + + public function getName() + { + $topic = $this->getKey('topic'); + return self::NAME . ($topic ? ' - ' . ucfirst($topic) : ''); + } + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI . $this->getInput('topic')); + $articles = $dom->find('div.mb-20.infinite-card-wrapper.white-section'); + + foreach ($articles as $article) { + $titleElement = $article->find('h2.title a', 0); + if (!$titleElement) { + continue; + } + + $dateElement = $article->find('time.h-date span', 0); + $date = $dateElement ? $dateElement->{'data-datestring'} . 'UTC' : ''; + + $content = $this->constructContent($article); + + $this->items[] = [ + 'content' => $content, + 'timestamp' => $date, + 'title' => $titleElement->plaintext, + 'uid' => $titleElement->href, + 'uri' => self::URI . $titleElement->href, + ]; + } + } + + private function constructContent($article) + { + $content = ''; + $imageElement = $article->find('div.ignore-autoplay img', 0); + if ($imageElement) { + $content .= '

'; + } + + $storyElement = $article->find('div.story-content', 0); + if ($storyElement) { + $content .= $storyElement->innertext; + } + + return $content; + } +} From 84b93e0f8f67dd69a242e3ecb1cfd743b5e167e4 Mon Sep 17 00:00:00 2001 From: Jonathan Kay Date: Sun, 10 Mar 2024 10:18:50 -0400 Subject: [PATCH 12/97] [ComicsKingdomBridge] Fix/Rewrite of ComicsKingdom Bridge (#4003) * Rewrite ComicsKingdom Bridge Rewrite of bridge as the existing one no longer works: - Now uses REST API - Added optional limit to get desired number of comics - Author now reflects the comic creators name - Feed name and comic titles now pulled from site - Added myself as the maintainer as I've been the one maintaining, and the existing code no longer is used * Change API to URI to pass test * Remove whitespace, add curly braces and switch to single quotes --- bridges/ComicsKingdomBridge.php | 66 ++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/bridges/ComicsKingdomBridge.php b/bridges/ComicsKingdomBridge.php index 8baf75118af..227426c4edb 100644 --- a/bridges/ComicsKingdomBridge.php +++ b/bridges/ComicsKingdomBridge.php @@ -2,59 +2,65 @@ class ComicsKingdomBridge extends BridgeAbstract { - const MAINTAINER = 'stjohnjohnson'; + const MAINTAINER = 'TReKiE'; + // const MAINTAINER = 'stjohnjohnson'; const NAME = 'Comics Kingdom Unofficial RSS'; - const URI = 'https://comicskingdom.com/'; + const URI = 'https://wp.comicskingdom.com/wp-json/wp/v2/ck_comic'; const CACHE_TIMEOUT = 21600; // 6h const DESCRIPTION = 'Comics Kingdom Unofficial RSS'; const PARAMETERS = [ [ 'comicname' => [ - 'name' => 'comicname', + 'name' => 'Name of comic', 'type' => 'text', 'exampleValue' => 'mutts', 'title' => 'The name of the comic in the URL after https://comicskingdom.com/', 'required' => true + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'The number of recent comics to get', + 'defaultValue' => 10 ] ]]; + protected $comicName; + public 
function collectData() { - $html = getSimpleHTMLDOM($this->getURI(), [], [], true, false); + $json = getContents($this->getURI()); + $data = json_decode($json, false); - // Get author from first page - $author = $html->find('div.author p', 0); - ; + if (isset($data[0]->_embedded->{'wp:term'}[0][0])) { + $this->comicName = $data[0]->_embedded->{'wp:term'}[0][0]->name; + } - // Get current date/link - $link = $html->find('meta[property=og:url]', -1)->content; - for ($i = 0; $i < 3; $i++) { + foreach ($data as $comicitem) { $item = []; - $page = getSimpleHTMLDOM($link); - - $imagelink = $page->find('meta[property=og:image]', 0)->content; - - $date = explode('/', $link); - - $item['id'] = $imagelink; - $item['uri'] = $link; - $item['author'] = $author; - $item['title'] = 'Comics Kingdom ' . $this->getInput('comicname'); - $item['timestamp'] = DateTime::createFromFormat('Y-m-d', $date[count($date) - 1])->getTimestamp(); - $item['content'] = ''; - + $item['id'] = $comicitem->id; + $item['uri'] = $comicitem->yoast_head_json->og_url; + $item['author'] = str_ireplace('By ', '', $comicitem->ck_comic_byline); + $item['title'] = $comicitem->yoast_head_json->title; + $item['timestamp'] = $comicitem->date; + $item['content'] = ''; $this->items[] = $item; - $link = $page->find('div.comic-viewer-inline a', 0)->href; - if (empty($link)) { - break; // allow bridge to continue if there's less than 3 comics - } } } public function getURI() { if (!is_null($this->getInput('comicname'))) { - return self::URI . urlencode($this->getInput('comicname')); + $params = [ + 'ck_feature' => $this->getInput('comicname'), + 'per_page' => $this->getInput('limit'), + 'date_inclusive' => 'true', + 'order' => 'desc', + 'page' => '1', + '_embed' => 'true' + ]; + + return self::URI . '?' . http_build_query($params); } return parent::getURI(); @@ -62,8 +68,8 @@ public function getURI() public function getName() { - if (!is_null($this->getInput('comicname'))) { - return $this->getInput('comicname') . 
' - Comics Kingdom'; + if ($this->comicName) { + return $this->comicName . ' - Comics Kingdom'; } return parent::getName(); From 254efc281255a5d3308d33007e09ffd3bc2df49e Mon Sep 17 00:00:00 2001 From: Mynacol Date: Sun, 10 Mar 2024 22:21:10 +0100 Subject: [PATCH 13/97] [ZeitBridge] Remove doubled text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first two paragraphs were repeated at the end of articles. The first CSS selector filters those out (example 1). The second CSS selector removes a "Zum Anschauen benötigen wir Ihre Zustimmung" line from a poll widget. We can't load the widget successfully, therefore we should remove all embeds that seem to use javascript (example 2). 1: https://www.zeit.de/campus/2024-03/bundesregierung-wissenschaft-arbeitsvertrag-regeln 2: https://www.zeit.de/campus/2024-03/ausbildung-abgebrochen-gruende-azubi-aufruf --- bridges/ZeitBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/ZeitBridge.php b/bridges/ZeitBridge.php index 0ed9276bdc0..b9806e5ab45 100644 --- a/bridges/ZeitBridge.php +++ b/bridges/ZeitBridge.php @@ -87,7 +87,7 @@ private function parseArticle($item, $article) // remove known bad elements foreach ( $article->find( - 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast' + 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, div[data-paywall], .js-embed-consent' ) as $bad ) { $bad->remove(); From ecf61f6fa777fcaeb6ce0bdf1c82cdcbc89e74a1 Mon Sep 17 00:00:00 2001 From: tillcash Date: Tue, 12 Mar 2024 00:44:10 +0530 Subject: [PATCH 14/97] [DailythanthiBridge] New Bridge (#4006) --- bridges/DailythanthiBridge.php | 96 ++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 bridges/DailythanthiBridge.php diff --git a/bridges/DailythanthiBridge.php b/bridges/DailythanthiBridge.php new 
file mode 100644 index 00000000000..114f42d824a --- /dev/null +++ b/bridges/DailythanthiBridge.php @@ -0,0 +1,96 @@ + [ + 'name' => 'topic', + 'type' => 'list', + 'values' => [ + 'news' => [ + 'tamilnadu' => 'news/state', + 'india' => 'news/india', + 'world' => 'news/world', + 'sirappu-katturaigal' => 'news/sirappukatturaigal', + ], + 'cinema' => [ + 'news' => 'cinema/cinemanews', + ], + 'sports' => [ + 'sports' => 'sports', + 'cricket' => 'sports/cricket', + 'football' => 'sports/football', + 'tennis' => 'sports/tennis', + 'hockey' => 'sports/hockey', + 'other-sports' => 'sports/othersports', + ], + 'devotional' => [ + 'devotional' => 'others/devotional', + 'aalaya-varalaru' => 'aalaya-varalaru', + ], + ], + ], + ], + ]; + + public function getName() + { + $topic = $this->getKey('topic'); + return self::NAME . ($topic ? ' - ' . ucfirst($topic) : ''); + } + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI . $this->getInput('topic')); + + foreach ($dom->find('div.ListingNewsWithMEDImage') as $element) { + $slug = $element->find('a', 1); + $title = $element->find('h3', 0); + if (!$slug || !$title) { + continue; + } + + $url = self::URI . $slug->href; + $date = $element->find('span', 1); + $date = $date ? $date->{'data-datestring'} : ''; + + $this->items[] = [ + 'content' => $this->constructContent($url), + 'timestamp' => $date ? $date . 'UTC' : '', + 'title' => $title->plaintext, + 'uid' => $slug->href, + 'uri' => $url, + ]; + } + } + + private function constructContent($url) + { + $dom = getSimpleHTMLDOMCached($url); + + $article = $dom->find('div.details-content-story', 0); + if (!$article) { + return 'Content Not Found'; + } + + // Remove ads + foreach ($article->find('div[id*="_ad"]') as $remove) { + $remove->outertext = ''; + } + + // Correct image tag in $article + foreach ($article->find('h-img') as $img) { + $img->parent->outertext = sprintf('

', $img->src); + } + + $image = $dom->find('div.main-image-caption-container img', 0); + $image = $image ? '

' . $image->outertext . '

' : ''; + + return $image . $article; + } +} From 5b80af978fdd0a7ea566d9a69d669e0b22b0378a Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 12 Mar 2024 19:46:21 +0100 Subject: [PATCH 15/97] docs: improve README (#4009) --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f8d08058ddd..6124a4eac96 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ These instructions have been tested on a fresh Debian 12 VM from Digital Ocean ( ```shell timedatectl set-timezone Europe/Oslo -apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl +apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl # Create a new user account useradd --shell /bin/bash --create-home rss-bridge @@ -167,12 +167,10 @@ Restart fpm and nginx: ```shell # Lint and restart php-fpm -php-fpm8.2 -t -systemctl restart php8.2-fpm +php-fpm8.2 -t && systemctl restart php8.2-fpm # Lint and restart nginx -nginx -t -systemctl restart nginx +nginx -t && systemctl restart nginx ``` ### How to install from Composer From 4bad1c140a25d8ef8577d0fa7b0a60e27a5d7649 Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 12 Mar 2024 23:59:10 +0100 Subject: [PATCH 16/97] fix(reddit): url encoding (#4010) --- bridges/RedditBridge.php | 55 ++++++++++++++++-------------- tests/BridgeImplementationTest.php | 23 ------------- tests/RedditBridgeTest.php | 33 ++++++++++++++++++ 3 files changed, 62 insertions(+), 49 deletions(-) create mode 100644 tests/RedditBridgeTest.php diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index 9c72f9963f8..e2f79b11586 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -139,36 +139,13 @@ private function collectDataInternal(): void break; } - if (!($this->getInput('search') === '')) { - $keywords = $this->getInput('search'); - $keywords = str_replace([',', ' '], '%20', $keywords); - $keywords = $keywords . 
'%20'; - } else { - $keywords = ''; - } - - if (!empty($this->getInput('f')) && $this->queriedContext == 'single') { - $flair = $this->getInput('f'); - $flair = str_replace(' ', '%20', $flair); - $flair = 'flair%3A%22' . $flair . '%22%20'; - } else { - $flair = ''; - } + $search = $this->getInput('search'); + $flareInput = $this->getInput('f'); foreach ($subreddits as $subreddit) { - $name = trim($subreddit); - $url = self::URI - . '/search.json?q=' - . $keywords - . $flair - . ($user ? 'author%3A' : 'subreddit%3A') - . $name - . '&sort=' - . $this->getInput('d') - . '&include_over_18=on'; - $version = 'v0.0.1'; $useragent = "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"; + $url = self::createUrl($search, $flareInput, $subreddit, $user, $section, $this->queriedContext); $json = getContents($url, ['User-Agent: ' . $useragent]); $parsedJson = Json::decode($json, false); @@ -278,6 +255,32 @@ private function collectDataInternal(): void }); } + public static function createUrl($search, $flareInput, $subreddit, bool $user, $section, $queriedContext): string + { + if ($search === '') { + $keywords = ''; + } else { + $keywords = $search; + $keywords = str_replace([',', ' '], ' ', $keywords); + $keywords = $keywords . ' '; + } + + if ($flareInput && $queriedContext == 'single') { + $flair = $flareInput; + $flair = str_replace([',', ' '], ' ', $flair); + $flair = 'flair:"' . $flair . '" '; + } else { + $flair = ''; + } + $name = trim($subreddit); + $query = [ + 'q' => $keywords . $flair . ($user ? 'author:' : 'subreddit:') . $name, + 'sort' => $section, + 'include_over_18' => 'on', + ]; + return 'https://old.reddit.com/search.json?' . 
http_build_query($query); + } + public function getIcon() { return 'https://www.redditstatic.com/desktop2x/img/favicon/favicon-96x96.png'; diff --git a/tests/BridgeImplementationTest.php b/tests/BridgeImplementationTest.php index d2f74931dd0..dd68934edc9 100644 --- a/tests/BridgeImplementationTest.php +++ b/tests/BridgeImplementationTest.php @@ -157,29 +157,6 @@ public function testParameters($path) } } - /** - * @dataProvider dataBridgesProvider - */ - public function testVisibleMethods($path) - { - $bridgeAbstractMethods = get_class_methods(BridgeAbstract::class); - sort($bridgeAbstractMethods); - $feedExpanderMethods = get_class_methods(FeedExpander::class); - sort($feedExpanderMethods); - - $this->setBridge($path); - - $publicMethods = get_class_methods($this->bridge); - sort($publicMethods); - foreach ($publicMethods as $publicMethod) { - if ($this->bridge instanceof FeedExpander) { - $this->assertContains($publicMethod, $feedExpanderMethods); - } else { - $this->assertContains($publicMethod, $bridgeAbstractMethods); - } - } - } - /** * @dataProvider dataBridgesProvider */ diff --git a/tests/RedditBridgeTest.php b/tests/RedditBridgeTest.php new file mode 100644 index 00000000000..17a62e68874 --- /dev/null +++ b/tests/RedditBridgeTest.php @@ -0,0 +1,33 @@ +assertSame($expected, $actual); + + // https://old.reddit.com/search.json?q=author:RavenousRandy&sort=hot&include_over_18=on + $expected = 'https://old.reddit.com/search.json?q=author%3ARavenousRandy&sort=hot&include_over_18=on'; + $actual = RedditBridge::createUrl('', '', 'RavenousRandy', true, 'hot', 'user'); + $this->assertSame($expected, $actual); + + // https://old.reddit.com/search.json?q=cats dogs hen flair:"Proxy" subreddit:php&sort=hot&include_over_18=on + $expected = 'https://old.reddit.com/search.json?q=cats+dogs+hen+flair%3A%22Proxy%22+subreddit%3Aphp&sort=hot&include_over_18=on'; + $actual = RedditBridge::createUrl('cats,dogs hen', 'Proxy', 'php', false, 'hot', 'single'); + 
$this->assertSame($expected, $actual); + + // https://old.reddit.com/search.json?q=cats dogs hen flair:"Proxy Linux Server" subreddit:php&sort=hot&include_over_18=on + $expected = 'https://old.reddit.com/search.json?q=cats+dogs+hen+flair%3A%22Proxy+Linux+Server%22+subreddit%3Aphp&sort=hot&include_over_18=on'; + $actual = RedditBridge::createUrl('cats,dogs hen', 'Proxy,Linux Server', 'php', false, 'hot', 'single'); + $this->assertSame($expected, $actual); + } +} From e6cb5fdc89f2ad4137657cccea79fbdc23bb37cb Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Wed, 13 Mar 2024 23:47:46 +0100 Subject: [PATCH 17/97] [IdealoBridge] Fix Feed items & Feed title customisation (#4013) - Feed items with new price tracking had "Max Price Used" instead of "Max Price New" - Feed Title is now customised with the product name and the Price limits - Fixed logic for saving prices in cache - remove undefined variable notices --- bridges/IdealoBridge.php | 83 +++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php index cef2b812165..fe13e13dd52 100644 --- a/bridges/IdealoBridge.php +++ b/bridges/IdealoBridge.php @@ -40,6 +40,47 @@ public function getIcon() return 'https://cdn.idealo.com/storage/ids-assets/ico/favicon.ico'; } + /** + * Returns the RSS Feed title when a RSS feed is rendered + * @return string the RSS feed Title + */ + private function getFeedTitle() + { + $cacheDuration = 604800; + $link = $this->getInput('Link'); + $keyTITLE = $link . 
'TITLE'; + $product = $this->loadCacheValue($keyTITLE, $cacheDuration); + + // The cache does not contain the title of the bridge, we must get it and save it in the cache + if ($product === null) { + $header = [ + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15' + ]; + $html = getSimpleHTMLDOM($link, $header); + $product = $html->find('.oopStage-title', 0)->find('span', 0)->plaintext; + $this->saveCacheValue($keyTITLE, $product); + } + + $MaxPriceUsed = $this->getInput('MaxPriceUsed'); + $MaxPriceNew = $this->getInput('MaxPriceNew'); + $titleParts = []; + + $titleParts[] = $product; + + // Add Max Prices to the title + if ($MaxPriceUsed !== null) { + $titleParts[] = 'Max Price Used : ' . $MaxPriceUsed . '€'; + } + if ($MaxPriceNew !== null) { + $titleParts[] = 'Max Price New : ' . $MaxPriceNew . '€'; + } + + $title = implode(' ', $titleParts); + + + return $title . ' - ' . $this::NAME; + } + public function collectData() { // Needs header with user-agent to function properly. @@ -69,12 +110,16 @@ public function collectData() $FirstButton = $html->find('.oopStage-conditionButton-wrapper-text', 0); if ($FirstButton) { $PriceNew = $FirstButton->find('strong', 0)->plaintext; + // Save current price + $this->saveCacheValue($KeyNEW, $PriceNew); } // Second Button is used $SecondButton = $html->find('.oopStage-conditionButton-wrapper-text', 1); if ($SecondButton) { $PriceUsed = $SecondButton->find('strong', 0)->plaintext; + // Save current price + $this->saveCacheValue($KeyUSED, $PriceUsed); } // Only continue if a price has changed @@ -83,16 +128,16 @@ public function collectData() $image = $html->find('.datasheet-cover-image', 0)->src; // Generate Content - if ($PriceNew > 1) { + if (isset($PriceNew) && $PriceNew > 1) { $content = "

Price New:
$PriceNew

"; $content .= "

Price New before:
$OldPriceNew

"; } if ($this->getInput('MaxPriceNew') != '') { - $content .= sprintf('

Max Price Used:
%s,00 €

', $this->getInput('MaxPriceNew')); + $content .= sprintf('

Max Price New:
%s,00 €

', $this->getInput('MaxPriceNew')); } - if ($PriceUsed > 1) { + if (isset($PriceUsed) && $PriceUsed > 1) { $content .= "

Price Used:
$PriceUsed

"; $content .= "

Price Used before:
$OldPriceUsed

"; } @@ -110,8 +155,8 @@ public function collectData() // Currently under Max new price if ($this->getInput('MaxPriceNew') != '') { - if ($PriceNew < $this->getInput('MaxPriceNew')) { - $title = sprintf($Pricealarm, 'Used', $PriceNew, $Productname, $now); + if (isset($PriceNew) && $PriceNew < $this->getInput('MaxPriceNew')) { + $title = sprintf($Pricealarm, 'New', $PriceNew, $Productname, $now); $item = [ 'title' => $title, 'uri' => $link, @@ -124,7 +169,7 @@ public function collectData() // Currently under Max used price if ($this->getInput('MaxPriceUsed') != '') { - if ($PriceUsed < $this->getInput('MaxPriceUsed')) { + if (isset($PriceUsed) && $PriceUsed < $this->getInput('MaxPriceUsed')) { $title = sprintf($Pricealarm, 'Used', $PriceUsed, $Productname, $now); $item = [ 'title' => $title, @@ -143,23 +188,23 @@ public function collectData() (!$this->getInput('ExcludeNew') && $PriceNew != $OldPriceNew ) || (!$this->getInput('ExcludeUsed') && $PriceUsed != $OldPriceUsed ) ) { - $title .= 'Priceupdate! '; + $title = 'Priceupdate! 
'; if (!$this->getInput('ExcludeNew')) { - if ($PriceNew < $OldPriceNew) { + if (isset($PriceNew) && $PriceNew < $OldPriceNew) { $title .= 'NEW:⬇ '; // Arrow Down Emoji } - if ($PriceNew > $OldPriceNew) { + if (isset($PriceNew) && $PriceNew > $OldPriceNew) { $title .= 'NEW:⬆ '; // Arrow Up Emoji } } if (!$this->getInput('ExcludeUsed')) { - if ($PriceUsed < $OldPriceUsed) { + if (isset($PriceUsed) && $PriceUsed < $OldPriceUsed) { $title .= 'USED:⬇ '; // Arrow Down Emoji } - if ($PriceUsed > $OldPriceUsed) { + if (isset($PriceUsed) && $PriceUsed > $OldPriceUsed) { $title .= 'USED:⬆ '; // Arrow Up Emoji } } @@ -177,9 +222,19 @@ public function collectData() } } } + } - // Save current price - $this->saveCacheValue($KeyNEW, $PriceNew); - $this->saveCacheValue($KeyUSED, $PriceUsed); + /** + * Returns the RSS Feed title according to the parameters + * @return string the RSS feed Tile + */ + public function getName() + { + switch ($this->queriedContext) { + case '0': + return $this->getFeedTitle(); + default: + return parent::getName(); + } } } From 36147a082d33faeef6e42fa103a3283cc5cf5dc3 Mon Sep 17 00:00:00 2001 From: Tim-Florian Feulner <50834839+R3dError@users.noreply.github.com> Date: Fri, 15 Mar 2024 19:20:04 +0100 Subject: [PATCH 18/97] Fix NACSouthGermanyMediaLibraryBridge for new website layout (#4014) --- bridges/NACSouthGermanyMediaLibraryBridge.php | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/bridges/NACSouthGermanyMediaLibraryBridge.php b/bridges/NACSouthGermanyMediaLibraryBridge.php index fff6c554e0b..030ded8f867 100644 --- a/bridges/NACSouthGermanyMediaLibraryBridge.php +++ b/bridges/NACSouthGermanyMediaLibraryBridge.php @@ -31,7 +31,7 @@ class NACSouthGermanyMediaLibraryBridge extends BridgeAbstract public function getIcon() { - return 'https://www.nak-stuttgart.de/static/themes/nak_sued/images/nak-logo.png'; + return 'https://nak-sued.de/static/themes/sued/images/logo.png'; } private static function 
parseTimestamp($title) @@ -66,9 +66,12 @@ private static function collectDataForSWR1($parent, $item) private static function collectDataForBayern2($parent, $item) { # Find link - $playerDom = getSimpleHTMLDOMCached(self::BASE_URI . $parent->find('a', 0)->href); - $sourceURI = $playerDom->find('source', 0)->src; - $item['enclosures'] = [self::BASE_URI . $sourceURI]; + $relativeURICode = $parent->find('a', 0)->onclick; + if (preg_match('/window\.open\(\'([^\']*)\'/', $relativeURICode, $matches)) { + $playerDom = getSimpleHTMLDOMCached(self::BASE_URI . $matches[1]); + $sourceURI = $playerDom->find('source', 0)->src; + $item['enclosures'] = [self::BASE_URI . $sourceURI]; + } # Add time to timestamp $item['timestamp'] .= ' 06:45'; @@ -78,14 +81,14 @@ private static function collectDataForBayern2($parent, $item) private function collectDataInList($pageURI, $customizeItemCall) { - $page = getSimpleHTMLDOM(self::BASE_URI . $pageURI); + $page = getSimpleHTMLDOM($pageURI); - foreach ($page->find('div.grids') as $parent) { + foreach ($page->find('div.flex-columns.entry') as $parent) { # Find title $title = $parent->find('h2', 0)->plaintext; # Find content - $contentBlock = $parent->find('ul.contentlist', 0); + $contentBlock = $parent->find('ul', 0); $content = ''; foreach ($contentBlock->find('li') as $li) { $content .= '

' . $li->plaintext . '

'; @@ -103,7 +106,7 @@ private function collectDataInList($pageURI, $customizeItemCall) private function collectDataFromAllPages($rootURI, $customizeItemCall) { $rootPage = getSimpleHTMLDOM($rootURI); - $pages = $rootPage->find('div#tabmenu', 0); + $pages = $rootPage->find('div.flex-columns.inner_filter', 0); foreach ($pages->find('a') as $page) { self::collectDataInList($page->href, [$this, $customizeItemCall]); } From a61524bf776fb29aaf30451c972f1ddb90920fdf Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Sun, 17 Mar 2024 19:02:51 +0100 Subject: [PATCH 19/97] Update RedditBridge.php (#4019) prevent error htmlspecialchars_decode(): Passing null to parameter #1 --- bridges/RedditBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index e2f79b11586..fbc6f67891c 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -189,7 +189,7 @@ private function collectDataInternal(): void // Comment $item['content'] = htmlspecialchars_decode($data->body_html); - } elseif ($data->is_self) { + } elseif ($data->is_self && isset($data->selftext_html)) { // Text post $item['content'] = htmlspecialchars_decode($data->selftext_html); From 58e2b56d40e0cae1cd3689f8c946650e5caba4cf Mon Sep 17 00:00:00 2001 From: Patrick Date: Sun, 17 Mar 2024 19:03:09 +0100 Subject: [PATCH 20/97] Adjustment to new website layout (#4020) --- bridges/JohannesBlickBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/JohannesBlickBridge.php b/bridges/JohannesBlickBridge.php index 6c00fecaba1..72583a53e22 100644 --- a/bridges/JohannesBlickBridge.php +++ b/bridges/JohannesBlickBridge.php @@ -13,7 +13,7 @@ public function collectData() or returnServerError('Could not request: ' . 
self::URI); $html = defaultLinkTo($html, self::URI); - foreach ($html->find('td > a') as $index => $a) { + foreach ($html->find('ul[class=easyfolderlisting] > li > a') as $index => $a) { $item = []; // Create an empty item $articlePath = $a->href; $item['title'] = $a->innertext; From 3ed193eee2e873496ca3635561c61e0e8ba49edd Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Fri, 22 Mar 2024 09:44:42 +0100 Subject: [PATCH 21/97] [IdealoBridge] Update Bridge Meta data & (#4022) The bridge meta data has been updated to reflect that the bridge works for other international version of Idealo. The Price trend is displayed on every price in the the Feed element content. The same function is now used to show the price trend in the Feed element title, to remove some duplicate code.. --- bridges/IdealoBridge.php | 46 ++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php index fe13e13dd52..4eb66dcb5d1 100644 --- a/bridges/IdealoBridge.php +++ b/bridges/IdealoBridge.php @@ -2,15 +2,15 @@ class IdealoBridge extends BridgeAbstract { - const NAME = 'Idealo.de Bridge'; + const NAME = 'idealo.de / idealo.fr / idealo.es Bridge'; const URI = 'https://www.idealo.de'; - const DESCRIPTION = 'Tracks the price for a product on idealo.de. Pricealarm if specific price is set'; + const DESCRIPTION = 'Tracks the price for a product on idealo.de / idealo.fr / idealo.es. Pricealarm if specific price is set'; const MAINTAINER = 'SebLaus'; const CACHE_TIMEOUT = 60 * 30; // 30 min const PARAMETERS = [ [ 'Link' => [ - 'name' => 'Idealo.de Link to productpage', + 'name' => 'idealo.de / idealo.fr / idealo.es Link to productpage', 'required' => true, 'exampleValue' => 'https://www.idealo.de/preisvergleich/OffersOfProduct/202007367_-s7-pro-ultra-roborock.html' ], @@ -81,6 +81,25 @@ private function getFeedTitle() return $title . ' - ' . 
$this::NAME; } + + /** + * Returns the Price Trend emoji + * @return string the Price Trend Emoji + */ + private function getPriceTrend($NewPrice, $OldPrice) + { + // In case there is no old PRice, then show no trend + if ($OldPrice === null) { + $trend = ''; + } else if ($NewPrice > $OldPrice) { + $trend = '↗'; + } else if ($NewPrice == $OldPrice) { + $trend = '➡'; + } else if ($NewPrice < $OldPrice) { + $trend = '↘'; + } + return $trend; + } public function collectData() { // Needs header with user-agent to function properly. @@ -127,9 +146,11 @@ public function collectData() // Get Product Image $image = $html->find('.datasheet-cover-image', 0)->src; + $content = ''; + // Generate Content if (isset($PriceNew) && $PriceNew > 1) { - $content = "

Price New:
$PriceNew

"; + $content .= sprintf('

Price New:
%s %s

', $PriceNew, $this->getPriceTrend($PriceNew, $OldPriceNew)); $content .= "

Price New before:
$OldPriceNew

"; } @@ -138,7 +159,7 @@ public function collectData() } if (isset($PriceUsed) && $PriceUsed > 1) { - $content .= "

Price Used:
$PriceUsed

"; + $content .= sprintf('

Price Used:
%s %s

', $PriceUsed, $this->getPriceTrend($PriceUsed, $OldPriceUsed)); $content .= "

Price Used before:
$OldPriceUsed

"; } @@ -191,22 +212,11 @@ public function collectData() $title = 'Priceupdate! '; if (!$this->getInput('ExcludeNew')) { - if (isset($PriceNew) && $PriceNew < $OldPriceNew) { - $title .= 'NEW:⬇ '; // Arrow Down Emoji - } - if (isset($PriceNew) && $PriceNew > $OldPriceNew) { - $title .= 'NEW:⬆ '; // Arrow Up Emoji - } + $title .= 'NEW' . $this->getPriceTrend($PriceNew, $OldPriceNew) . ' '; } - if (!$this->getInput('ExcludeUsed')) { - if (isset($PriceUsed) && $PriceUsed < $OldPriceUsed) { - $title .= 'USED:⬇ '; // Arrow Down Emoji - } - if (isset($PriceUsed) && $PriceUsed > $OldPriceUsed) { - $title .= 'USED:⬆ '; // Arrow Up Emoji - } + $title .= 'USED' . $this->getPriceTrend($PriceUsed, $OldPriceUsed) . ' '; } $title .= $Productname; $title .= ' '; From 2aace6c898c2d0f473b0a2071bf3e2d7e1f8e4d4 Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Fri, 22 Mar 2024 21:01:16 +0100 Subject: [PATCH 22/97] Added Bridge for Anisearch.de (#4023) * Create AnisearchBridge.php * Update AnisearchBridge.php * Update AnisearchBridge.php --- bridges/AnisearchBridge.php | 54 +++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 bridges/AnisearchBridge.php diff --git a/bridges/AnisearchBridge.php b/bridges/AnisearchBridge.php new file mode 100644 index 00000000000..639d143feb5 --- /dev/null +++ b/bridges/AnisearchBridge.php @@ -0,0 +1,54 @@ + [ + 'name' => 'Dub', + 'type' => 'list', + 'values' => [ + 'DE' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=de&sort=date&order=desc&view=4', + 'EN' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=en&sort=date&order=desc&view=4', + 'JP' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=ja&sort=date&order=desc&view=4' + ] + ] + ]]; + + public function collectData() + { + $baseurl = 'https://www.anisearch.de/'; + $limit = 10; + $dom = getSimpleHTMLDOM($this->getInput('category')); + foreach ($dom->find('li.btype0') as $key 
=> $li) { + if ($key > $limit) { + break; + } + + $a = $li->find('a', 0); + $title = $a->find('span.title', 0); + $url = $baseurl . $a->href; + + //get article + $domarticle = getSimpleHTMLDOM($url); + $content = $domarticle->find('div.details-text', 0); + + //get header-image and set absolute src + $headerimage = $domarticle->find('img#details-cover', 0); + $src = $headerimage->src; + + $this->items[] = [ + 'title' => $title->plaintext, + 'uri' => $url, + 'content' => $headerimage . '
' . $content + ]; + } + } +} From fee5e269d0763aade289677d6685ebd8bd45c542 Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Sun, 24 Mar 2024 16:38:51 +0100 Subject: [PATCH 23/97] Update CaschyBridge.php (#4027) without removing the video-container-div the embedded youtube videos work again --- bridges/CaschyBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/CaschyBridge.php b/bridges/CaschyBridge.php index 0e3a07bc723..c25cdb08d42 100644 --- a/bridges/CaschyBridge.php +++ b/bridges/CaschyBridge.php @@ -54,7 +54,7 @@ private function addArticleToItem($item, $article) { // remove unwanted stuff foreach ( - $article->find('div.video-container, div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content, + $article->find('div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content, div.wp-embed, p.wp-caption-text, script') as $element ) { $element->remove(); From 0c2099a8529faad54d848dd73cfc6bd839abb5ba Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:41:56 +0100 Subject: [PATCH 24/97] [GolemBridge] fixed embedded youtube videos (#4033) * [GolemBridge] fixed embedded youtube videos embedded youtube-videos can be played directly from feed now * Update GolemBridge.php * Update GolemBridge.php * Update GolemBridge.php * Update GolemBridge.php --- bridges/GolemBridge.php | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index 599d713a0ee..216e913f0f2 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -106,10 +106,23 @@ private function extractContent($page) $article = $page->find('article', 0); + //built youtube iframes + foreach ($article->find('.embedcontent') as &$embedcontent) { + $ytscript = $embedcontent->find('script', 0); + if (preg_match('/www.youtube.com.*?\"/', $ytscript->innertext, $link)) { + $link = 'https://' . 
str_replace('\\', '', $link[0]); + $embedcontent->innertext .= <<'; + EOT; + } + } + // delete known bad elements foreach ( $article->find('div[id*="adtile"], #job-market, #seminars, iframe, - div.gbox_affiliate, div.toc, .embedcontent, script') as $bad + div.gbox_affiliate, div.toc') as $bad ) { $bad->remove(); } @@ -129,7 +142,7 @@ private function extractContent($page) $img->src = $img->getAttribute('data-src-full'); } - foreach ($content->find('p, h1, h2, h3, img[src*="."]') as $element) { + foreach ($content->find('p, h1, h2, h3, img[src*="."], iframe') as $element) { $item .= $element; } From e251e358ff757023f0e0863d47d1781c5196964c Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:42:41 +0100 Subject: [PATCH 25/97] [HeiseBridge] fix for embedded youtube-videos (#4034) * [HeiseBridge] fix for embbedded youtube-videos with this the embedded youtube videos will work in the feed * Update HeiseBridge.php * Update HeiseBridge.php --- bridges/HeiseBridge.php | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 504bcfb52e7..ab40e6d9338 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -160,7 +160,7 @@ private function addArticleToItem($item, $article) $article = defaultLinkTo($article, $item['uri']); // remove unwanted stuff - foreach ($article->find('figure.branding, a-ad, div.ho-text, a-img, .opt-in__content-container, .a-toc__list, a-collapse') as $element) { + foreach ($article->find('figure.branding, a-ad, div.ho-text, a-img, .a-toc__list, a-collapse, .opt-in__description, .opt-in__footnote') as $element) { $element->remove(); } // reload html, as remove() is buggy @@ -179,6 +179,30 @@ private function addArticleToItem($item, $article) } } + //fix for embbedded youtube-videos + $oldlink = ''; + foreach ($article->find('div.video__yt-container') as &$ytvideo) { + if 
(preg_match('/www.youtube.*?\"/', $ytvideo->innertext, $link) && $link[0] != $oldlink) { + //save link to prevent duplicates + $oldlink = $link[0]; + $ytiframe = << + EOT; + //check if video is in header or article for correct possitioning + if (strpos($header->innertext, $link[0])) { + $item['content'] .= $ytiframe; + } else { + $ytvideo->innertext .= $ytiframe; + $reloadneeded = 1; + } + } + } + if (isset($reloadneeded)) { + $article = str_get_html($article->outertext); + } + $categories = $article->find('.article-footer__topics ul.topics li.topics__item a-topic a'); foreach ($categories as $category) { $item['categories'][] = trim($category->plaintext); From db984d8a8b98a985fe9a272794fa20081908ad13 Mon Sep 17 00:00:00 2001 From: July Date: Thu, 28 Mar 2024 14:43:17 -0400 Subject: [PATCH 26/97] AO3Bridge: move tags to categories and remove duplicate fic summary (#4031) * AO3Bridge: move tags to categories and remove duplicate fic summary * [AO3Bridge] Fix tag html entity encoding --- bridges/AO3Bridge.php | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 32bbb0a2418..85f0f9f8269 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -91,12 +91,26 @@ private function collectList($url) continue; // discard deleted works } $item['title'] = $title->plaintext; - $item['content'] = $element; $item['uri'] = $title->href; $strdate = $element->find('div p.datetime', 0)->plaintext; $item['timestamp'] = strtotime($strdate); + // detach from rest of page because remove() is buggy + $element = str_get_html($element->outertext()); + $tags = $element->find('ul.required-tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + $tags = $element->find('ul.tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + + 
$item['content'] = implode('', $element->childNodes()); + $chapters = $element->find('dl dd.chapters', 0); // bookmarked series and external works do not have a chapters count $chapters = (isset($chapters) ? $chapters->plaintext : 0); @@ -123,6 +137,10 @@ private function collectList($url) $response = $httpClient->request($url, $agent); $html = \str_get_html($response->getBody()); $html = defaultLinkTo($html, self::URI); + // remove duplicate fic summary + if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) { + $ficsum->remove(); + } $item['content'] .= $html->find('#workskin', 0); } From be445759b631ea1ec954d70e8e1f30cfb9271f2b Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Thu, 28 Mar 2024 19:44:27 +0100 Subject: [PATCH 27/97] [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Move as much as possible to JSON (#4032) As the website use more and more JSON, and JSON is a machine readable format, I migrated as much as possible to the JSON. This simplifies the Abstract class a lot, and the Bridge classes need less language specifi strings. --- bridges/DealabsBridge.php | 42 +---- bridges/HotUKDealsBridge.php | 51 +----- bridges/MydealsBridge.php | 50 +----- bridges/PepperBridgeAbstract.php | 259 ++++++++----------------------- 4 files changed, 69 insertions(+), 333 deletions(-) diff --git a/bridges/DealabsBridge.php b/bridges/DealabsBridge.php index c65f0c75296..62d854f6235 100644 --- a/bridges/DealabsBridge.php +++ b/bridges/DealabsBridge.php @@ -1915,9 +1915,6 @@ class DealabsBridge extends PepperBridgeAbstract 'thread-error' => 'Impossible de déterminer l\'ID de la discussion. 
Vérifiez l\'URL que vous avez entré', 'no-results' => 'Aucun résultat', 'currency' => '€', - 'relative-date-indicator' => [ - 'il y a', - ], 'price' => 'Prix', 'shipping' => 'Livraison', 'origin' => 'Origine', @@ -1925,42 +1922,7 @@ class DealabsBridge extends PepperBridgeAbstract 'title-keyword' => 'Recherche', 'title-group' => 'Groupe', 'title-talk' => 'Surveillance Discussion', - 'local-months' => [ - 'janvier', - 'février', - 'mars', - 'avril', - 'mai', - 'juin', - 'juillet', - 'août', - 'septembre', - 'octobre', - 'novembre', - 'décembre' - ], - 'local-time-relative' => [ - 'il y a ', - 'min', - 'h', - 'jour', - 'jours', - 'mois', - 'ans', - 'et ' - ], - 'date-prefixes' => [ - 'Actualisé ', - ], - 'relative-date-alt-prefixes' => [ - 'Actualisé ', - ], - 'relative-date-ignore-suffix' => [ - ], - - 'localdeal' => [ - 'Local', - 'Pays d\'expédition' - ], + 'deal-type' => 'Type de deal', + 'localdeal' => 'Deal Local', ]; } diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index 1f059123b41..b631db7338b 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -3279,9 +3279,6 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'thread-error' => 'Unable to determine the thread ID. 
Check the URL you entered', 'no-results' => 'no results', 'currency' => '£', - 'relative-date-indicator' => [ - 'ago', - ], 'price' => 'Price', 'shipping' => 'Shipping', 'origin' => 'Origin', @@ -3289,51 +3286,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'title-keyword' => 'Search', 'title-group' => 'Group', 'title-talk' => 'Discussion Monitoring', - 'local-months' => [ - 'Jan', - 'Feb', - 'Mar', - 'Apr', - 'May', - 'Jun', - 'Jul', - 'Aug', - 'Sep', - 'Occ', - 'Nov', - 'Dec', - 'st', - 'nd', - 'rd', - 'th' - ], - 'local-time-relative' => [ - 'Posted ', - 'm', - 'h,', - 'day', - 'days', - 'month', - 'year', - 'and ' - ], - 'date-prefixes' => [ - 'Posted ', - 'Found ', - 'Refreshed ', - 'Made hot ' - ], - 'relative-date-alt-prefixes' => [ - 'Made hot ', - 'Refreshed ', - 'Last updated ' - ], - 'relative-date-ignore-suffix' => [ - '/by.*$/' - ], - 'localdeal' => [ - 'Local', - 'Expires' - ] + 'deal-type' => 'Deal Type', + 'localdeal' => 'Local deal', ]; } diff --git a/bridges/MydealsBridge.php b/bridges/MydealsBridge.php index 08e32a0c370..41bae46cd53 100644 --- a/bridges/MydealsBridge.php +++ b/bridges/MydealsBridge.php @@ -2026,10 +2026,6 @@ class MydealsBridge extends PepperBridgeAbstract 'thread-error' => 'Die ID der Diskussion kann nicht ermittelt werden. Überprüfen Sie die eingegebene URL', 'no-results' => 'keine Ergebnisse', 'currency' => '€', - 'relative-date-indicator' => [ - 'vor', - 'seit' - ], 'price' => 'Preis', 'shipping' => 'Versand', 'origin' => 'Ursprung', @@ -2037,49 +2033,7 @@ class MydealsBridge extends PepperBridgeAbstract 'title-keyword' => 'Suche', 'title-group' => 'Gruppe', 'title-talk' => 'Überwachung Diskussion', - 'local-months' => [ - 'Jan', - 'Feb', - 'Mär', - 'Apr', - 'Mai', - 'Jun', - 'Jul', - 'Aug', - 'Sep', - 'Okt', - 'Nov', - 'Dez', - '.' 
- ], - 'local-time-relative' => [ - 'eingestellt vor ', - 'm', - 'h,', - 'day', - 'days', - 'month', - 'year', - 'and ' - ], - 'date-prefixes' => [ - 'eingestellt am ', - 'lokal ', - 'aktualisiert ', - ], - 'relative-date-alt-prefixes' => [ - 'aktualisiert vor ', - 'kommentiert vor ', - 'eingestellt vor ', - 'heiß seit ', - 'vor ' - ], - 'relative-date-ignore-suffix' => [ - '/von.*$/' - ], - 'localdeal' => [ - 'Lokal ', - 'Läuft bis ' - ] + 'deal-type' => 'Angebotsart', + 'localdeal' => 'Lokales Angebot', ]; } diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index d0e152384c7..7b40ea1c6cf 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -46,35 +46,6 @@ protected function collectDeals($url) $html = getSimpleHTMLDOM($url); $list = $html->find('article[id][class*=thread--deal]]'); - // Deal Image Link CSS Selector - $selectorImageLink = implode( - ' ', /* Notice this is a space! */ - [ - 'cept-thread-image-link', - 'imgFrame', - 'imgFrame--noBorder', - 'thread-listImgCell', - ] - ); - - // Deal Link CSS Selector - $selectorLink = implode( - ' ', /* Notice this is a space! */ - [ - 'cept-tt', - 'thread-link', - 'linkPlain', - ] - ); - - // Deal Hotness CSS Selector - $selectorHot = implode( - ' ', /* Notice this is a space! */ - [ - 'vote-box' - ] - ); - // Deal Description CSS Selector $selectorDescription = implode( ' ', /* Notice this is a space! */ @@ -83,65 +54,39 @@ protected function collectDeals($url) ] ); - // Deal Date CSS Selector - $selectorDate = implode( - ' ', /* Notice this is a space! 
*/ - [ - 'size--all-s', - 'flex', - 'boxAlign-jc--all-fe' - ] - ); - // If there is no results, we don't parse the content because it display some random deals $noresult = $html->find('h3[class*=text--b]', 0); if ($noresult != null && strpos($noresult->plaintext, $this->i8n('no-results')) !== false) { $this->items = []; } else { foreach ($list as $deal) { - $item = []; - $item['uri'] = $this->getDealURI($deal); - $item['title'] = $this->getTitle($deal); - $item['author'] = $deal->find('span.thread-username', 0)->plaintext; - // Get the JSON Data stored as vue $jsonDealData = $this->getDealJsonData($deal); + $dealMeta = Json::decode($deal->find('div[class=threadGrid-headerMeta]', 0)->find('div[class=js-vue2]', 1)->getAttribute('data-vue2')); + + $item = []; + $item['uri'] = $this->getDealURI($jsonDealData); + $item['title'] = $this->getTitle($jsonDealData); + $item['author'] = $this->getDealAuthor($jsonDealData); $item['content'] = '
' . $this->getImage($deal) . '' - . $this->getHTMLTitle($item) + . $this->getHTMLTitle($jsonDealData) . $this->getPrice($jsonDealData) . $this->getDiscount($jsonDealData) - . $this->getShipsFrom($deal) - . $this->getShippingCost($deal) + . $this->getShipsFrom($dealMeta) + . $this->getShippingCost($jsonDealData) . $this->getSource($jsonDealData) + . $this->getDealLocation($dealMeta) . $deal->find('div[class*=' . $selectorDescription . ']', 0)->innertext . '' . $this->getTemperature($jsonDealData) . '
'; - // Check if a clock icon is displayed on the deal - $clocks = $deal->find('svg[class*=icon--clock]'); - if ($clocks !== null && count($clocks) > 0) { - // Get the last clock, corresponding to the deal posting date - $clock = end($clocks); - - // Find the text corresponding to the clock - $spanDateDiv = $clock->next_sibling(); - $itemDate = $spanDateDiv->plaintext; - // In some case of a Local deal, there is no date, but we can use - // this case for other reason (like date not in the last field) - if ($this->contains($itemDate, $this->i8n('localdeal'))) { - $item['timestamp'] = time(); - } elseif ($this->contains($itemDate, $this->i8n('relative-date-indicator'))) { - $item['timestamp'] = $this->relativeDateToTimestamp($itemDate); - } else { - $item['timestamp'] = $this->parseDate($itemDate); - } - } + $item['timestamp'] = $this->getPublishedDate($jsonDealData); $this->items[] = $item; } } @@ -284,22 +229,31 @@ private function getPrice($jsonDealData) } } + /** + * Get the Publish Date from a Deal if it exists + * @return integer Timestamp of the published date of the deal + */ + private function getPublishedDate($jsonDealData) + { + return $jsonDealData['props']['thread']['publishedAt']; + } + + /** + * Get the Deal Author from a Deal if it exists + * @return String Author of the deal + */ + private function getDealAuthor($jsonDealData) + { + return $jsonDealData['props']['thread']['user']['username']; + } + /** * Get the Title from a Deal if it exists * @return string String of the deal title */ - private function getTitle($deal) + private function getTitle($jsonDealData) { - $titleRoot = $deal->find('div[class*=threadGrid-title]', 0); - $titleA = $titleRoot->find('a[class*=thread-link]', 0); - $titleFirstChild = $titleRoot->first_child(); - if ($titleA !== null) { - $title = $titleA->plaintext; - } else { - // In some case, expired deals have a different format - $title = $titleRoot->find('span', 0)->plaintext; - } - + $title = 
$jsonDealData['props']['thread']['title']; return $title; } @@ -318,14 +272,10 @@ private function getTalkTitle() * Get the HTML Title code from an item * @return string String of the deal title */ - private function getHTMLTitle($item) + private function getHTMLTitle($jsonDealData) { - if ($item['uri'] == '') { - $html = '

' . $item['title'] . '

'; - } else { - $html = '

' - . $item['title'] . '

'; - } + $html = '

' + . $this->getTitle($jsonDealData) . '

'; return $html; } @@ -334,10 +284,11 @@ private function getHTMLTitle($item) * Get the URI from a Deal if it exists * @return string String of the deal URI */ - private function getDealURI($deal) + private function getDealURI($jsonDealData) { - $dealId = $deal->attr['id']; - $uri = $this->i8n('bridge-uri') . $this->i8n('uri-deal') . str_replace('_', '-', $dealId); + $dealSlug = $jsonDealData['props']['thread']['titleSlug']; + $dealId = $jsonDealData['props']['thread']['threadId']; + $uri = $this->i8n('bridge-uri') . $this->i8n('uri-deal') . $dealSlug . '-' . $dealId; return $uri; } @@ -345,18 +296,14 @@ private function getDealURI($deal) * Get the Shipping costs from a Deal if it exists * @return string String of the deal shipping Cost */ - private function getShippingCost($deal) + private function getShippingCost($jsonDealData) { - if ($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0) != null) { - if ($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0)->children(1) != null) { + $isFree = $jsonDealData['props']['thread']['shipping']['isFree']; + $price = $jsonDealData['props']['thread']['shipping']['price']; + if ($isFree !== null) { return '
' . $this->i8n('shipping') . ' : ' - . strip_tags($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0)->children(1)->innertext) + . $price . ' ' . $this->i8n('currency') . '
'; - } else { - return '
' . $this->i8n('shipping') . ' : ' - . strip_tags($deal->find('span[class*=text--color-greyShade flex--inline]', 0)->innertext) - . '
'; - } } else { return ''; } @@ -422,6 +369,25 @@ private function getDiscount($jsonDealData) } } + /** + * Get the Deal location if it exists + * @return string String of the deal location + */ + private function getDealLocation($dealMeta) + { + $ribbons = $dealMeta['props']['metaRibbons']; + $isLocal = false; + foreach ($ribbons as $ribbon) { + $isLocal |= ($ribbon['type'] == 'local'); + } + if ($isLocal) { + $content = '
' . $this->i8n('deal-type') . ' : ' . $this->i8n('localdeal') . '
'; + } else { + $content = ''; + } + return $content; + } + /** * Get the Picture URL from a Deal if it exists * @return string String of the deal Picture URL @@ -437,9 +403,8 @@ private function getImage($deal) * Get the originating country from a Deal if it exists * @return string String of the deal originating country */ - private function getShipsFrom($deal) + private function getShipsFrom($dealMeta) { - $dealMeta = Json::decode($deal->find('div[class=threadGrid-headerMeta]', 0)->find('div[class=js-vue2]', 1)->getAttribute('data-vue2')); $metas = $dealMeta['props']['metaRibbons']; $shipsFrom = null; foreach ($metas as $meta) { @@ -453,104 +418,6 @@ private function getShipsFrom($deal) return ''; } - /** - * Transforms a local date into a timestamp - * @return int timestamp of the input date - */ - private function parseDate($string) - { - $month_local = $this->i8n('local-months'); - $month_en = [ - 'January', - 'February', - 'March', - 'April', - 'May', - 'June', - 'July', - 'August', - 'September', - 'October', - 'November', - 'December' - ]; - - // A date can be prfixed with some words, we remove theme - $string = $this->removeDatePrefixes($string); - // We translate the local months name in the english one - $date_str = trim(str_replace($month_local, $month_en, $string)); - - // If the date does not contain any year, we add the current year - if (!preg_match('/[0-9]{4}/', $string)) { - $date_str .= ' ' . 
date('Y'); - } - - // Add the Hour and minutes - $date_str .= ' 00:00'; - $date = DateTime::createFromFormat('j F Y H:i', $date_str); - // In some case, the date is not recognized : as a workaround the actual date is taken - if ($date === false) { - $date = new DateTime(); - } - return $date->getTimestamp(); - } - - /** - * Remove the prefix of a date if it has one - * @return the date without prefiux - */ - private function removeDatePrefixes($string) - { - $string = str_replace($this->i8n('date-prefixes'), [], $string); - return $string; - } - - /** - * Remove the suffix of a relative date if it has one - * @return the relative date without suffixes - */ - private function removeRelativeDateSuffixes($string) - { - if (count($this->i8n('relative-date-ignore-suffix')) > 0) { - $string = preg_replace($this->i8n('relative-date-ignore-suffix'), '', $string); - } - return $string; - } - - /** - * Transforms a relative local date into a timestamp - * @return int timestamp of the input date - */ - private function relativeDateToTimestamp($str) - { - $date = new DateTime(); - - // The minimal amount of time substracted is a minute : the seconds in the resulting date would be related to the execution time of the script. - // This make no sense, so we set the seconds manually to "00". 
- $date->setTime($date->format('H'), $date->format('i'), 0); - - // In case of update date, replace it by the regular relative date first word - $str = str_replace($this->i8n('relative-date-alt-prefixes'), $this->i8n('local-time-relative')[0], $str); - - $str = $this->removeRelativeDateSuffixes($str); - - $search = $this->i8n('local-time-relative'); - - $replace = [ - '-', - 'minute', - 'hour', - 'day', - 'month', - 'year', - '' - ]; - $date->modify(str_replace($search, $replace, $str)); - - - return $date->getTimestamp(); - } - /** * Returns the RSS Feed title according to the parameters * @return string the RSS feed Tiyle From e0be3662589fbc86b944906713cd6ebfaaf43ccc Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Fri, 29 Mar 2024 15:37:43 +0100 Subject: [PATCH 28/97] Update AnisearchBridge.php (#4025) * Update AnisearchBridge.php added youtube trailer * made trailers optional and reduced scraping to 5 articles if selected * Update AnisearchBridge.php --- bridges/AnisearchBridge.php | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/bridges/AnisearchBridge.php b/bridges/AnisearchBridge.php index 639d143feb5..d5aad1c9352 100644 --- a/bridges/AnisearchBridge.php +++ b/bridges/AnisearchBridge.php @@ -19,16 +19,29 @@ class AnisearchBridge extends BridgeAbstract 'JP' => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=ja&sort=date&order=desc&view=4' ] + ], + 'trailers' => [ + 'name' => 'Trailers', + 'type' => 'checkbox', + 'title' => 'Will include trailes', + 'defaultValue' => false ] ]]; public function collectData() { $baseurl = 'https://www.anisearch.de/'; + $trailers = false; + $trailers = $this->getInput('trailers'); $limit = 10; + if ($trailers) { + $limit = 5; + } + $dom = getSimpleHTMLDOM($this->getInput('category')); + foreach ($dom->find('li.btype0') as $key => $li) { - if ($key > $limit) { + if ($key >= $limit) { break; } @@ -44,10 +57,29 @@ public 
function collectData() $headerimage = $domarticle->find('img#details-cover', 0); $src = $headerimage->src; + foreach ($content->find('.hidden') as $element) { + $element->remove(); + } + + //get trailer + $ytlink = ''; + if ($trailers) { + $trailerlink = $domarticle->find('section#trailers > div > div.swiper > ul.swiper-wrapper > li.swiper-slide > a', 0); + if (isset($trailerlink)) { + $trailersite = getSimpleHTMLDOM($baseurl . $trailerlink->href); + $trailer = $trailersite->find('div#player > iframe', 0); + $ytlink = <<'; + EOT; + } + } + $this->items[] = [ 'title' => $title->plaintext, 'uri' => $url, - 'content' => $headerimage . '
' . $content + 'content' => $headerimage . '
' . $content . $ytlink ]; } } From 24e429969f87574d5808098a27248f0e212ce86e Mon Sep 17 00:00:00 2001 From: Quentin de Longraye Date: Sat, 30 Mar 2024 16:11:57 +0100 Subject: [PATCH 29/97] specify system section for enabling bridges (#4036) --- docs/03_For_Hosts/05_Whitelisting.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/03_For_Hosts/05_Whitelisting.md b/docs/03_For_Hosts/05_Whitelisting.md index 113c4e3d43b..156174f0d72 100644 --- a/docs/03_For_Hosts/05_Whitelisting.md +++ b/docs/03_For_Hosts/05_Whitelisting.md @@ -1,14 +1,18 @@ -Modify `config.ini.php` to limit available bridges. +Modify `config.ini.php` to limit available bridges. Those changes should be applied in the `[system]` section. ## Enable all bridges ``` +[system] + enabled_bridges[] = * ``` ## Enable some bridges ``` +[system] + enabled_bridges[] = TwitchBridge enabled_bridges[] = GettrBridge ``` From 545dc969d35bc8c94a8c15875562690ee2fd6605 Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 31 Mar 2024 03:38:42 +0200 Subject: [PATCH 30/97] refactor (#4037) --- actions/DisplayAction.php | 8 +++--- bridges/NintendoBridge.php | 1 - formats/HtmlFormat.php | 51 ++++++++++++++++------------------ lib/BridgeCard.php | 2 +- templates/html-format.html.php | 25 ++++++++++++----- 5 files changed, 47 insertions(+), 40 deletions(-) diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index ed063825a87..24bdefe14d4 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -51,7 +51,6 @@ public function execute(Request $request) return new Response(render(__DIR__ . 
'/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400); } - if ( Configuration::getConfig('proxy', 'url') && Configuration::getConfig('proxy', 'by_bridge') @@ -62,8 +61,6 @@ public function execute(Request $request) } $bridge = $bridgeFactory->create($bridgeClassName); - $formatFactory = new FormatFactory(); - $format = $formatFactory->create($format); $response = $this->createResponse($request, $bridge, $format); @@ -93,7 +90,7 @@ public function execute(Request $request) return $response; } - private function createResponse(Request $request, BridgeAbstract $bridge, FormatAbstract $format) + private function createResponse(Request $request, BridgeAbstract $bridge, string $format) { $items = []; $feed = []; @@ -157,6 +154,9 @@ private function createResponse(Request $request, BridgeAbstract $bridge, Format } } + $formatFactory = new FormatFactory(); + $format = $formatFactory->create($format); + $format->setItems($items); $format->setFeed($feed); $now = time(); diff --git a/bridges/NintendoBridge.php b/bridges/NintendoBridge.php index 1c2ef71a843..1c4ecf2bf61 100644 --- a/bridges/NintendoBridge.php +++ b/bridges/NintendoBridge.php @@ -4,7 +4,6 @@ class NintendoBridge extends XPathAbstract { const NAME = 'Nintendo Software Updates'; const URI = 'https://www.nintendo.co.uk/Support/Welcome-to-Nintendo-Support-11593.html'; - const DONATION_URI = ''; const DESCRIPTION = self::NAME; const MAINTAINER = 'Niehztog'; const PARAMETERS = [ diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 93c824b384c..1e2f60e62cf 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -6,34 +6,26 @@ class HtmlFormat extends FormatAbstract public function stringify() { + // This query string comes in already url decoded $queryString = $_SERVER['QUERY_STRING']; $feedArray = $this->getFeed(); $formatFactory = new FormatFactory(); - $buttons = []; - $linkTags = []; - foreach ($formatFactory->getFormatNames() as $formatName) { - // 
Dynamically build buttons for all formats (except HTML) + $formats = []; + + // Create all formats (except HTML) + $formatNames = $formatFactory->getFormatNames(); + foreach ($formatNames as $formatName) { if ($formatName === 'Html') { continue; } - $formatUrl = '?' . str_ireplace('format=Html', 'format=' . $formatName, htmlentities($queryString)); - $buttons[] = [ - 'href' => $formatUrl, - 'value' => $formatName, - ]; - $format = $formatFactory->create($formatName); - $linkTags[] = [ - 'href' => $formatUrl, - 'title' => $formatName, - 'type' => $format->getMimeType(), - ]; - } - - if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) { - $buttons[] = [ - 'href' => e($feedArray['donationUri']), - 'value' => 'Donate to maintainer', + // The format url is relative, but should be absolute in order to help feed readers. + $formatUrl = '?' . str_ireplace('format=Html', 'format=' . $formatName, $queryString); + $formatObject = $formatFactory->create($formatName); + $formats[] = [ + 'url' => $formatUrl, + 'name' => $formatName, + 'type' => $formatObject->getMimeType(), ]; } @@ -50,13 +42,18 @@ public function stringify() ]; } + $donationUri = null; + if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) { + $donationUri = $feedArray['donationUri']; + } + $html = render_template(__DIR__ . 
'/../templates/html-format.html.php', [ - 'charset' => $this->getCharset(), - 'title' => $feedArray['name'], - 'linkTags' => $linkTags, - 'uri' => $feedArray['uri'], - 'buttons' => $buttons, - 'items' => $items, + 'charset' => $this->getCharset(), + 'title' => $feedArray['name'], + 'formats' => $formats, + 'uri' => $feedArray['uri'], + 'items' => $items, + 'donation_uri' => $donationUri, ]); // Remove invalid characters ini_set('mbstring.substitute_character', 'none'); diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index c4677b9d7b5..d15ac865e0a 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -78,7 +78,7 @@ class="bridge-card" $card .= sprintf('', $bridgeClassName); - if ($bridge->getDonationURI() !== '' && Configuration::getConfig('admin', 'donations')) { + if (Configuration::getConfig('admin', 'donations') && $bridge->getDonationURI()) { $card .= sprintf( '

%s ~ Donate

', $bridge->getMaintainer(), diff --git a/templates/html-format.html.php b/templates/html-format.html.php index 3b0fe6fe2bd..bc95c5d04e7 100644 --- a/templates/html-format.html.php +++ b/templates/html-format.html.php @@ -8,12 +8,13 @@ - + + @@ -33,11 +34,21 @@ - - - + + + + + + + + + From b58d8b099b33ff030ac9004656a048b751ac2691 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Sun, 31 Mar 2024 03:44:10 +0200 Subject: [PATCH 31/97] docs: Complete helper function documentation (#3911) * docs: Complete helper function documentation Complete documentation of the Helper functions * docs: remove parameters and add a link to source - Parameters removed - Link to the file defining the function * docs: fix links Fix links to source files --- docs/06_Helper_functions/index.md | 81 +++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/docs/06_Helper_functions/index.md b/docs/06_Helper_functions/index.md index 31a13953235..3aaeed89207 100644 --- a/docs/06_Helper_functions/index.md +++ b/docs/06_Helper_functions/index.md @@ -233,3 +233,84 @@ $html = markdownToHtml($input); //
  • Translation improvements
  • // ``` + + +# e +The `e` function is used to convert special characters to HTML entities + +```PHP +e('0 < 1 and 2 > 1'); +``` + +`e` will return the content of the string escape that can be rendered as is in HTML + +[Defined in lib/html.php](/lib/html.php) + +# truncate +The `truncate` function is used to shorten a string if exceeds a certain length, and add a string indicating that the string has been shortened. + +```PHP +truncate('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed a neque nunc. Nam nibh sem.', 20 , '...'); +``` + +[Defined in lib/html.php](/lib/html.php) + +# sanitize +The `sanitize` function is used to remove some tags from a given HTML text. + +```PHP +$html = 'Sample Page +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit...

    + + +'; +$tags_to_remove = ['script', 'iframe', 'input', 'form']; +$attributes_to_keep = ['title', 'href', 'src']; +$text_to_keep = []; +sanitize($html, $tags_to_remove, $attributes_to_keep, $text_to_keep); +``` + +This function returns a simplehtmldom object of the remaining contents. + +[Defined in lib/html.php](/lib/html.php) + +# convertLazyLoading +The `convertLazyLoading` function is used to convert onvert lazy-loading images and frames (video embeds) into static elements. It accepts the HTML content as HTML objects or string objects. It returns the HTML content with fixed image/frame URLs (same type as input). + +```PHP +$html = ' + +

    Hello world!

    + + +backgroundToImg($html); +``` + +[Defined in lib/html.php](/lib/html.php) + + +# Json::encode +The `Json::encode` function is used to encode a value as à JSON string. + +```PHP +$array = [ + "foo" => "bar", + "bar" => "foo", +]; +Json::encode($array, true, true); +``` + +[Defined in lib/utils.php](/lib/utils.php) + +# Json::decode +The `Json::decode` function is used to decode a JSON string into à PHP variable. + +```PHP +$json = '{ + "foo": "bar", + "bar": "foo" +}'; +Json::decode($json); +``` + +[Defined in lib/utils.php](/lib/utils.php) From d23fd2522ce71877bcfc942c13250ff3ec9d71ca Mon Sep 17 00:00:00 2001 From: Miika Launiainen Date: Sun, 31 Mar 2024 04:46:23 +0300 Subject: [PATCH 32/97] [GenshinImpactBridge] Fix bridge to use new API (#4011) * [GenshinImpactBridge] Fix bridge to use new API * Add category parameters back to not break existing feeds * Fix lint error * Remove whitespace --- bridges/GenshinImpactBridge.php | 57 +++++++++++++-------------------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/bridges/GenshinImpactBridge.php b/bridges/GenshinImpactBridge.php index 24bc39d860a..0dc08a28518 100644 --- a/bridges/GenshinImpactBridge.php +++ b/bridges/GenshinImpactBridge.php @@ -2,11 +2,11 @@ class GenshinImpactBridge extends BridgeAbstract { - const MAINTAINER = 'corenting'; const NAME = 'Genshin Impact'; - const URI = 'https://genshin.mihoyo.com/en/news'; - const CACHE_TIMEOUT = 7200; // 2h - const DESCRIPTION = 'News from the Genshin Impact website'; + const URI = 'https://genshin.hoyoverse.com/en/news'; + const CACHE_TIMEOUT = 18000; // 5h + const DESCRIPTION = 'Latest news from the Genshin Impact website'; + const MAINTAINER = 'Miicat_47'; const PARAMETERS = [ [ 'category' => [ @@ -25,37 +25,31 @@ class GenshinImpactBridge extends BridgeAbstract public function collectData() { - $category = $this->getInput('category'); - - $url = 'https://genshin.mihoyo.com/content/yuanshen/getContentList'; - $url = $url . 
'?pageSize=5&pageNum=1&channelId=' . $category; + $url = 'https://api-os-takumi-static.hoyoverse.com/content_v2_user/app/a1b1f9d3315447cc/getContentList?iAppId=32&iChanId=395&iPageSize=5&iPage=1&sLangKey=en-us'; $api_response = getContents($url); $json_list = json_decode($api_response, true); foreach ($json_list['data']['list'] as $json_item) { - $article_url = 'https://genshin.mihoyo.com/content/yuanshen/getContent'; - $article_url = $article_url . '?contentId=' . $json_item['contentId']; - $article_res = getContents($article_url); - $article_json = json_decode($article_res, true); - $article_time = $article_json['data']['start_time']; - $timezone = 'Asia/Shanghai'; - $article_timestamp = new DateTime($article_time, new DateTimeZone($timezone)); - + $article_html = str_get_html($json_item['sContent']); + + // Check if article contains a embed YouTube video + $exp_youtube = '/https:\/\/[w\.]+youtube\.com\/embed\/([\w]+)/m'; + if (preg_match($exp_youtube, $article_html, $matches)) { + // Replace the YouTube embed with a YouTube link + $yt_embed = $article_html->find('div[class="ttr-video-frame"]', 0); + $yt_link = sprintf('https://youtube.com/watch?v=%1$s', $matches[1]); + $article_html = str_replace($yt_embed, $yt_link, $article_html); + } $item = []; - - $item['title'] = $article_json['data']['title']; - $item['timestamp'] = $article_timestamp->format('U'); - $item['content'] = $article_json['data']['content']; - $item['uri'] = $this->getArticleUri($json_item); - $item['id'] = $json_item['contentId']; + $item['title'] = $json_item['sTitle']; + $item['timestamp'] = $json_item['dtStartTime']; + $item['content'] = $article_html; + $item['uri'] = 'https://genshin.hoyoverse.com/en/news/detail/' . 
$json_item['iInfoId']; + $item['id'] = $json_item['iInfoId']; // Picture - foreach ($article_json['data']['ext'] as $ext) { - if ($ext['arrtName'] == 'banner' && count($ext['value']) == 1) { - $item['enclosures'] = [$ext['value'][0]['url']]; - break; - } - } + $json_ext = json_decode($json_item['sExt'], true); + $item['enclosures'] = $json_ext['banner'][0]['url']; $this->items[] = $item; } @@ -63,11 +57,6 @@ public function collectData() public function getIcon() { - return 'https://genshin.mihoyo.com/favicon.ico'; - } - - private function getArticleUri($json_item) - { - return 'https://genshin.mihoyo.com/en/news/detail/' . $json_item['contentId']; + return 'https://genshin.hoyoverse.com/favicon.ico'; } } From 1c3c85d8ff5a6d071f688ef09ca93f275b4995af Mon Sep 17 00:00:00 2001 From: Niehztog Date: Sun, 31 Mar 2024 18:46:07 +0200 Subject: [PATCH 33/97] [XPathBridge] Allow multiple categories (#4038) * [XPathAbstract] allow multiple categories * fix feed icons in two bridges * fix warning * fix linter errors --- bridges/BlizzardNewsBridge.php | 7 ++++ bridges/NiusBridge.php | 5 +++ lib/XPathAbstract.php | 74 +++++++++++++++++++++++++--------- 3 files changed, 67 insertions(+), 19 deletions(-) diff --git a/bridges/BlizzardNewsBridge.php b/bridges/BlizzardNewsBridge.php index 3930e0a4d1a..19c38152cc0 100644 --- a/bridges/BlizzardNewsBridge.php +++ b/bridges/BlizzardNewsBridge.php @@ -57,4 +57,11 @@ protected function getSourceUrl() } return 'https://news.blizzard.com/' . 
$locale; } + + public function getIcon() + { + return <<getItemValueOrNodeValue($typedResult, $isContent, $isContent && !$this->getSettingUseRawItemContent()); + $isCategories = 'categories' === $param; + $value = $this->getItemValueOrNodeValue($typedResult, $isContent, $isContent && !$this->getSettingUseRawItemContent(), $isCategories); $item->__set($param, $this->formatParamValue($param, $value)); } @@ -459,7 +460,7 @@ public function collectData() */ protected function formatParamValue($param, $value) { - $value = $this->fixEncoding($value); + $value = is_array($value) ? array_map([$this, 'fixEncoding'], $value) : $this->fixEncoding($value); switch ($param) { case 'title': return $this->formatItemTitle($value); @@ -572,12 +573,12 @@ protected function formatItemEnclosures($value) * formatted as array. * Can be easily overwritten for in case the values need to be transformed into something * else. - * @param string $value + * @param string|array $value * @return array */ protected function formatItemCategories($value) { - return [$value]; + return is_array($value) ? $value : [$value]; } /** @@ -596,22 +597,21 @@ protected function cleanMediaUrl($mediaUrl) /** * @param $typedResult - * @return string + * @param bool $returnXML + * @param bool $escapeHtml + * @param bool $allowMultiple + * @return string|array + * @throws Exception */ - protected function getItemValueOrNodeValue($typedResult, $returnXML = false, $escapeHtml = false) + protected function getItemValueOrNodeValue($typedResult, $returnXML = false, $escapeHtml = false, $allowMultiple = false) { - if ($typedResult instanceof \DOMNodeList) { + if ($typedResult instanceof \DOMNodeList && !$allowMultiple) { $item = $typedResult->item(0); - if ($item instanceof \DOMElement) { - // Don't escape XML - if ($returnXML) { - return ($item->ownerDocument ?? 
$item)->saveXML($item); - } - $text = $item->nodeValue; - } elseif ($item instanceof \DOMAttr) { - $text = $item->value; - } elseif ($item instanceof \DOMText) { - $text = $item->wholeText; + $text = $this->extractNodeListContent($item, $returnXML); + } elseif ($typedResult instanceof \DOMNodeList && $allowMultiple) { + $text = []; + foreach ($typedResult as $item) { + $text[] = $this->extractNodeListContent($item, $returnXML); } } elseif (is_string($typedResult) && strlen($typedResult) > 0) { $text = $typedResult; @@ -619,10 +619,46 @@ protected function getItemValueOrNodeValue($typedResult, $returnXML = false, $es throw new \Exception('Unknown type of XPath expression result.'); } + if (is_array($text)) { + foreach ($text as &$element) { + $element = $this->cleanExtractedText($element, $escapeHtml, $returnXML); + } + } else { + $text = $this->cleanExtractedText($text, $escapeHtml, $returnXML); + } + return $text; + } + + /** + * @param $item + * @param $returnXML + * @return false|string + * @throws Exception + */ + protected function extractNodeListContent($item, $returnXML) + { + if ($item instanceof \DOMElement) { + return $returnXML ? ($item->ownerDocument ?? 
$item)->saveXML($item) : $item->nodeValue; + } elseif ($item instanceof \DOMAttr) { + return $item->value; + } elseif ($item instanceof \DOMText) { + return $item->wholeText; + } + throw new \Exception('Unknown type of XPath expression result.'); + } + + /** + * @param $text + * @param $escapeHtml + * @param $returnXML + * @return string + */ + protected function cleanExtractedText($text, $escapeHtml, $returnXML) + { $text = trim($text); - if ($escapeHtml) { - return htmlspecialchars($text); + if ($escapeHtml && !$returnXML) { + $text = htmlspecialchars($text); } return $text; } From 8ca1b908400d2965c3ca6aa76b821b7bca7c50e0 Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 31 Mar 2024 20:07:14 +0200 Subject: [PATCH 34/97] fix(NationalGeographicBridge) (#4039) --- bridges/NationalGeographicBridge.php | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bridges/NationalGeographicBridge.php b/bridges/NationalGeographicBridge.php index f7572240adc..7f8f4fa243d 100644 --- a/bridges/NationalGeographicBridge.php +++ b/bridges/NationalGeographicBridge.php @@ -168,7 +168,7 @@ private function addStory($story) } $image = $story['img']; - $item['enclosures'][] = $image['src']; + $item['enclosures'][] = str_replace(' ', '%20', $image['src']); foreach ($story['tags'] as $tag) { $item['categories'][] = $tag['name'] ?? $tag; @@ -218,7 +218,10 @@ private function handleImages($image_module, $image_type) switch ($image_type) { case 'image': case 'imagegroup': - $image = $image_module['image']; + $image = $image_module['image'] ?? 
null; + if (!$image) { + return ''; + } $image_src = $image['src']; if (isset($image_module['alt'])) { $image_alt = $image_module['alt']; @@ -266,7 +269,11 @@ private function getFullArticle($uri) $json = json_decode($matches[1][0], true); - $unfiltered_data = $json['page']['content']['article']['frms']; + if (isset($json['page']['content']['article']['frms'])) { + $unfiltered_data = $json['page']['content']['article']['frms']; + } else { + $unfiltered_data = $json['page']['content']['prismarticle']['frms']; + } $filtered_data = $this->filterArticleData($unfiltered_data); $article = $filtered_data['edgs'][0]; @@ -288,7 +295,7 @@ private function getFullArticle($uri) } } - $published_date = $article['pbDt']; + $published_date = $article['pbDt'] ?? $article['dt']; $article_body = $article['bdy']; $content = ''; From 73289324bd39a31e225bd8f8048a1081bb771c67 Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 31 Mar 2024 21:02:55 +0200 Subject: [PATCH 35/97] feat: add vendor http header to cached responses (#4040) --- actions/DisplayAction.php | 2 +- bridges/MediapartBlogsBridge.php | 7 ++++++- lib/FeedItem.php | 2 -- lib/http.php | 9 ++++++++- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 24bdefe14d4..93813004f22 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -32,7 +32,7 @@ public function execute(Request $request) return new Response('', 304, ['last-modified' => $modificationTimeGMT . 'GMT']); } } - return $cachedResponse; + return $cachedResponse->withHeader('rss-bridge', 'This is a cached response'); } if (!$bridgeName) { diff --git a/bridges/MediapartBlogsBridge.php b/bridges/MediapartBlogsBridge.php index fa8c3d5f1d3..d1e1c3c9867 100644 --- a/bridges/MediapartBlogsBridge.php +++ b/bridges/MediapartBlogsBridge.php @@ -35,7 +35,12 @@ public function collectData() $item['title'] = $item_title->innertext; $item['uri'] = self::BASE_URI . 
trim($item_title->href); - $item['author'] = $element->find('.author .subscriber', 0)->innertext; + + $author = $element->find('.author .subscriber', 0); + if ($author) { + $item['author'] = $author->innertext; + } + $item['content'] = $item_divs[count($item_divs) - 2] . $item_divs[count($item_divs) - 1]; $item['timestamp'] = strtotime($element->find('.author time', 0)->datetime); diff --git a/lib/FeedItem.php b/lib/FeedItem.php index bd37f119398..fc4549a7b5f 100644 --- a/lib/FeedItem.php +++ b/lib/FeedItem.php @@ -178,7 +178,6 @@ public function setAuthor($author) } else { $this->author = $author; } - return $this; } public function getContent(): ?string @@ -284,7 +283,6 @@ public function addMisc($name, $value) } else { $this->misc[$name] = $value; } - return $this; } public function toArray(): array diff --git a/lib/http.php b/lib/http.php index e4f9bf482ae..39f0c72710f 100644 --- a/lib/http.php +++ b/lib/http.php @@ -331,7 +331,14 @@ public function getHeader(string $name, bool $all = false) return array_pop($header); } - public function withBody(string $body): Response + public function withHeader(string $name, string $value): self + { + $clone = clone $this; + $clone->headers[$name] = [$value]; + return $clone; + } + + public function withBody(string $body): self { $clone = clone $this; $clone->body = $body; From 17a3b4c9d871208896e6c50a64b539d8689be128 Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 31 Mar 2024 21:32:27 +0200 Subject: [PATCH 36/97] Fix 198 (#4041) * fix(twitch): log instead of exception * typo --- bridges/RedditBridge.php | 2 +- bridges/TwitchBridge.php | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index fbc6f67891c..7ece0e15b30 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -121,7 +121,7 @@ private function collectDataInternal(): void $comments = false; $frontend = $this->getInput('frontend'); if ($frontend == '') { - $frontend = 
'https://old.reddit.com'; + $frontend = 'https://old.reddit.com'; } $section = $this->getInput('d'); diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index f408f8855ca..9e70944e1fa 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -95,10 +95,14 @@ public function collectData() if ($data->user === null) { throw new \Exception(sprintf('Unable to find channel `%s`', $channel)); } + $user = $data->user; if ($user->videos === null) { - throw new HttpException('Service Unavailable', 503); + // twitch regularly does this for unknown reasons + $this->logger->info('Twitch returned empty set of videos', ['data' => $data]); + return; } + foreach ($user->videos->edges as $edge) { $video = $edge->node; From 9682f74fc569214186a504c03cf52dc78ed73d5f Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 31 Mar 2024 21:37:51 +0200 Subject: [PATCH 37/97] fix(cnet): author typo (#4042) --- bridges/CNETBridge.php | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 4a63c84773c..17c05e9b0eb 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -56,7 +56,7 @@ public function collectData() foreach ($links as $article_uri) { $article_dom = convertLazyLoading(getSimpleHTMLDOMCached($article_uri)); $title = trim($article_dom->find('h1', 0)->plaintext); - $author = $article_dom->find('span.c-assetAuthor_name', 0)->plaintext; + $author = $article_dom->find('span.c-assetAuthor_name', 0); $headline = $article_dom->find('p.c-contentHeader_description', 0); $content = $article_dom->find('div.c-pageArticle_content, div.single-article__content, div.article-main-body', 0); $date = null; @@ -97,7 +97,11 @@ public function collectData() $item = []; $item['uri'] = $article_uri; $item['title'] = $title; - $item['author'] = $author; + + if ($author) { + $item['author'] = $author->plaintext; + } + $item['content'] = $content; if (!is_null($date)) { From 
182567e4341db19eb91df107216e6667c5d6e5de Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 31 Mar 2024 21:52:53 +0200 Subject: [PATCH 38/97] fix(bridges/DavesTrailerPageBridge): remove (#4043) --- bridges/DavesTrailerPageBridge.php | 40 ------------------------------ 1 file changed, 40 deletions(-) delete mode 100644 bridges/DavesTrailerPageBridge.php diff --git a/bridges/DavesTrailerPageBridge.php b/bridges/DavesTrailerPageBridge.php deleted file mode 100644 index 965f7e59b0c..00000000000 --- a/bridges/DavesTrailerPageBridge.php +++ /dev/null @@ -1,40 +0,0 @@ -find('tr') as $tr) { - // If it's a date row, update the current date - if ($tr->align == 'center') { - $curr_date = $tr->plaintext; - continue; - } - - $item = []; - - // title - $item['title'] = $tr->find('td', 0)->find('b', 0)->plaintext; - - // content - $item['content'] = $tr->find('ul', 1); - - // uri - $item['uri'] = $tr->find('a', 3)->getAttribute('href'); - - // date: parsed by FeedItem using strtotime - $item['timestamp'] = $curr_date; - - $this->items[] = $item; - } - } -} From d5d470cbc2195472fc1387ab55befd09d9ce21fc Mon Sep 17 00:00:00 2001 From: Dag Date: Sun, 31 Mar 2024 22:10:59 +0200 Subject: [PATCH 39/97] fix(dribble) (#4044) --- bridges/DribbbleBridge.php | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/bridges/DribbbleBridge.php b/bridges/DribbbleBridge.php index 3957c9de828..539127b36ba 100644 --- a/bridges/DribbbleBridge.php +++ b/bridges/DribbbleBridge.php @@ -18,12 +18,12 @@ public function collectData() { $html = getSimpleHTMLDOM(self::URI); - $json = $this->loadEmbeddedJsonData($html); + $data = $this->fetchData($html); foreach ($html->find('li[id^="screenshot-"]') as $shot) { $item = []; - $additional_data = $this->findJsonForShot($shot, $json); + $additional_data = $this->findJsonForShot($shot, $data); if ($additional_data === null) { $item['uri'] = self::URI . 
$shot->find('a', 0)->href; $item['title'] = $shot->find('.shot-title', 0)->plaintext; @@ -46,9 +46,8 @@ public function collectData() } } - private function loadEmbeddedJsonData($html) + private function fetchData($html) { - $json = []; $scripts = $html->find('script'); foreach ($scripts as $script) { @@ -69,12 +68,17 @@ private function loadEmbeddedJsonData($html) $end = strpos($script->innertext, '];') + 1; // convert JSON to PHP array - $json = json_decode(substr($script->innertext, $start, $end - $start), true); - break; + $json = substr($script->innertext, $start, $end - $start); + + try { + // TODO: fix broken json + return Json::decode($json); + } catch (\JsonException $e) { + return []; + } } } - - return $json; + return []; } private function findJsonForShot($shot, $json) From 7001fbaf49a801226e819919cff913909242e7ac Mon Sep 17 00:00:00 2001 From: July Date: Sun, 31 Mar 2024 16:41:58 -0400 Subject: [PATCH 40/97] [AO3Bridge] Fix bad heading selector (#4045) --- bridges/AO3Bridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 85f0f9f8269..4c09c28c048 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -77,7 +77,7 @@ private function collectList($url) $html = defaultLinkTo($html, self::URI); // Get list title. 
Will include page range + count in some cases - $heading = ($html->find('#main > h2', 0)); + $heading = ($html->find('#main h2', 0)); if ($heading->find('a.tag')) { $heading = $heading->find('a.tag', 0); } From b4659786cb682096a46c0b6ca8a54505aa0ba49d Mon Sep 17 00:00:00 2001 From: Miika Launiainen Date: Mon, 1 Apr 2024 22:16:32 +0300 Subject: [PATCH 41/97] [GenshinImpactBridge] Small fixes (#4046) * Switch json_decode to Json::decode * Change regex delimeter from / to # * Save item enclosures as list --- bridges/GenshinImpactBridge.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bridges/GenshinImpactBridge.php b/bridges/GenshinImpactBridge.php index 0dc08a28518..924155d9d55 100644 --- a/bridges/GenshinImpactBridge.php +++ b/bridges/GenshinImpactBridge.php @@ -27,13 +27,13 @@ public function collectData() { $url = 'https://api-os-takumi-static.hoyoverse.com/content_v2_user/app/a1b1f9d3315447cc/getContentList?iAppId=32&iChanId=395&iPageSize=5&iPage=1&sLangKey=en-us'; $api_response = getContents($url); - $json_list = json_decode($api_response, true); + $json_list = Json::decode($api_response); foreach ($json_list['data']['list'] as $json_item) { $article_html = str_get_html($json_item['sContent']); // Check if article contains a embed YouTube video - $exp_youtube = '/https:\/\/[w\.]+youtube\.com\/embed\/([\w]+)/m'; + $exp_youtube = '#https://[w\.]+youtube\.com/embed/([\w]+)#m'; if (preg_match($exp_youtube, $article_html, $matches)) { // Replace the YouTube embed with a YouTube link $yt_embed = $article_html->find('div[class="ttr-video-frame"]', 0); @@ -48,8 +48,8 @@ public function collectData() $item['id'] = $json_item['iInfoId']; // Picture - $json_ext = json_decode($json_item['sExt'], true); - $item['enclosures'] = $json_ext['banner'][0]['url']; + $json_ext = Json::decode($json_item['sExt']); + $item['enclosures'] = [$json_ext['banner'][0]['url']]; $this->items[] = $item; } From a12bab9eedcf149778ad47e2ee84346e7b0d2016 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Dawid=20Wr=C3=B3bel?= Date: Mon, 1 Apr 2024 23:44:45 +0200 Subject: [PATCH 42/97] [AllegroBridge] ask for a complete cookie string, mere wcdx works no more (#4048) --- bridges/AllegroBridge.php | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/bridges/AllegroBridge.php b/bridges/AllegroBridge.php index 7cad11f1709..041212579ba 100644 --- a/bridges/AllegroBridge.php +++ b/bridges/AllegroBridge.php @@ -13,12 +13,9 @@ class AllegroBridge extends BridgeAbstract 'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660', 'required' => true, ], - 'sessioncookie' => [ - 'name' => 'The \'wdctx\' session cookie', - 'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits', - 'pattern' => '^.{70,};?$', - // phpcs:ignore - 'exampleValue' => 'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd', + 'cookie' => [ + 'name' => 'The complete cookie value', + 'title' => 'Paste the value of the cookie value from your browser if you want to prevent Allegro imposing rate limits', 'required' => false, ], 'includeSponsoredOffers' => [ @@ -70,9 +67,9 @@ public function collectData() $opts = []; - // If a session cookie is provided - if ($sessioncookie = $this->getInput('sessioncookie')) { - $opts[CURLOPT_COOKIE] = 'wdctx=' . 
$sessioncookie; + // If a cookie is provided + if ($cookie = $this->getInput('cookie')) { + $opts[CURLOPT_COOKIE] = $cookie; } $html = getSimpleHTMLDOM($url, [], $opts); From bb979e9e0865991806f0d2cf1c55044168bcf586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Wr=C3=B3bel?= Date: Tue, 2 Apr 2024 00:06:15 +0200 Subject: [PATCH 43/97] [AllegroBridge] fix logical condition on parameters (#4049) --- bridges/AllegroBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/AllegroBridge.php b/bridges/AllegroBridge.php index 041212579ba..55e9f116040 100644 --- a/bridges/AllegroBridge.php +++ b/bridges/AllegroBridge.php @@ -81,11 +81,11 @@ public function collectData() $results = $html->find('article[data-analytics-view-custom-context="REGULAR"]'); - if (!$this->getInput('includeSponsoredOffers')) { + if ($this->getInput('includeSponsoredOffers')) { $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]')); } - if (!$this->getInput('includePromotedOffers')) { + if ($this->getInput('includePromotedOffers')) { $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]')); } From 8f962383c269aa181c3fc7025c478ae712cd38a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Wr=C3=B3bel?= Date: Tue, 2 Apr 2024 01:01:23 +0200 Subject: [PATCH 44/97] [eBayBridge] fix Belgian eBay URL handling (#4050) Fixes #3918 --- bridges/EBayBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/EBayBridge.php b/bridges/EBayBridge.php index 507930ea760..879581645cc 100644 --- a/bridges/EBayBridge.php +++ b/bridges/EBayBridge.php @@ -10,7 +10,7 @@ class EBayBridge extends BridgeAbstract 'url' => [ 'name' => 'Search URL', 'title' => 'Copy the URL from your browser\'s address bar after searching for your items and paste it here', - 'pattern' => 
'^(https:\/\/)?(www.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk).*$', + 'pattern' => '^(https:\/\/)?(www\.)?(befr\.|benl\.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk).*$', 'exampleValue' => 'https://www.ebay.com/sch/i.html?_nkw=atom+rss', 'required' => true, ] From fb66775ecee9db86c6be55c28eeae1c8f50e4cee Mon Sep 17 00:00:00 2001 From: Niehztog Date: Tue, 2 Apr 2024 23:14:25 +0200 Subject: [PATCH 45/97] [XPathAbstract] Refactor xpath abstract (#4047) * refactor XPathAbstract, keep all functionality intact * fix linter errors * further simplify code * set default value for raw item content to true, avoiding escaping of html tags in feed item contents by default --- bridges/BlizzardNewsBridge.php | 2 +- lib/XPathAbstract.php | 125 ++++++++++++--------------------- 2 files changed, 47 insertions(+), 80 deletions(-) diff --git a/bridges/BlizzardNewsBridge.php b/bridges/BlizzardNewsBridge.php index 19c38152cc0..993492d404b 100644 --- a/bridges/BlizzardNewsBridge.php +++ b/bridges/BlizzardNewsBridge.php @@ -37,7 +37,7 @@ class BlizzardNewsBridge extends XPathAbstract const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article'; const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2'; - const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]'; + const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]/text()'; const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href'; const XPATH_EXPRESSION_ITEM_AUTHOR = ''; const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp'; diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index 224d8e875a1..6163ca132d6 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ -76,15 +76,6 @@ abstract class XPathAbstract extends BridgeAbstract */ 
const XPATH_EXPRESSION_ITEM_CONTENT = ''; - /** - * Use raw item content - * Whether to use the raw item content or to replace certain characters with - * special significance in HTML by HTML entities (using the PHP function htmlspecialchars). - * - * Use {@see XPathAbstract::getSettingUseRawItemContent()} to read this parameter - */ - const SETTING_USE_RAW_ITEM_CONTENT = false; - /** * XPath expression for extracting an item link from the item context * This expression should match a node's attribute containing the article URL @@ -158,6 +149,15 @@ abstract class XPathAbstract extends BridgeAbstract */ const SETTING_FIX_ENCODING = false; + /** + * Use raw item content + * Whether to use the raw item content or to replace certain characters with + * special significance in HTML by HTML entities (using the PHP function htmlspecialchars). + * + * Use {@see XPathAbstract::getSettingUseRawItemContent()} to read this parameter + */ + const SETTING_USE_RAW_ITEM_CONTENT = true; + /** * Internal storage for resulting feed name, automatically detected * @var string @@ -245,15 +245,6 @@ protected function getExpressionItemContent() return static::XPATH_EXPRESSION_ITEM_CONTENT; } - /** - * Use raw item content - * @return bool - */ - protected function getSettingUseRawItemContent(): bool - { - return static::SETTING_USE_RAW_ITEM_CONTENT; - } - /** * XPath expression for extracting an item link from the item context * @return string @@ -309,6 +300,15 @@ protected function getSettingFixEncoding(): bool return static::SETTING_FIX_ENCODING; } + /** + * Use raw item content + * @return bool + */ + protected function getSettingUseRawItemContent(): bool + { + return static::SETTING_USE_RAW_ITEM_CONTENT; + } + /** * Internal helper method for quickly accessing all the user defined constants * in derived classes @@ -331,8 +331,6 @@ private function getParam($name) return $this->getExpressionItemTitle(); case 'content': return $this->getExpressionItemContent(); - case 'raw_content': - 
return $this->getSettingUseRawItemContent(); case 'uri': return $this->getExpressionItemUri(); case 'author': @@ -345,6 +343,8 @@ private function getParam($name) return $this->getExpressionItemCategories(); case 'fix_encoding': return $this->getSettingFixEncoding(); + case 'raw_content': + return $this->getSettingUseRawItemContent(); } } @@ -438,9 +438,15 @@ public function collectData() continue; } - $isContent = $param === 'content'; - $isCategories = 'categories' === $param; - $value = $this->getItemValueOrNodeValue($typedResult, $isContent, $isContent && !$this->getSettingUseRawItemContent(), $isCategories); + if ('categories' === $param && $typedResult instanceof \DOMNodeList) { + $value = []; + foreach ($typedResult as $domNode) { + $value[] = $this->getItemValueOrNodeValue($domNode, false); + } + } else { + $value = $this->getItemValueOrNodeValue($typedResult, 'content' === $param); + } + $item->__set($param, $this->formatParamValue($param, $value)); } @@ -460,6 +466,7 @@ public function collectData() */ protected function formatParamValue($param, $value) { + $value = is_array($value) ? array_map('trim', $value) : trim($value); $value = is_array($value) ? array_map([$this, 'fixEncoding'], $value) : $this->fixEncoding($value); switch ($param) { case 'title': @@ -503,7 +510,7 @@ protected function formatItemTitle($value) */ protected function formatItemContent($value) { - return $value; + return $this->getParam('raw_content') ? 
$value : htmlspecialchars($value); } /** @@ -599,68 +606,28 @@ protected function cleanMediaUrl($mediaUrl) * @param $typedResult * @param bool $returnXML * @param bool $escapeHtml - * @param bool $allowMultiple - * @return string|array + * @return string * @throws Exception */ - protected function getItemValueOrNodeValue($typedResult, $returnXML = false, $escapeHtml = false, $allowMultiple = false) + protected function getItemValueOrNodeValue($typedResult, $returnXML = false) { - if ($typedResult instanceof \DOMNodeList && !$allowMultiple) { - $item = $typedResult->item(0); - $text = $this->extractNodeListContent($item, $returnXML); - } elseif ($typedResult instanceof \DOMNodeList && $allowMultiple) { - $text = []; - foreach ($typedResult as $item) { - $text[] = $this->extractNodeListContent($item, $returnXML); - } - } elseif (is_string($typedResult) && strlen($typedResult) > 0) { - $text = $typedResult; - } else { - throw new \Exception('Unknown type of XPath expression result.'); - } - - if (is_array($text)) { - foreach ($text as &$element) { - $element = $this->cleanExtractedText($element, $escapeHtml, $returnXML); - } - } else { - $text = $this->cleanExtractedText($text, $escapeHtml, $returnXML); + if ($typedResult instanceof \DOMNodeList) { + $typedResult = $typedResult->item(0); } - return $text; - } - /** - * @param $item - * @param $returnXML - * @return false|string - * @throws Exception - */ - protected function extractNodeListContent($item, $returnXML) - { - if ($item instanceof \DOMElement) { - return $returnXML ? ($item->ownerDocument ?? $item)->saveXML($item) : $item->nodeValue; - } elseif ($item instanceof \DOMAttr) { - return $item->value; - } elseif ($item instanceof \DOMText) { - return $item->wholeText; + if ($typedResult instanceof \DOMElement) { + return $returnXML ? ($typedResult->ownerDocument ?? 
$typedResult)->saveXML($typedResult) : $typedResult->nodeValue; + } elseif ($typedResult instanceof \DOMAttr) { + return $typedResult->value; + } elseif ($typedResult instanceof \DOMText) { + return $typedResult->wholeText; + } elseif (is_string($typedResult)) { + return $typedResult; + } elseif (null === $typedResult) { + return ''; } - throw new \Exception('Unknown type of XPath expression result.'); - } - /** - * @param $text - * @param $escapeHtml - * @param $returnXML - * @return string - */ - protected function cleanExtractedText($text, $escapeHtml, $returnXML) - { - $text = trim($text); - - if ($escapeHtml && !$returnXML) { - $text = htmlspecialchars($text); - } - return $text; + throw new \Exception('Unknown type of XPath expression result: ' . gettype($typedResult)); } /** From f736da6faebecf9215c11674c4dce12497136281 Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Wed, 3 Apr 2024 16:23:52 +0200 Subject: [PATCH 46/97] [GolemBridge] fix for internal videos (#4051) * [GolemBridge] fix for internal videos with this internal golem-videos can be played directly from feed * Update GolemBridge.php --- bridges/GolemBridge.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index 216e913f0f2..b52d3c2f469 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -119,6 +119,16 @@ private function extractContent($page) } } + //built golem videos + foreach ($article->find('.gvideofig') as &$embedcontent) { + if (preg_match('/gvideo_(.*)/', $embedcontent->id, $videoid)) { + $embedcontent->innertext .= << + EOT; + } + } + // delete known bad elements foreach ( $article->find('div[id*="adtile"], #job-market, #seminars, iframe, @@ -142,7 +152,7 @@ private function extractContent($page) $img->src = $img->getAttribute('data-src-full'); } - foreach ($content->find('p, h1, h2, h3, img[src*="."], iframe') as $element) { + foreach 
($content->find('p, h1, h2, h3, img[src*="."], iframe, video') as $element) { $item .= $element; } From 94292af51b1dd662cc9513df0f2bfacfa70dad35 Mon Sep 17 00:00:00 2001 From: User123698745 Date: Thu, 4 Apr 2024 04:07:16 +0200 Subject: [PATCH 47/97] [prtester.py] fix url parameter encoding (#4052) this will (at least) fix the pr preview of: bridges/AnisearchBridge.php bridges/BakaUpdatesMangaReleasesBridge.php bridges/DesoutterBridge.php bridges/IndiegogoBridge.php --- .github/prtester.py | 46 +++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/.github/prtester.py b/.github/prtester.py index 30a9f43b639..3d7dae99bd3 100644 --- a/.github/prtester.py +++ b/.github/prtester.py @@ -5,6 +5,7 @@ from datetime import datetime from typing import Iterable import os.path +import urllib # This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge # @@ -45,15 +46,14 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w bridgeid = bridge_card.get('id') bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata print(f'{bridgeid}{instance_suffix}') - bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html' bridge_name = bridgeid.replace('Bridge', '') context_forms = bridge_card.find_all("form") form_number = 1 for context_form in context_forms: # a bridge can have multiple contexts, named 'forms' in html - # this code will produce a fully working formstring that should create a working feed when called + # this code will produce a fully working url that should create a working feed when called # this will create an example feed for every single context, to test them all - formstring = '' + context_parameters = {} error_messages = [] context_name = '*untitled*' context_name_element = context_form.find_previous_sibling('h5') @@ -62,27 +62,27 @@ def testBridges(instance: Instance, bridge_cards: 
Iterable, with_upload: bool, w parameters = context_form.find_all("input") lists = context_form.find_all("select") # this for/if mess cycles through all available input parameters, checks if it required, then pulls - # the default or examplevalue and then combines it all together into the formstring + # the default or examplevalue and then combines it all together into the url parameters # if an example or default value is missing for a required attribute, it will throw an error # any non-required fields are not tested!!! for parameter in parameters: - if parameter.get('type') == 'hidden' and parameter.get('name') == 'context': - cleanvalue = parameter.get('value').replace(" ","+") - formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue - if parameter.get('type') == 'number' or parameter.get('type') == 'text': + parameter_type = parameter.get('type') + parameter_name = parameter.get('name') + if parameter_type == 'hidden': + context_parameters[parameter_name] = parameter.get('value') + if parameter_type == 'number' or parameter_type == 'text': if parameter.has_attr('required'): if parameter.get('placeholder') == '': if parameter.get('value') == '': - name_value = parameter.get('name') - error_messages.append(f'Missing example or default value for parameter "{name_value}"') + error_messages.append(f'Missing example or default value for parameter "{parameter_name}"') else: - formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value') + context_parameters[parameter_name] = parameter.get('value') else: - formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder') - # same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring - if parameter.get('type') == 'checkbox': + context_parameters[parameter_name] = parameter.get('placeholder') + # same thing, just for checkboxes. 
If a checkbox is checked per default, it gets added to the url parameters + if parameter_type == 'checkbox': if parameter.has_attr('checked'): - formstring = formstring + '&' + parameter.get('name') + '=on' + context_parameters[parameter_name] = 'on' for listing in lists: selectionvalue = '' listname = listing.get('name') @@ -102,15 +102,21 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w if 'selected' in selectionentry.attrs: selectionvalue = selectionentry.get('value') break - formstring = formstring + '&' + listname + '=' + selectionvalue + context_parameters[listname] = selectionvalue termpad_url = 'about:blank' if error_messages: status = '
    '.join(map(lambda m: f'❌ `{m}`', error_messages)) else: - # if all example/default values are present, form the full request string, run the request, add a tag with + # if all example/default values are present, form the full request url, run the request, add a tag with # the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and # then upload it to termpad.com, a pastebin-like-site. - response = requests.get(instance.url + bridgestring + formstring) + context_parameters.update({ + 'action': 'display', + 'bridge': bridgeid, + 'format': 'Html', + }) + request_url = f'{instance.url}/?{urllib.parse.urlencode(context_parameters)}' + response = requests.get(request_url) page_text = response.text.replace('','') page_text = page_text.encode("utf_8") soup = BeautifulSoup(page_text, "html.parser") @@ -163,8 +169,8 @@ def getFirstLine(value: str) -> str: for instance_arg in args.instances: instance_arg_parts = instance_arg.split('::') instance = Instance() - instance.name = instance_arg_parts[1] if len(instance_arg_parts) >= 2 else '' - instance.url = instance_arg_parts[0] + instance.name = instance_arg_parts[1].strip() if len(instance_arg_parts) >= 2 else '' + instance.url = instance_arg_parts[0].strip().rstrip("/") instances.append(instance) else: instance = Instance() From 82606a479a75cd4eff4740ba49715b3135c89c49 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Thu, 4 Apr 2024 04:08:29 +0200 Subject: [PATCH 48/97] [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Fix search URL, No results handling fixed, Thread title and Message URL handling (#4053) * [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Fix search URL, No results handling fixed, Thread title and Message URL handling Search URL has been updated according to the website. 
If a search doesn't return any results, the HTML won't contain any specific text now : the HTML structure is slightly different, so the bridge has been updated. The unnneded 'no-results' text is now removed from the specific bridges. The board thread title has been removed from the content, so now we use the page element. In case a board message is empty, there was an exception during the filtering of message without URL. * [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Fix search URL, No results handling fixed, Thread title and Message URL handling Coding policy fixes --- bridges/DealabsBridge.php | 1 - bridges/HotUKDealsBridge.php | 1 - bridges/MydealsBridge.php | 1 - bridges/PepperBridgeAbstract.php | 25 ++++++++++++++----------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/bridges/DealabsBridge.php b/bridges/DealabsBridge.php index 62d854f6235..3ee1c6f5464 100644 --- a/bridges/DealabsBridge.php +++ b/bridges/DealabsBridge.php @@ -1913,7 +1913,6 @@ class DealabsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/bons-plans?merchant-id=', 'request-error' => 'Impossible de joindre Dealabs', 'thread-error' => 'Impossible de déterminer l\'ID de la discussion. Vérifiez l\'URL que vous avez entré', - 'no-results' => 'Aucun résultat', 'currency' => '€', 'price' => 'Prix', 'shipping' => 'Livraison', diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index b631db7338b..6958220e645 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -3277,7 +3277,6 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/deals?merchant-id=', 'request-error' => 'Could not request HotUKDeals', 'thread-error' => 'Unable to determine the thread ID. 
Check the URL you entered', - 'no-results' => 'no results', 'currency' => '£', 'price' => 'Price', 'shipping' => 'Shipping', diff --git a/bridges/MydealsBridge.php b/bridges/MydealsBridge.php index 41bae46cd53..7b23f263936 100644 --- a/bridges/MydealsBridge.php +++ b/bridges/MydealsBridge.php @@ -2024,7 +2024,6 @@ class MydealsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/gutscheine?merchant-id=', 'request-error' => 'Could not request mydeals', 'thread-error' => 'Die ID der Diskussion kann nicht ermittelt werden. Überprüfen Sie die eingegebene URL', - 'no-results' => 'keine Ergebnisse', 'currency' => '€', 'price' => 'Preis', 'shipping' => 'Versand', diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 7b40ea1c6cf..33d427bc05b 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -55,8 +55,8 @@ protected function collectDeals($url) ); // If there is no results, we don't parse the content because it display some random deals - $noresult = $html->find('h3[class*=text--b]', 0); - if ($noresult != null && strpos($noresult->plaintext, $this->i8n('no-results')) !== false) { + $noresult = $html->find('section[class=subNav]', 0)->find('div[class*=page-center listLayout aGrid]', 0); + if ($noresult === null) { $this->items = []; } else { foreach ($list as $deal) { @@ -174,13 +174,16 @@ protected function collectDataTalk() $item['uid'] = $comment->commentId; // Timestamp handling needs a new parsing function if ($onlyWithUrl == true) { - // Count Links and Quote Links - $content = str_get_html($item['content']); - $countLinks = count($content->find('a[href]')); - $countQuoteLinks = count($content->find('a[href][class=userHtml-quote-source]')); - // Only add element if there are Links ans more links tant Quote links - if ($countLinks > 0 && $countLinks > $countQuoteLinks) { - $this->items[] = $item; + // Only parse the comment if it is not empry + if ($item['content'] != '') { + // Count Links 
and Quote Links + $content = str_get_html($item['content']); + $countLinks = count($content->find('a[href]')); + $countQuoteLinks = count($content->find('a[href][class=userHtml-quote-source]')); + // Only add element if there are Links and more links tant Quote links + if ($countLinks > 0 && $countLinks > $countQuoteLinks) { + $this->items[] = $item; + } } } else { $this->items[] = $item; @@ -264,7 +267,7 @@ private function getTitle($jsonDealData) private function getTalkTitle() { $html = getSimpleHTMLDOMCached($this->getInput('url')); - $title = $html->find('.thread-title', 0)->plaintext; + $title = $html->find('title', 0)->plaintext; return $title; } @@ -472,7 +475,7 @@ private function getSearchURI() $priceFrom = $this->getInput('priceFrom'); $priceTo = $this->getInput('priceTo'); $url = $this->i8n('bridge-uri') - . 'search/advanced?q=' + . 'search?q=' . urlencode($q) . '&hide_expired=' . $hide_expired . '&hide_local=' . $hide_local From 3cba984d22d6b1e045781f208211c4fb336c2bf0 Mon Sep 17 00:00:00 2001 From: Dag <me@dvikan.no> Date: Thu, 4 Apr 2024 17:43:07 +0200 Subject: [PATCH 49/97] fix(FDroidRepoBridge): unlink when json file is absent from archive (#4056) --- bridges/FDroidRepoBridge.php | 110 +++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 51 deletions(-) diff --git a/bridges/FDroidRepoBridge.php b/bridges/FDroidRepoBridge.php index 7ce41bafe5c..286ada1b183 100644 --- a/bridges/FDroidRepoBridge.php +++ b/bridges/FDroidRepoBridge.php @@ -49,7 +49,7 @@ public function collectData() throw new \Exception('FDroidRepoBridge requires the php-zip extension'); } - $this->repo = $this->getRepo(); + $this->repo = $this->fetchData(); switch ($this->queriedContext) { case 'Latest Updates': $this->getAllUpdates(); @@ -58,63 +58,40 @@ public function collectData() $this->getPackage($this->getInput('package')); break; default: - returnServerError('Unimplemented Context (collectData)'); + throw new \Exception('Unimplemented Context (collectData)'); } 
} - public function getURI() - { - if (empty($this->queriedContext)) { - return parent::getURI(); - } - - $url = rtrim($this->GetInput('url'), '/'); - return strstr($url, '?', true) ?: $url; - } - - public function getName() - { - if (empty($this->queriedContext)) { - return parent::getName(); - } - - $name = $this->repo['repo']['name']; - switch ($this->queriedContext) { - case 'Latest Updates': - return $name; - case 'Follow Package': - return $this->getInput('package') . ' - ' . $name; - default: - returnServerError('Unimplemented Context (getName)'); - } - } - - private function getRepo() + /** + * This method fetches data from arbitrary url and writes to os temp file. + * I don't think there's any security problem here but might be DOS problems. + */ + private function fetchData() { $url = $this->getURI(); - // Get repo information (only available as JAR) - $jar = getContents($url . '/index-v1.jar'); - $jar_loc = tempnam(sys_get_temp_dir(), ''); - file_put_contents($jar_loc, $jar); + $zipFile = getContents($url . 
'/index-v1.jar'); + // On linux this creates a temp file in /tmp/ + $temporaryFile = tempnam(sys_get_temp_dir(), 'rssbridge_'); + file_put_contents($temporaryFile, $zipFile); - // JAR files are specially formatted ZIP files - $jar = new \ZipArchive(); - if ($jar->open($jar_loc) !== true) { - unlink($jar_loc); + $archive = new \ZipArchive(); + if ($archive->open($temporaryFile) !== true) { + unlink($temporaryFile); throw new \Exception('Failed to extract archive'); } - // Get file pointer to the relevant JSON inside - $fp = $jar->getStream('index-v1.json'); + $fp = $archive->getStream('index-v1.json'); if (!$fp) { - returnServerError('Failed to get file pointer'); + unlink($temporaryFile); + throw new \Exception('Failed to get file pointer'); } - $data = json_decode(stream_get_contents($fp), true); + $json = stream_get_contents($fp); fclose($fp); - $jar->close(); - unlink($jar_loc); + $data = Json::decode($json); + $archive->close(); + unlink($temporaryFile); return $data; } @@ -158,9 +135,9 @@ private function getAllUpdates() $summary = $lang['summary'] ?? $app['summary'] ?? ''; $description = markdownToHtml(trim($lang['description'] ?? $app['description'] ?? 'None')); $whatsNew = markdownToHtml(trim($lang['whatsNew'] ?? 'None')); - $website = $this->link($lang['webSite'] ?? $app['webSite'] ?? $app['authorWebSite'] ?? null); - $source = $this->link($app['sourceCode'] ?? null); - $issueTracker = $this->link($app['issueTracker'] ?? null); + $website = $this->createAnchor($lang['webSite'] ?? $app['webSite'] ?? $app['authorWebSite'] ?? null); + $source = $this->createAnchor($app['sourceCode'] ?? null); + $issueTracker = $this->createAnchor($app['issueTracker'] ?? null); $license = $app['license'] ?? 
'None'; $item['content'] = <<<EOD {$icon} @@ -182,7 +159,7 @@ private function getAllUpdates() private function getPackage($package) { if (!isset($this->repo['packages'][$package])) { - returnClientError('Invalid Package Name'); + throw new \Exception('Invalid Package Name'); } $package = $this->repo['packages'][$package]; @@ -192,7 +169,7 @@ private function getPackage($package) $item['uri'] = $this->getURI() . '/' . $version['apkName']; $item['title'] = $version['versionName']; $item['timestamp'] = date(DateTime::ISO8601, (int) ($version['added'] / 1000)); - $item['uid'] = $version['versionCode']; + $item['uid'] = (string) $version['versionCode']; $size = round($version['size'] / 1048576, 1); // Bytes -> MB $sdk_link = 'https://developer.android.com/studio/releases/platforms'; $item['content'] = <<<EOD @@ -208,11 +185,42 @@ private function getPackage($package) } } - private function link($url) + public function getURI() + { + if (empty($this->queriedContext)) { + return parent::getURI(); + } + + $url = rtrim($this->getInput('url'), '/'); + if (strstr($url, '?', true)) { + return strstr($url, '?', true); + } else { + return $url; + } + } + + public function getName() + { + if (empty($this->queriedContext)) { + return parent::getName(); + } + + $name = $this->repo['repo']['name']; + switch ($this->queriedContext) { + case 'Latest Updates': + return $name; + case 'Follow Package': + return $this->getInput('package') . ' - ' . $name; + default: + throw new \Exception('Unimplemented Context (getName)'); + } + } + + private function createAnchor($url) { if (empty($url)) { return null; } - return '<a href="' . $url . '">' . $url . 
'</a>'; + return sprintf('<a href="%s">%s</a>', $url, $url); } } From 001dd47439339672b6e84979fd0e1a0118bc27b6 Mon Sep 17 00:00:00 2001 From: Dag <me@dvikan.no> Date: Thu, 4 Apr 2024 19:12:04 +0200 Subject: [PATCH 50/97] fix: small tweaks (#4057) --- bridges/FeedMergeBridge.php | 5 +++- bridges/GatesNotesBridge.php | 4 +++ bridges/PixivBridge.php | 47 +++--------------------------------- lib/FeedExpander.php | 1 + 4 files changed, 13 insertions(+), 44 deletions(-) diff --git a/bridges/FeedMergeBridge.php b/bridges/FeedMergeBridge.php index f2c1d9d5a2c..37b574b6720 100644 --- a/bridges/FeedMergeBridge.php +++ b/bridges/FeedMergeBridge.php @@ -64,6 +64,7 @@ public function collectData() $this->collectExpandableDatas($feed); } catch (HttpException $e) { $this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e))); + // This feed item might be spammy. Considering dropping it. $this->items[] = [ 'title' => 'RSS-Bridge: ' . $e->getMessage(), // Give current time so it sorts to the top @@ -71,7 +72,7 @@ public function collectData() ]; continue; } catch (\Exception $e) { - if (str_starts_with($e->getMessage(), 'Unable to parse xml')) { + if (str_starts_with($e->getMessage(), 'Failed to parse xml')) { // Allow this particular exception from FeedExpander $this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e))); continue; @@ -83,6 +84,8 @@ public function collectData() } } + // If $this->items is empty we should consider throw exception here + // Sort by timestamp descending usort($this->items, function ($a, $b) { $t1 = $a['timestamp'] ?? $a['uri'] ?? 
$a['title']; diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index 3381e096aca..b46b3ce663a 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -21,6 +21,10 @@ public function collectData() $rawContent = getContents($apiUrl); $cleanedContent = trim($rawContent, '"'); + $cleanedContent = str_replace([ + '<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">', + '</string>' + ], '', $cleanedContent); $cleanedContent = str_replace('\r\n', "\n", $cleanedContent); $cleanedContent = stripslashes($cleanedContent); diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php index fc4443ed2d1..604b5d4bed3 100644 --- a/bridges/PixivBridge.php +++ b/bridges/PixivBridge.php @@ -160,7 +160,8 @@ private function getDataFromJSON($json, $json_key) $json = array_reduce($json, function ($acc, $i) { if ($i['illustType'] === 0) { $acc[] = $i; - }return $acc; + } + return $acc; }, []); break; case 'manga': @@ -235,8 +236,10 @@ public function collectData() $item = []; $item['uid'] = $result['id']; + $subpath = array_key_exists('illustType', $result) ? 'artworks/' : 'novel/show.php?id='; $item['uri'] = static::URI . $subpath . $result['id']; + $item['title'] = $result['title']; $item['author'] = $result['userName']; $item['timestamp'] = $result['updateDate']; @@ -253,8 +256,6 @@ public function collectData() } } else { $img_url = $result['url']; - // Temporarily disabling caching of the image - //$img_url = $this->cacheImage($result['url'], $result['id'], array_key_exists('illustType', $result)); } // Currently, this might result in broken image due to their strict referrer check @@ -271,46 +272,6 @@ public function collectData() } } - /** - * todo: remove manual file cache - * See bridge specific documentation for alternative option. - */ - private function cacheImage($url, $illustId, $isImage) - { - $illustId = preg_replace('/[^0-9]/', '', $illustId); - $thumbnailurl = $url; - - $path = PATH_CACHE . 
'pixiv_img/'; - if (!is_dir($path)) { - mkdir($path, 0755, true); - } - - $path .= $illustId; - if ($this->getInput('fullsize')) { - $path .= '_fullsize'; - } - $path .= '.jpg'; - - if (!is_file($path)) { - // Get fullsize URL - if ($isImage && $this->getInput('fullsize')) { - $ajax_uri = static::URI . 'ajax/illust/' . $illustId; - $imagejson = $this->getData($ajax_uri, true, true); - $url = $imagejson['body']['urls']['original']; - } - - $headers = ['Referer: ' . static::URI]; - try { - $illust = $this->getData($url, true, false, $headers); - } catch (Exception $e) { - $illust = $this->getData($thumbnailurl, true, false, $headers); // Original thumbnail - } - file_put_contents($path, $illust); - } - - return get_home_page_url() . 'cache/pixiv_img/' . preg_replace('/.*\//', '', $path); - } - private function checkOptions() { $proxy = $this->getOption('proxy_url'); diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index abe964e147a..fe809bc259b 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -34,6 +34,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1) try { $this->feed = $feedParser->parseFeed($xmlString); } catch (\Exception $e) { + // FeedMergeBridge relies on this string throw new \Exception(sprintf('Failed to parse xml from %s: %s', $url, create_sane_exception_message($e))); } From 3ff2ef94e00d2edd52afb6795e9060538e63e4d5 Mon Sep 17 00:00:00 2001 From: sysadminstory <sysadminstory@users.noreply.github.com> Date: Thu, 4 Apr 2024 19:28:56 +0200 Subject: [PATCH 51/97] Fix docs : Replace relative links to files with full URL (#4059) --- docs/06_Helper_functions/index.md | 39 +++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/docs/06_Helper_functions/index.md b/docs/06_Helper_functions/index.md index 3aaeed89207..2b675ca3fd2 100644 --- a/docs/06_Helper_functions/index.md +++ b/docs/06_Helper_functions/index.md @@ -8,6 +8,8 @@ $this->getInput('your input name here'); `getInput` 
will either return the value for your parameter or `null` if the parameter is unknown or not specified. +[Defined in lib/BridgeAbstract.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/BridgeAbstract.php) + # getKey The `getKey` function is used to receive the key name to a selected list value given the name of the list, specified in `const PARAMETERS` @@ -39,6 +41,8 @@ $this->getKey('country'); `getKey` will either return the key name for your parameter or `null` if the parameter is unknown or not specified. +[Defined in lib/BridgeAbstract.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/BridgeAbstract.php) + # getContents The `getContents` function uses [cURL](https://secure.php.net/manual/en/book.curl.php) to acquire data from the specified URI while respecting the various settings defined at a global level by RSS-Bridge (i.e., proxy host, user agent, etc.). This function accepts a few parameters: @@ -55,6 +59,8 @@ $opts = array(CURLOPT_POST => 1); $html = getContents($url, $header, $opts); ``` +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # getSimpleHTMLDOM The `getSimpleHTMLDOM` function is a wrapper for the [simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design. @@ -62,6 +68,9 @@ The `getSimpleHTMLDOM` function is a wrapper for the ```PHP $html = getSimpleHTMLDOM('your URI'); ``` + +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # getSimpleHTMLDOMCached The `getSimpleHTMLDOMCached` function does the same as the [`getSimpleHTMLDOM`](#getsimplehtmldom) function, @@ -76,6 +85,8 @@ This function allows to specify the cache duration with the second parameter. 
$html = getSimpleHTMLDOMCached('your URI', 86400); // Duration 24h ``` +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # returnClientError The `returnClientError` function aborts execution of the current bridge and returns the given error message with error code **400**: @@ -86,6 +97,8 @@ returnClientError('Your error message') Use this function when the user provided invalid parameter or a required parameter is missing. +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + # returnServerError The `returnServerError` function aborts execution of the current bridge and returns the given error message with error code **500**: @@ -96,6 +109,8 @@ returnServerError('Your error message') Use this function when a problem occurs that has nothing to do with the parameters provided by the user. (like: Host service gone missing, empty data received, etc...) +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + # defaultLinkTo Automatically replaces any relative URL in a given string or DOM object (i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL. @@ -122,6 +137,8 @@ $html = defaultLinkTo($html, $this->getURI()); // Using bridge URL // <img src="https://www.github.com/rss-bridge/rss-bridge/blob/master/README.md"> ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # backgroundToImg Replaces tags with styles of `backgroud-image` by `<img />` tags. @@ -131,6 +148,8 @@ backgroundToImg(mixed $htmlContent) : object Returns a DOM object (even if provided a string). +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # extractFromDelimiters Extract the first part of a string matching the specified start and end delimiters. 
```php @@ -151,6 +170,8 @@ $extracted = extractFromDelimiters($string, $start, $end); // 'John Doe' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # stripWithDelimiters Remove one or more part(s) of a string using a start and end delimiter. It is the inverse of `extractFromDelimiters`. @@ -173,6 +194,8 @@ $cleaned = stripWithDelimiters($string, $start, $end); // 'foobar' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # stripRecursiveHTMLSection Remove HTML sections containing one or more sections using the same HTML tag. @@ -192,6 +215,8 @@ $cleaned = stripRecursiveHTMLSection($string, $tag_name, $tag_start); // 'foobar' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # markdownToHtml Converts markdown input to HTML using [Parsedown](https://parsedown.org/). @@ -234,6 +259,7 @@ $html = markdownToHtml($input); // </ul> ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) # e The `e` function is used to convert special characters to HTML entities @@ -244,7 +270,7 @@ e('0 < 1 and 2 > 1'); `e` will return the content of the string escape that can be rendered as is in HTML -[Defined in lib/html.php](/lib/html.php) +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) # truncate The `truncate` function is used to shorten a string if exceeds a certain length, and add a string indicating that the string has been shortened. @@ -253,7 +279,7 @@ The `truncate` function is used to shorten a string if exceeds a certain length, truncate('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed a neque nunc. 
Nam nibh sem.', 20 , '...'); ``` -[Defined in lib/html.php](/lib/html.php) +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) # sanitize The `sanitize` function is used to remove some tags from a given HTML text. @@ -272,7 +298,7 @@ sanitize($html, $tags_to_remove, $attributes_to_keep, $text_to_keep); This function returns a simplehtmldom object of the remaining contents. -[Defined in lib/html.php](/lib/html.php) +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) # convertLazyLoading The `convertLazyLoading` function is used to convert onvert lazy-loading images and frames (video embeds) into static elements. It accepts the HTML content as HTML objects or string objects. It returns the HTML content with fixed image/frame URLs (same type as input). @@ -286,8 +312,7 @@ $html = '<html> backgroundToImg($html); ``` -[Defined in lib/html.php](/lib/html.php) - +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) # Json::encode The `Json::encode` function is used to encode a value as à JSON string. @@ -300,7 +325,7 @@ $array = [ Json::encode($array, true, true); ``` -[Defined in lib/utils.php](/lib/utils.php) +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) # Json::decode The `Json::decode` function is used to decode a JSON string into à PHP variable. @@ -313,4 +338,4 @@ $json = '{ Json::decode($json); ``` -[Defined in lib/utils.php](/lib/utils.php) +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) From d5aa3aef699b400d5b40d61ee45c4476f0fcb38e Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Fri, 5 Apr 2024 11:31:30 +0200 Subject: [PATCH 52/97] [FDroidRepoBridge] Fix example repo The ttrss example/placeholder repo is offline, which fails CI jobs. 
Replace it with a healthy repo and package to get working CI tests and comparisons. --- bridges/FDroidRepoBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/FDroidRepoBridge.php b/bridges/FDroidRepoBridge.php index 286ada1b183..b3fd146e323 100644 --- a/bridges/FDroidRepoBridge.php +++ b/bridges/FDroidRepoBridge.php @@ -14,7 +14,7 @@ class FDroidRepoBridge extends BridgeAbstract 'name' => 'Repository URL', 'title' => 'Usually ends with /repo/', 'required' => true, - 'exampleValue' => 'https://srv.tt-rss.org/fdroid/repo' + 'exampleValue' => 'https://molly.im/fdroid/foss/fdroid/repo' ] ], 'Latest Updates' => [ @@ -35,7 +35,7 @@ class FDroidRepoBridge extends BridgeAbstract 'package' => [ 'name' => 'Package Identifier', 'required' => true, - 'exampleValue' => 'org.fox.ttrss' + 'exampleValue' => 'im.molly.app' ] ] ]; From b3ac1d176ce9f4778986190702216f0f25918a70 Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Fri, 5 Apr 2024 17:39:38 +0200 Subject: [PATCH 53/97] [FDroidRepoBridge] Simplify json retrieval (#4063) * [FDroidRepoBridge] Simplify json retrieval I looked into avoiding the writing-to-file and then reading-from-file altogether. Using a special file path that leaves the data in memory probably wouldn't work. But I'm unsure why we use the `index-v1.jar` file altogether. The main F-Droid repo [lists](https://f-droid.org/en/docs/All_our_APIs/#the-repo-index) not only `index-v1.jar` (which only makes sense if we were to use the contained signature, which we don't), but also `index-v1.json` and `index-v2.json`. These json files can be fetched with `getContents`, optionally cached, and directly fed into `Json::decode` without using a temporary file. The HTTP transfer encoding can compress the file to a similar degree the jar (=zip) can. That's exactly what this commit uses. Now the question is whether all the F-Droid repositories out there have this file. 
I went through the whole [list of known repositories](https://forum.f-droid.org/t/known-repositories/721) and only one repo misses the `index-v1.json` file: [Bromite](https://fdroid.bromite.org/fdroid/repo/index-v1.json). Under these circumstances we can depend on the availability of the `index-v1.json` file. Closes #4062 * [FDroidRepoBridge] Cleanup not requiring Zip With the last commit 1152386678151aeafd984061d34248023378bf64, the zip extension is not required anymore. Don't fail if it's not available. --- bridges/FDroidRepoBridge.php | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/bridges/FDroidRepoBridge.php b/bridges/FDroidRepoBridge.php index b3fd146e323..844f6abbb2c 100644 --- a/bridges/FDroidRepoBridge.php +++ b/bridges/FDroidRepoBridge.php @@ -45,10 +45,6 @@ class FDroidRepoBridge extends BridgeAbstract public function collectData() { - if (!extension_loaded('zip')) { - throw new \Exception('FDroidRepoBridge requires the php-zip extension'); - } - $this->repo = $this->fetchData(); switch ($this->queriedContext) { case 'Latest Updates': @@ -62,36 +58,11 @@ public function collectData() } } - /** - * This method fetches data from arbitrary url and writes to os temp file. - * I don't think there's any security problem here but might be DOS problems. - */ private function fetchData() { $url = $this->getURI(); - - $zipFile = getContents($url . '/index-v1.jar'); - // On linux this creates a temp file in /tmp/ - $temporaryFile = tempnam(sys_get_temp_dir(), 'rssbridge_'); - file_put_contents($temporaryFile, $zipFile); - - $archive = new \ZipArchive(); - if ($archive->open($temporaryFile) !== true) { - unlink($temporaryFile); - throw new \Exception('Failed to extract archive'); - } - - $fp = $archive->getStream('index-v1.json'); - if (!$fp) { - unlink($temporaryFile); - throw new \Exception('Failed to get file pointer'); - } - - $json = stream_get_contents($fp); - fclose($fp); + $json = getContents($url . 
'/index-v1.json'); $data = Json::decode($json); - $archive->close(); - unlink($temporaryFile); return $data; } From 4602f4f475d938202de6c65214b321d41d0a93e8 Mon Sep 17 00:00:00 2001 From: Dag <me@dvikan.no> Date: Sat, 6 Apr 2024 18:07:45 +0200 Subject: [PATCH 54/97] tweaks (#4065) --- bridges/FilterBridge.php | 2 +- bridges/TrelloBridge.php | 34 ++++++++++------------------------ bridges/TwitchBridge.php | 2 +- formats/HtmlFormat.php | 2 +- lib/BridgeAbstract.php | 9 +++++++++ lib/contents.php | 2 ++ 6 files changed, 24 insertions(+), 27 deletions(-) diff --git a/bridges/FilterBridge.php b/bridges/FilterBridge.php index 1add47f49cf..3448a8c7cfe 100644 --- a/bridges/FilterBridge.php +++ b/bridges/FilterBridge.php @@ -77,7 +77,7 @@ public function collectData() { $url = $this->getInput('url'); if (!Url::validate($url)) { - returnClientError('The url parameter must either refer to http or https protocol.'); + throw new \Exception('The url parameter must either refer to http or https protocol.'); } $this->collectExpandableDatas($this->getURI()); } diff --git a/bridges/TrelloBridge.php b/bridges/TrelloBridge.php index cab2bde2880..42651fd13fc 100644 --- a/bridges/TrelloBridge.php +++ b/bridges/TrelloBridge.php @@ -553,10 +553,8 @@ class TrelloBridge extends BridgeAbstract private function queryAPI($path, $params = []) { - $data = json_decode(getContents('https://trello.com/1/' - . $path - . '?' - . http_build_query($params))); + $url = 'https://trello.com/1/' . $path . '?' . http_build_query($params); + $data = json_decode(getContents($url)); return $data; } @@ -576,33 +574,21 @@ private function renderAction($action, $textOnly = false) && !$textOnly && isset($entity->originalUrl) ) { - $string = '<p><a href="' - . $entity->originalUrl - . '"><img src="' - . $entity->previewUrl - . '"></a></p>'; + $string = sprintf( + '<p><a href="%s"><img src="%s"></a></p>', + $entity->originalUrl, + $entity->previewUrl ?? 
'' + ); } elseif ($type === 'card' && !$textOnly) { - $string = '<a href="https://trello.com/c/' - . $entity->shortLink - . '">' - . $entity->text - . '</a>'; + $string = sprintf('<a href="https://trello.com/c/%s">%s</a>', $entity->shortLink, $entity->text); } elseif ($type === 'member' && !$textOnly) { - $string = '<a href="https://trello.com/' - . $entity->username - . '">' - . $entity->text - . '</a>'; + $string = sprintf('<a href="https://trello.com/%s">%s</a>', $entity->username, $entity->text); } elseif ($type === 'date') { $string = gmdate('M j, Y \a\t g:i A T', strtotime($entity->date)); } elseif ($type === 'translatable') { $string = self::ACTION_TEXTS[$entity->translationKey]; } else { - if (isset($entity->text)) { - $string = $entity->text; - } else { - $string = ''; - } + $string = $entity->text ?? ''; } $strings['{' . $entity_name . '}'] = $string; } diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index 9e70944e1fa..424cd6e3b20 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -99,7 +99,7 @@ public function collectData() $user = $data->user; if ($user->videos === null) { // twitch regularly does this for unknown reasons - $this->logger->info('Twitch returned empty set of videos', ['data' => $data]); + //$this->logger->info('Twitch returned empty set of videos', ['data' => $data]); return; } diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 1e2f60e62cf..37ef3a930db 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -6,7 +6,7 @@ class HtmlFormat extends FormatAbstract public function stringify() { - // This query string comes in already url decoded + // This query string is url encoded $queryString = $_SERVER['QUERY_STRING']; $feedArray = $this->getFeed(); diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index 1456e1c3e24..2467dec60e1 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -6,8 +6,17 @@ abstract class BridgeAbstract const URI = ''; const 
DONATION_URI = ''; const DESCRIPTION = 'No description provided'; + + /** + * Preferably a github username + */ const MAINTAINER = 'No maintainer'; + + /** + * Cache TTL in seconds + */ const CACHE_TIMEOUT = 3600; + const CONFIGURATION = []; const PARAMETERS = []; const TEST_DETECT_PARAMETERS = []; diff --git a/lib/contents.php b/lib/contents.php index 43db8c031dc..ba6dd531a10 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -17,6 +17,8 @@ function getContents( $httpClient = RssBridge::getHttpClient(); $cache = RssBridge::getCache(); + // TODO: consider url validation at this point + $httpHeadersNormalized = []; foreach ($httpHeaders as $httpHeader) { $parts = explode(':', $httpHeader); From 7d6881732dababa6a3188128e40227421cd8c972 Mon Sep 17 00:00:00 2001 From: July <phantop@tuta.io> Date: Sun, 7 Apr 2024 17:02:36 -0400 Subject: [PATCH 55/97] [ScribbleHubBridge] Add list page feed creation (#4012) * [ScribbleHubBridge] Add list page feed creation * [ScribbleHubBridge] Add list title handling * [ScribbleHubBridge] Don't include timestamp in List GUIDs * [ScribbleHubBridge] Fix usage of dynamic property --- bridges/ScribbleHubBridge.php | 60 ++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/bridges/ScribbleHubBridge.php b/bridges/ScribbleHubBridge.php index 0f7c7a6c7ff..60add80242f 100644 --- a/bridges/ScribbleHubBridge.php +++ b/bridges/ScribbleHubBridge.php @@ -23,6 +23,14 @@ class ScribbleHubBridge extends FeedExpander // Example: latest chapters from Uskweirs 'exampleValue' => '965299', ], + ], + 'List' => [ + 'url' => [ + 'name' => 'url', + 'required' => true, + // Example: latest stories with the 'Transgender' tag + 'exampleValue' => 'https://www.scribblehub.com/series-finder/?sf=1&gi=6&tgi=1088&sort=dateadded', + ], ] ]; @@ -34,6 +42,10 @@ public function getIcon() public function collectData() { $url = 'https://rssscribblehub.com/rssfeed.php?type='; + if ($this->queriedContext === 'List') { + 
$this->collectList($this->getURI()); + return; + } if ($this->queriedContext === 'Author') { $url = $url . 'author&uid=' . $this->getInput('uid'); } else { //All and Series use the same source feed @@ -42,6 +54,44 @@ public function collectData() $this->collectExpandableDatas($url); } + protected $author = ''; + + private function collectList($url) + { + $html = getSimpleHTMLDOMCached($url); + foreach ($html->find('.search_main_box') as $element) { + $item = []; + + $title = $element->find('.search_title a', 0); + $item['title'] = $title->plaintext; + $item['uri'] = $title->href; + + $strdate = $element->find('[title="Last Updated"]', 0)->plaintext; + $item['timestamp'] = strtotime($strdate); + $item['uid'] = $item['uri']; + + $details = getSimpleHTMLDOMCached($item['uri']); + $item['enclosures'][] = $details->find('.fic_image img', 0)->src; + $item['content'] = $details->find('.wi_fic_desc', 0); + + foreach ($details->find('.fic_genre') as $tag) { + $item['categories'][] = $tag->plaintext; + } + foreach ($details->find('.stag') as $tag) { + $item['categories'][] = $tag->plaintext; + } + + $read_url = $details->find('.read_buttons a', 0)->href; + $read_html = getSimpleHTMLDOMCached($read_url); + $item['content'] .= '<hr><h3>'; + $item['content'] .= $read_html->find('.chapter-title', 0); + $item['content'] .= '</h3>'; + $item['content'] .= $read_html->find('#chp_raw', 0); + + $this->items[] = $item; + } + } + protected function parseItem(array $item) { //For series, filter out other series from 'All' feed @@ -102,12 +152,17 @@ public function getName() } catch (HttpException $e) { // 403 Forbidden, This means we got anti-bot response if ($e->getCode() === 403) { - return $item; + return $name; } throw $e; } $title = html_entity_decode($page->find('.fic_title', 0)->plaintext); break; + case 'List': + $page = getSimpleHTMLDOMCached($this->getURI()); + $title = $page->find('head > title', 0)->plaintext; + $title = explode(' |', $title)[0]; + break; } if (isset($title)) 
{ $name .= " - $title"; @@ -125,6 +180,9 @@ public function getURI() case 'Series': $uri = self::URI . 'series/' . $this->getInput('sid') . '/a'; break; + case 'List': + $uri = $this->getInput('url'); + break; } return $uri; } From 815dc180ccd9298015a2e6c55aaf05a189259e6c Mon Sep 17 00:00:00 2001 From: sysadminstory <sysadminstory@users.noreply.github.com> Date: Wed, 10 Apr 2024 17:30:56 +0200 Subject: [PATCH 56/97] [PicukiBridge] Fix image URL (#4068) Image URL does not need to be faked anymore, as the content/type is now valid. --- bridges/PicukiBridge.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/bridges/PicukiBridge.php b/bridges/PicukiBridge.php index f1d45e2acd8..5f1096b8470 100644 --- a/bridges/PicukiBridge.php +++ b/bridges/PicukiBridge.php @@ -89,9 +89,6 @@ public function collectData() $imageUrlParts[count($imageUrlParts) - 1] = urlencode($imageUrlParts[count($imageUrlParts) - 1]); $imageUrl = implode('/', $imageUrlParts); - // add fake file extension for it to be recognized as image/jpeg instead of application/octet-stream - $imageUrl = $imageUrl . 
'#.jpg'; - $this->items[] = [ 'uri' => $url, 'author' => $author, From a73b66f4d63766e42e6c772fe5d096e11de2a753 Mon Sep 17 00:00:00 2001 From: Dag <me@dvikan.no> Date: Wed, 10 Apr 2024 18:32:48 +0200 Subject: [PATCH 57/97] fix(ScientificAmericanBridge) (#4070) --- bridges/ScientificAmericanBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/ScientificAmericanBridge.php b/bridges/ScientificAmericanBridge.php index d575bf9488f..da52e0ad645 100644 --- a/bridges/ScientificAmericanBridge.php +++ b/bridges/ScientificAmericanBridge.php @@ -66,7 +66,7 @@ private function collectFeed() private function collectIssues() { $html = getSimpleHTMLDOMCached(self::ISSUES); - $content = $html->getElementById('content')->children(3); + $content = $html->getElementById('app')->children(3); $issues = $content->children(); $issues_count = min( (int)$this->getInput('parseIssues'), @@ -125,6 +125,7 @@ private function parseIssue($issue_link) private function updateItem($item) { + return $item; $html = getSimpleHTMLDOMCached($item['uri']); $article = $html->find('#sa_body', 0)->find('article', 0); From 58c254ad3bd3f0c9a3e01dc8b4011996dffbaf75 Mon Sep 17 00:00:00 2001 From: Miika Launiainen <miika@miicat.eu> Date: Thu, 11 Apr 2024 18:18:37 +0300 Subject: [PATCH 58/97] [YorushikaBridge] Add language selection parameter (#4073) * Add language selection parameter * Fix typo * Fix lint errors --- bridges/YorushikaBridge.php | 50 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/bridges/YorushikaBridge.php b/bridges/YorushikaBridge.php index 12d02f1f88d..d528999a49a 100644 --- a/bridges/YorushikaBridge.php +++ b/bridges/YorushikaBridge.php @@ -7,6 +7,20 @@ class YorushikaBridge extends BridgeAbstract const DESCRIPTION = 'Return news from Yorushika\'s offical website'; const MAINTAINER = 'Miicat_47'; const PARAMETERS = [ + 'global' => [ + 'lang' => [ + 'name' => 'Language', + 'defaultValue' => 'jp', + 'type' => 
'list', + 'values' => [ + '日本語' => 'jp', + 'English' => 'en', + '한국어' => 'ko', + '中文(繁體字)' => 'zh-tw', + '中文(簡体字)' => 'zh-cn', + ] + ], + ], 'All categories' => [ ], 'Only selected categories' => [ @@ -27,6 +41,28 @@ class YorushikaBridge extends BridgeAbstract public function collectData() { + $url = 'https://yorushika.com/news/5/'; + switch ($this->getInput('lang')) { + case 'jp': + $url = 'https://yorushika.com/news/5/'; + break; + case 'en': + $url = 'https://yorushika.com/news/5/?lang=en'; + break; + case 'ko': + $url = 'https://yorushika.com/news/5/?lang=ko'; + break; + case 'zh-tw': + $url = 'https://yorushika.com/news/5/?lang=zh-tw'; + break; + case 'zh-cn': + $url = 'https://yorushika.com/news/5/?lang=zh-cn'; + break; + default: + $url = 'https://yorushika.com/news/5/'; + break; + } + $categories = []; if ($this->queriedContext == 'All categories') { array_push($categories, 'all'); @@ -42,7 +78,7 @@ public function collectData() } } - $html = getSimpleHTMLDOM('https://yorushika.com/news/5/')->find('.list--news', 0); + $html = getSimpleHTMLDOM($url)->find('.list--news', 0); $html = defaultLinkTo($html, $this->getURI()); foreach ($html->find('.inview') as $art) { @@ -62,10 +98,16 @@ public function collectData() $url = $art->find('a.clearfix', 0)->href; // Get article date - $exp_date = '/\d+\.\d+\.\d+/'; $date = $art->find('.date', 0)->plaintext; - preg_match($exp_date, $date, $matches); - $date = date_create_from_format('Y.m.d', $matches[0]); + if (preg_match('/(\d)年(\d)月(\d)/', $date, $matches)) { + // Some dates will contain Chinese characters, remove those from the string + $formattedDate = sprintf('%d.%02d.%02d', $matches[1], $matches[2], $matches[3]); + } else { + // Assume the date is already in 'Y.m.d' format + preg_match('/\d+\.\d+\.\d+/', $date, $matches); + $formattedDate = $matches[0]; + } + $date = date_create_from_format('Y.m.d', $formattedDate); $date = date_format($date, 'd.m.Y'); // Get article info From 
428c6c3c66b0f2730a8eb899d7a5768cdd279777 Mon Sep 17 00:00:00 2001 From: Korytov Pavel <thexcloud@gmail.com> Date: Fri, 12 Apr 2024 02:57:55 +0300 Subject: [PATCH 59/97] [ScientificAmericanBridge] Update bridge (#4074) * [ScientificAmericanBridge] Update bridge * [ScientificAmericanBridge] Fix lint --- bridges/ScientificAmericanBridge.php | 131 ++++++++++++--------------- 1 file changed, 60 insertions(+), 71 deletions(-) diff --git a/bridges/ScientificAmericanBridge.php b/bridges/ScientificAmericanBridge.php index da52e0ad645..51cdc0d9f2e 100644 --- a/bridges/ScientificAmericanBridge.php +++ b/bridges/ScientificAmericanBridge.php @@ -25,7 +25,7 @@ class ScientificAmericanBridge extends FeedExpander ]; const FEED = 'http://rss.sciam.com/ScientificAmerican-Global'; - const ISSUES = 'https://www.scientificamerican.com/archive/issues/2020s/'; + const ISSUES = 'https://www.scientificamerican.com/archive/issues/'; public function collectData() { @@ -50,7 +50,7 @@ public function collectData() if ($this->getInput('addContents') == 1) { usort($this->items, function ($item1, $item2) { - return $item1['timestamp'] - $item2['timestamp']; + return $item2['timestamp'] - $item1['timestamp']; }); } } @@ -66,8 +66,12 @@ private function collectFeed() private function collectIssues() { $html = getSimpleHTMLDOMCached(self::ISSUES); - $content = $html->getElementById('app')->children(3); - $issues = $content->children(); + $content = $html->getElementById('app'); + $issues_list = $content->find('div[class^="issue__list"]', 0); + if ($issues_list == null) { + return []; + } + $issues = $issues_list->find('div[class^="list__item"]'); $issues_count = min( (int)$this->getInput('parseIssues'), count($issues) @@ -87,36 +91,19 @@ private function parseIssue($issue_link) $items = []; $html = getSimpleHTMLDOMCached($issue_link); - $features = $html->find('[class^=Detail_issue__article__previews__featured]', 0); - if ($features != null) { - $articles = $features->find('div', 0)->children(); + 
$blocks = $html->find('[class^="issueArchiveArticleListCompact"]'); + foreach ($blocks as $block) { + $articles = $block->find('article[class*="article"]'); foreach ($articles as $article) { - $h4 = $article->find('h4', 0); - $a = $h4->find('a', 0); + $a = $article->find('a[class^="articleLink"]', 0); $link = 'https://scientificamerican.com' . $a->getAttribute('href'); - $title = $a->plaintext; - $items[] = [ + $title = $a->find('h2[class^="articleTitle"]', 0); + array_push($items, [ 'uri' => $link, - 'title' => $title, + 'title' => $title->plaintext, 'uid' => $link, 'content' => '' - ]; - } - } - - $departments = $html->find('[class^=Detail_issue__article__previews__departments]', 0); - if ($departments != null) { - $headers = $departments->find('[class*=Listing_article__listing__title]'); - foreach ($headers as $header) { - $a = $header->find('a', 0); - $link = 'https://scientificamerican.com' . $a->getAttribute('href'); - $title = $a->plaintext; - $items[] = [ - 'uri' => $link, - 'title' => $title, - 'uid' => $link, - 'content' => '' - ]; + ]); } } @@ -125,65 +112,67 @@ private function parseIssue($issue_link) private function updateItem($item) { - return $item; $html = getSimpleHTMLDOMCached($item['uri']); - $article = $html->find('#sa_body', 0)->find('article', 0); + $article = $html->find('#app', 0)->find('article', 0); - $time = $article->find('time[itemprop="datePublished"]', 0); - if ($time == null) { - $time = $html->find('span[itemprop="datePublished"]', 0); - } + $time = $article->find('p[class^="article_pub_date"]', 0); if ($time) { $datetime = DateTime::createFromFormat('F j, Y', $time->plaintext); + $datetime->setTime(0, 0, 0, 0); $item['timestamp'] = $datetime->format('U'); } - $main = $article->find('section.article-grid__main', 0); - if ($main == null) { - $main = $article->find('div.article-text', 0); + $authors = $article->find('a[class^="article_authors__link"]'); + if ($authors) { + $author = implode('; ', array_map(fn($a) => $a->plaintext, 
$authors)); + $item['author'] = $author; } - if ($main == null) { - return $item; + $res = ''; + $desc = $article->find('div[class^="article_dek"]', 0); + if ($desc) { + $res .= $desc->innertext; } - foreach ($main->find('img') as $img) { - $img->removeAttribute('width'); - $img->removeAttribute('height'); - $img->setAttribute('style', 'height: auto; width: auto; max-height: 768px'); + $lead_figure = $article->find('figure[class^="lead_image"]', 0); + if ($lead_figure) { + $res .= $lead_figure->outertext; } - $rights_link = $main->find('div.article-rightslink', 0); - if ($rights_link != null) { - $rights_link->parent->removeChild($rights_link); - } - $reprints_link = $main->find('div.article-reprintsLink', 0); - if ($reprints_link != null) { - $reprints_link->parent->removeChild($reprints_link); - } - $about_section = $main->find('section.article-author-container', 0); - if ($about_section != null) { - $about_section->parent->removeChild($about_section); - } - $read_next = $main->find('#read-next', 0); - if ($read_next != null) { - $read_next->parent->removeChild($read_next); - } - - foreach ($main->find('iframe') as $iframe) { - $a = $html->createElement('a'); - $a->href = $iframe->src; - $a->innertext = $iframe->src; - $iframe->parent->appendChild($a); - $iframe->parent->removeChild($iframe); + $content = $article->find('div[class^="article__content"]', 0); + if ($content) { + foreach ($content->children() as $block) { + if (str_contains($block->innertext, 'On supporting science journalism')) { + continue; + } + if ( + ($block->tag == 'p' && $block->getAttribute('data-block') == 'sciam/paragraph') + || ($block->tag == 'figure' && str_starts_with($block->class, 'article__image')) + ) { + $iframe = $block->find('iframe', 0); + if ($iframe) { + $res .= "<a href=\"{$iframe->src}\">{$iframe->src}</a>"; + } else { + $res .= $block->outertext; + } + } else if ($block->tag == 'h2') { + $res .= '<h3>' . $block->innertext . 
'</h3>'; + } else if ($block->tag == 'blockquote') { + $res .= $block->outertext; + } else if ($block->tag == 'hr' && $block->getAttribute('data-block') == 'sciam/raw_html') { + $res .= '<hr />'; + } + } } - $authors = $main->find('span[itemprop="author"]', 0); - if ($authors != null) { - $item['author'] = $authors->plaintext; + $footer = $article->find('footer[class*="footer"]', 0); + if ($footer) { + $bios = $footer->find('div[class^=bio]'); + $bio = implode('', array_map(fn($b) => $b->innertext, $bios)); + $res .= $bio; } - $item['content'] = $main->innertext; + $item['content'] = $res; return $item; } } From 89013faf7dc019db5ef42efb7021c7a517858bfd Mon Sep 17 00:00:00 2001 From: Arya K <arya@projectsegfau.lt> Date: Sat, 13 Apr 2024 19:29:25 +0530 Subject: [PATCH 60/97] Add Project Segfault Instance (#4076) --- docs/01_General/06_Public_Hosts.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md index 1d59d9180b0..1bc8fc205da 100644 --- a/docs/01_General/06_Public_Hosts.md +++ b/docs/01_General/06_Public_Hosts.md @@ -24,6 +24,7 @@ | ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.m3wz.su| ![](https://img.shields.io/badge/website-up-brightgreen) | [@m3oweezed](https://m3wz.su/en/about) | Poland, Hosted with Timeweb Cloud | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | ![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | Hosted with Hostaris, Germany | ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine +| ![](https://iplookup.flagfox.net/images/h16/IN.png) | https://rssbridge.projectsegfau.lt | ![](https://img.shields.io/website/https/rssbridge.projectsegfau.lt) | [@gi-yt](https://aryak.me) | Self-Hosted at Mumbai, India with Airtel (ISP) 
| ## Inactive instances From b4d397ff7064298f4ffb9afe74fa27b34d26d3c8 Mon Sep 17 00:00:00 2001 From: Miika Launiainen <miika@miicat.eu> Date: Sun, 14 Apr 2024 20:13:31 +0300 Subject: [PATCH 61/97] [YorushikaBridge] Fix getting date (#4077) * Remove unnecessary variable * Fix getting date --- bridges/YorushikaBridge.php | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/bridges/YorushikaBridge.php b/bridges/YorushikaBridge.php index d528999a49a..d75b97d7d84 100644 --- a/bridges/YorushikaBridge.php +++ b/bridges/YorushikaBridge.php @@ -41,7 +41,6 @@ class YorushikaBridge extends BridgeAbstract public function collectData() { - $url = 'https://yorushika.com/news/5/'; switch ($this->getInput('lang')) { case 'jp': $url = 'https://yorushika.com/news/5/'; @@ -99,14 +98,8 @@ public function collectData() // Get article date $date = $art->find('.date', 0)->plaintext; - if (preg_match('/(\d)年(\d)月(\d)/', $date, $matches)) { - // Some dates will contain Chinese characters, remove those from the string - $formattedDate = sprintf('%d.%02d.%02d', $matches[1], $matches[2], $matches[3]); - } else { - // Assume the date is already in 'Y.m.d' format - preg_match('/\d+\.\d+\.\d+/', $date, $matches); - $formattedDate = $matches[0]; - } + preg_match('/(\d+)[\.年](\d+)[\.月](\d+)/u', $date, $matches); + $formattedDate = sprintf('%d.%02d.%02d', $matches[1], $matches[2], $matches[3]); $date = date_create_from_format('Y.m.d', $formattedDate); $date = date_format($date, 'd.m.Y'); From 957a820931dc45e9aadd996751cfb7321f4ba47e Mon Sep 17 00:00:00 2001 From: llamasblade <69692580+llamasblade@users.noreply.github.com> Date: Sun, 14 Apr 2024 17:14:52 +0000 Subject: [PATCH 62/97] [YandexZenBridge] Fix broken bridge for some channels (#4078) Fixes #4071. 
Major changes: - the bridge's URI changed from zen.yandex.com to dzen.ru, as the former redirects to the latter (perhaps the bridge's name should be changed as well); - the channel's URL is now required instead of the channel's username; - two kinds of URLs are supported, one for channels with usernames and one for channels with IDs in their URL; - the channel's real name, as shown in the webpage, is now used as the feed title. --- bridges/YandexZenBridge.php | 59 ++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/bridges/YandexZenBridge.php b/bridges/YandexZenBridge.php index 8a3db48b2b8..572423284e2 100644 --- a/bridges/YandexZenBridge.php +++ b/bridges/YandexZenBridge.php @@ -3,17 +3,17 @@ class YandexZenBridge extends BridgeAbstract { const NAME = 'YandexZen Bridge'; - const URI = 'https://zen.yandex.com'; - const DESCRIPTION = 'Latest posts from the specified profile.'; + const URI = 'https://dzen.ru'; + const DESCRIPTION = 'Latest posts from the specified channel.'; const MAINTAINER = 'llamasblade'; const PARAMETERS = [ [ - 'username' => [ - 'name' => 'Username', + 'channelURL' => [ + 'name' => 'Channel URL', 'type' => 'text', 'required' => true, - 'title' => 'The account\'s username, found in its URL', - 'exampleValue' => 'dream_faity_diy', + 'title' => 'The channel\'s URL', + 'exampleValue' => 'https://dzen.ru/dream_faity_diy', ], 'limit' => [ 'name' => 'Limit', @@ -27,14 +27,41 @@ class YandexZenBridge extends BridgeAbstract ]; # credit: https://github.com/teromene see #1032 - const _API_URL = 'https://zen.yandex.ru/api/v3/launcher/more?channel_name='; + const _BASE_API_URL_WITH_CHANNEL_NAME = 'https://dzen.ru/api/v3/launcher/more?channel_name='; + const _BASE_API_URL_WITH_CHANNEL_ID = 'https://dzen.ru/api/v3/launcher/more?channel_id='; + + const _ACCOUNT_URL_WITH_CHANNEL_ID_REGEX = '#^https?://dzen\.ru/id/(?<channelID>[a-z0-9]{24})#'; + const _ACCOUNT_URL_WITH_CHANNEL_NAME_REGEX = 
'#^https?://dzen\.ru/(?<channelName>[\w\.]+)#'; + + private $channelRealName = null; # as shown in the webpage, not in the URL + public function collectData() { - $profile_json = json_decode(getContents($this->getAPIUrl())); + $channelURL = $this->getInput('channelURL'); + + if (preg_match(self::_ACCOUNT_URL_WITH_CHANNEL_ID_REGEX, $channelURL, $matches)) { + $channelID = $matches['channelID']; + $channelAPIURL = self::_BASE_API_URL_WITH_CHANNEL_ID . $channelID; + } elseif (preg_match(self::_ACCOUNT_URL_WITH_CHANNEL_NAME_REGEX, $channelURL, $matches)) { + $channelName = $matches['channelName']; + $channelAPIURL = self::_BASE_API_URL_WITH_CHANNEL_NAME . $channelName; + } else { + returnClientError(<<<EOT +Invalid channel URL provided. +The channel\'s URL must be in one of these two forms: +- https://dzen.ru/dream_faity_diy +- https://dzen.ru/id/5ad7777f1aa80ce576015250 +EOT); + } + + $APIResponse = json_decode(getContents($channelAPIURL)); + + $this->channelRealName = $APIResponse->header->title; + $limit = $this->getInput('limit'); - foreach (array_slice($profile_json->items, 0, $limit) as $post) { + foreach (array_slice($APIResponse->items, 0, $limit) as $post) { $item = []; $item['uri'] = $post->share_link; @@ -56,21 +83,19 @@ public function collectData() } } - private function getAPIUrl() - { - return self::_API_URL . $this->getInput('username'); - } - public function getURI() { - return self::URI . '/' . $this->getInput('username'); + if (is_null($this->getInput('channelURL'))) { + return parent::getURI(); + } + return $this->getInput('channelURL'); } public function getName() { - if (is_null($this->getInput('username'))) { + if (is_null($this->channelRealName)) { return parent::getName(); } - return $this->getInput('username') . '\'s latest zen.yandex posts'; + return $this->channelRealName . 
'\'s latest zen.yandex posts'; } } From 97f5dafbc5e2ff9dadbe8a76cb7e07818da566e8 Mon Sep 17 00:00:00 2001 From: llamasblade <69692580+llamasblade@users.noreply.github.com> Date: Tue, 16 Apr 2024 15:58:05 +0000 Subject: [PATCH 63/97] [HytaleBridge] Fix bridge not pulling all blog posts (#4079) --- bridges/HytaleBridge.php | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/bridges/HytaleBridge.php b/bridges/HytaleBridge.php index 7ca11af62e1..01fc0f385c7 100644 --- a/bridges/HytaleBridge.php +++ b/bridges/HytaleBridge.php @@ -18,26 +18,27 @@ public function collectData() $blogPosts = json_decode(getContents(self::_API_URL_PUBLISHED)); $length = count($blogPosts); - for ($i = 1; $i < $length; $i += 3) { + for ($i = 0; $i < $length; $i += 3) { $slug = $blogPosts[$i]->slug; $blogPost = json_decode(getContents(self::_API_URL_BLOG_POST . $slug)); - if (property_exists($blogPost, 'previous')) { - $this->addBlogPost($blogPost->previous); + if (property_exists($blogPost, 'next')) { + $this->addBlogPost($blogPost->next); } $this->addBlogPost($blogPost); - if (property_exists($blogPost, 'next')) { - $this->addBlogPost($blogPost->next); + if (property_exists($blogPost, 'previous')) { + $this->addBlogPost($blogPost->previous); } } - if ($length % 3 == 1) { - $slug = $blogPosts[count($blogPosts) - 1]->slug; + if (($length >= 3) && ($length % 3 == 0)) { + $slug = $blogPosts[$length - 1]->slug; $blogPost = json_decode(getContents(self::_API_URL_BLOG_POST . $slug)); + $this->addBlogPost($blogPost); } } From 8c3e973b9f508f3ea928a720a616d4d95d71d22b Mon Sep 17 00:00:00 2001 From: sysadminstory <sysadminstory@users.noreply.github.com> Date: Thu, 18 Apr 2024 01:43:53 +0200 Subject: [PATCH 64/97] [PepperBridgeAbstract] Fix the "no result" detection (#4082) The "no result" test did not work, it is fixed now. 
--- bridges/PepperBridgeAbstract.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 33d427bc05b..6e41cf20745 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -55,8 +55,8 @@ protected function collectDeals($url) ); // If there is no results, we don't parse the content because it display some random deals - $noresult = $html->find('section[class=subNav]', 0)->find('div[class*=page-center listLayout aGrid]', 0); - if ($noresult === null) { + $noresult = $html->find('div[id=content-list]', 0)->find('h2', 0); + if ($noresult !== null) { $this->items = []; } else { foreach ($list as $deal) { From 1f71d76ac1e93ef403098eec3c7a864fe8748ee6 Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:18:45 +0200 Subject: [PATCH 65/97] [HeiseBridge] Remove additional ad banners For example https://www.heise.de/meinung/Kommentar-Microsofts-Sicherheitspraxis-wird-zur-Gefahr-und-das-BSI-schweigt-9686629.html has two inline banners for a heise offering, not directly related to the article. Removing all "inline" figures, which seems to catch all inline unwanted elements, while avoiding removing useful figures/images. 
--- bridges/HeiseBridge.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index ab40e6d9338..a78b4609fac 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -160,7 +160,10 @@ private function addArticleToItem($item, $article) $article = defaultLinkTo($article, $item['uri']); // remove unwanted stuff - foreach ($article->find('figure.branding, a-ad, div.ho-text, a-img, .a-toc__list, a-collapse, .opt-in__description, .opt-in__footnote') as $element) { + foreach ( + $article->find('figure.branding, figure.a-inline-image, a-ad, div.ho-text, a-img, + .a-toc__list, a-collapse, .opt-in__description, .opt-in__footnote') as $element + ) { $element->remove(); } // reload html, as remove() is buggy From 154b8b9cdb6ad7f125753c5e0964871c1730e8f8 Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Fri, 19 Apr 2024 19:08:58 +0200 Subject: [PATCH 66/97] Create TarnkappeBridge.php (#4085) * Create TarnkappeBridge.php * Update TarnkappeBridge.php --- bridges/TarnkappeBridge.php | 79 +++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 bridges/TarnkappeBridge.php diff --git a/bridges/TarnkappeBridge.php b/bridges/TarnkappeBridge.php new file mode 100644 index 00000000000..c04c9546558 --- /dev/null +++ b/bridges/TarnkappeBridge.php @@ -0,0 +1,79 @@ +<?php + +class TarnkappeBridge extends FeedExpander +{ + const MAINTAINER = 'Tone866'; + const NAME = 'tarnkappe Bridge'; + const URI = 'https://tarnkappe.info/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns the full articles instead of only the intro'; + const PARAMETERS = [[ + 'category' => [ + 'name' => 'Category', + 'required' => false, + 'title' => <<<'TITLE' + If you only want to subscribe to a specific category + you can enter it here. + If not, leave it blank to subscribe to everything. 
+ TITLE, + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Specify number of full articles to return', + 'defaultValue' => 10 + ] + ]]; + const LIMIT = 10; + + public function collectData() + { + if (empty($this->getInput('category'))) { + $category = 'https://tarnkappe.info/feed'; + } else { + $category = 'https://tarnkappe.info/artikel/' . $this->getInput('category') . '/feed'; + } + + $this->collectExpandableDatas( + $category, + $this->getInput('limit') ?: static::LIMIT + ); + } + + protected function parseItem(array $item) + { + if (strpos($item['uri'], 'https://tarnkappe.info/') !== 0) { + return $item; + } + + $article = getSimpleHTMLDOMCached($item['uri']); + + if ($article) { + $article = defaultLinkTo($article, $item['uri']); + $item = $this->addArticleToItem($item, $article); + } + + return $item; + } + + private function addArticleToItem($item, $article) + { + $item['content'] = $article->find('a.image-header', 0); + + $article = $article->find('main#article article div.card-content div.content.entry-content', 0); + + // remove unwanted stuff + foreach ( + $article->find('em, section, div.menu') as $element + ) { + $element->remove(); + } + // reload html, as remove() is buggy + $article = str_get_html($article->outertext); + + $item['content'] .= $article; + + return $item; + } +} From d31f20758cb9c3a0841c4cf1c2c283b4cfc29077 Mon Sep 17 00:00:00 2001 From: Thomas <mightymt@users.noreply.github.com> Date: Fri, 26 Apr 2024 18:47:06 +0200 Subject: [PATCH 67/97] [YouTubeCommunityTabBridge] Improve building of content & title (#4089) * [YouTubeCommunityTabBridge] Improve building of content & title Fixes truncated link hrefs in content and adds some general improvements regarding the building of item content and item title * [YouTubeCommunityTabBridge] Fix PHP deprecation warnings Fixes the following deprecation warnings: substr(): Passing null to parameter #1 ($string) of type string is deprecated --- 
bridges/YouTubeCommunityTabBridge.php | 37 ++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/bridges/YouTubeCommunityTabBridge.php b/bridges/YouTubeCommunityTabBridge.php index 20822828b0d..74200b17854 100644 --- a/bridges/YouTubeCommunityTabBridge.php +++ b/bridges/YouTubeCommunityTabBridge.php @@ -204,7 +204,15 @@ private function getText($runs) $text = ''; foreach ($runs as $part) { - $text .= $this->formatUrls($part->text); + if (isset($part->navigationEndpoint->browseEndpoint->canonicalBaseUrl)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->browseEndpoint->canonicalBaseUrl); + } elseif (isset($part->navigationEndpoint->urlEndpoint->url)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->urlEndpoint->url); + } elseif (isset($part->navigationEndpoint->commandMetadata->webCommandMetadata->url)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->commandMetadata->webCommandMetadata->url); + } else { + $text .= $this->formatUrls($part->text, null); + } } return nl2br($text); @@ -275,6 +283,7 @@ private function ellipsisTitle($text) { $length = 100; + $text = strip_tags($text); if (strlen($text) > $length) { $text = explode('<br>', wordwrap($text, $length, '<br>')); return $text[0] . '...'; @@ -283,12 +292,26 @@ private function ellipsisTitle($text) return $text; } - private function formatUrls($content) + private function formatUrls($content, $url) { - return preg_replace( - '/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims', - '<a target="_blank" href="$1" target="_blank">$1</a> ', - $content - ); + if (substr(strval($url), 0, 1) == '/') { + // fix relative URL + $url = 'https://www.youtube.com' . $url; + } elseif (substr(strval($url), 0, 33) == 'https://www.youtube.com/redirect?') { + // extract actual URL from YouTube redirect + parse_str(substr($url, 33), $params); + if (strpos(($params['q'] ?? 
''), rtrim($content, '.')) === 0) { + $url = $params['q']; + } + } + + // ensure all URLs are made clickable + $url = $url ?? $content; + + if (filter_var($url, FILTER_VALIDATE_URL)) { + return '<a href="' . $url . '" target="_blank">' . $content . '</a>'; + } + + return $content; } } From f3ca567159386f32ef26c424b14d9b6e2acf126a Mon Sep 17 00:00:00 2001 From: Korytov Pavel <thexcloud@gmail.com> Date: Sat, 27 Apr 2024 11:35:59 +0300 Subject: [PATCH 68/97] [TldrTechBridge] Fix and improve bridge (#4090) --- bridges/TldrTechBridge.php | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/bridges/TldrTechBridge.php b/bridges/TldrTechBridge.php index 984117b2d9d..d29553479ba 100644 --- a/bridges/TldrTechBridge.php +++ b/bridges/TldrTechBridge.php @@ -22,11 +22,15 @@ class TldrTechBridge extends BridgeAbstract 'type' => 'list', 'values' => [ 'Tech' => 'tech', - 'Crypto' => 'crypto', + 'Web Dev' => 'webdev', 'AI' => 'ai', - 'Web Dev' => 'engineering', + 'Information Security' => 'infosec', + 'Product Management' => 'product', + 'DevOps' => 'devops', + 'Crypto' => 'crypto', + 'Design' => 'design', + 'Marketing' => 'marketing', 'Founders' => 'founders', - 'Cybersecurity' => 'cybersecurity' ], 'defaultValue' => 'tech' ] @@ -48,12 +52,17 @@ public function collectData() // Convert /<topic>/2023-01-01 to unix timestamp $date_items = explode('/', $child->href); $date = strtotime(end($date_items)); - $this->items[] = [ - 'uri' => self::URI . $child->href, - 'title' => $child->plaintext, - 'timestamp' => $date, - 'content' => $this->extractContent(self::URI . $child->href), - ]; + $item_url = self::URI . ltrim($child->href, '/'); + try { + $this->items[] = [ + 'uri' => self::URI . 
$child->href, + 'title' => $child->plaintext, + 'timestamp' => $date, + 'content' => $this->extractContent($item_url), + ]; + } catch (HttpException $e) { + continue; + } $added++; if ($added >= $limit) { break; @@ -66,7 +75,7 @@ private function extractContent($url) $html = getSimpleHTMLDOM($url); $content = $html->find('div.content-center.mt-5', 0); if (!$content) { - return ''; + throw new HttpException('Could not find content', 500); } $subscribe_form = $content->find('div.mt-5 > div > form', 0); if ($subscribe_form) { From d15960f955be91c8fa236d96186fcb0524572859 Mon Sep 17 00:00:00 2001 From: Thomas <mightymt@users.noreply.github.com> Date: Thu, 2 May 2024 19:45:04 +0200 Subject: [PATCH 69/97] [YouTubeCommunityTabBridge] Multi-image attachment support (#4091) Adds support for multi-image attachments. Also changes individual if-statments in "getAttachments" to if/elseif as each post can apparently only have one attachment anyway. --- bridges/YouTubeCommunityTabBridge.php | 31 +++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/bridges/YouTubeCommunityTabBridge.php b/bridges/YouTubeCommunityTabBridge.php index 74200b17854..0c145c02675 100644 --- a/bridges/YouTubeCommunityTabBridge.php +++ b/bridges/YouTubeCommunityTabBridge.php @@ -228,8 +228,8 @@ private function getAttachments($details) if (isset($details->backstageAttachment)) { $attachments = $details->backstageAttachment; - // Video if (isset($attachments->videoRenderer) && isset($attachments->videoRenderer->videoId)) { + // Video if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . 
' posted a video'; } @@ -238,10 +238,8 @@ private function getAttachments($details) <iframe width="100%" height="410" src="https://www.youtube.com/embed/{$attachments->videoRenderer->videoId}" frameborder="0" allow="encrypted-media;" allowfullscreen></iframe> EOD; - } - - // Image - if (isset($attachments->backstageImageRenderer)) { + } elseif (isset($attachments->backstageImageRenderer)) { + // Image if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted an image'; } @@ -251,10 +249,8 @@ private function getAttachments($details) $content = <<<EOD <p><img src="{$lastThumb->url}"></p> EOD; - } - - // Poll - if (isset($attachments->pollRenderer)) { + } elseif (isset($attachments->pollRenderer)) { + // Poll if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted a poll'; } @@ -270,6 +266,23 @@ private function getAttachments($details) $content = <<<EOD <hr><p>Poll ({$attachments->pollRenderer->totalVotes->simpleText})<br><ul>{$pollChoices}</ul><p> EOD; + } elseif (isset($attachments->postMultiImageRenderer->images)) { + // Multiple images + $images = $attachments->postMultiImageRenderer->images; + + if (is_array($images)) { + if (empty($this->itemTitle)) { + $this->itemTitle = $this->feedName . ' posted ' . count($images) . 
' images'; + } + + foreach ($images as $image) { + $lastThumb = end($image->backstageImageRenderer->image->thumbnails); + + $content .= <<<EOD +<p><img src="{$lastThumb->url}"></p> +EOD; + } + } } } From f48020982530ff08636a5233021014c16fc67c36 Mon Sep 17 00:00:00 2001 From: Eugene Molotov <eugene.molotov@yandex.ru> Date: Mon, 6 May 2024 02:30:23 +0500 Subject: [PATCH 70/97] [YoutubeBridge] Fix empty result in search feed (#4098) --- bridges/YoutubeBridge.php | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 6a29e387158..af14c856f2c 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -193,14 +193,7 @@ private function collectDataInternal() $html = $this->fetch($url_listing); $jsonData = $this->extractJsonFromHtml($html); $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; - $jsonData = $jsonData->sectionListRenderer->contents; - foreach ($jsonData as $data) { - // Search result includes some ads, have to filter them - if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) { - $jsonData = $data->itemSectionRenderer->contents; - break; - } - } + $jsonData = $jsonData->sectionListRenderer->contents[0]->itemSectionRenderer->contents; $this->fetchItemsFromFromJsonData($jsonData); $this->feeduri = $url_listing; $this->feedName = 'Search: ' . $search; From d11b7f77540760bf115b724dd06033ca6886485e Mon Sep 17 00:00:00 2001 From: Patrick <jummo4@yahoo.de> Date: Sun, 5 May 2024 23:30:38 +0200 Subject: [PATCH 71/97] Change URI for St. 
Johannes Blick (#4099) Co-authored-by: Patrick <jummo@mailbox.org> --- bridges/JohannesBlickBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/JohannesBlickBridge.php b/bridges/JohannesBlickBridge.php index 72583a53e22..80ca9a711a7 100644 --- a/bridges/JohannesBlickBridge.php +++ b/bridges/JohannesBlickBridge.php @@ -3,7 +3,7 @@ class JohannesBlickBridge extends BridgeAbstract { const NAME = 'Johannes Blick'; - const URI = 'https://www.st-johannes-baptist.de/index.php/unsere-medien/johannesblick-archiv'; + const URI = 'https://www.st-johannes-baptist.de/index.php/medien-und-downloads/archiv-johannesblick'; const DESCRIPTION = 'RSS feed for Johannes Blick'; const MAINTAINER = 'jummo4@yahoo.de'; From 1c3024fca7b8330b75e93fde673af09b1d797fa5 Mon Sep 17 00:00:00 2001 From: Facundo Tuesca <facu@tuesca.com> Date: Wed, 8 May 2024 00:25:45 +0200 Subject: [PATCH 72/97] [MangaReaderBridge] Change feed title to manga name (#4092) --- bridges/MangaReaderBridge.php | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bridges/MangaReaderBridge.php b/bridges/MangaReaderBridge.php index 1fa0c62dc54..1b8e765b134 100644 --- a/bridges/MangaReaderBridge.php +++ b/bridges/MangaReaderBridge.php @@ -26,11 +26,26 @@ class MangaReaderBridge extends BridgeAbstract ] ]; + protected $feedName = ''; + + + public function getName() + { + if (empty($this->feedName)) { + return parent::getName(); + } else { + return $this->feedName; + } + } + public function collectData() { $url = $this->getInput('url'); $lang = $this->getInput('lang'); $dom = getSimpleHTMLDOM($url); + $aniDetail = $dom->getElementById('ani_detail'); + $this->feedName = html_entity_decode($aniDetail->find('h2', 0)->plaintext); + $chapters = $dom->getElementById($lang . 
'-chapters'); foreach ($chapters->getElementsByTagName('li') as $chapter) { From 776ee233bdc22833815ece7d87140a4c64aa540c Mon Sep 17 00:00:00 2001 From: Alex Balgavy <8124851+thezeroalpha@users.noreply.github.com> Date: Sun, 12 May 2024 20:30:23 +0200 Subject: [PATCH 73/97] [NOSBridge] fix bridge (#4102) CSS selectors were no longer valid. --- bridges/NOSBridge.php | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/bridges/NOSBridge.php b/bridges/NOSBridge.php index 33cad40b190..60a560aac56 100644 --- a/bridges/NOSBridge.php +++ b/bridges/NOSBridge.php @@ -14,7 +14,7 @@ class NOSBridge extends BridgeAbstract 'name' => 'Onderwerp', 'title' => 'Kies onderwerp', 'values' => [ - 'Laatste nieuws' => 'nieuws', + 'Laatste nieuws' => 'nieuws/laatste', 'Binnenland' => 'nieuws/binnenland', 'Buitenland' => 'nieuws/buitenland', 'Regionaal nieuws' => 'nieuws/regio', @@ -38,17 +38,16 @@ public function collectData() { $url = sprintf('https://www.nos.nl/%s', $this->getInput('topic')); $dom = getSimpleHTMLDOM($url); - $dom = $dom->find('ul.list-items', 0); + $dom = $dom->find('main#content > div > section > ul', 0); if (!$dom) { throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); } $dom = defaultLinkTo($dom, $this->getURI()); - foreach ($dom->find('li.list-items__item') as $article) { - $a = $article->find('a', 0); + foreach ($dom->find('li') as $article) { $this->items[] = [ - 'title' => $article->find('h3.list-items__title', 0)->plaintext, - 'uri' => $article->find('a.list-items__link', 0)->href, - 'content' => $article->find('p.list-items__description', 0)->plaintext, + 'title' => $article->find('h2', 0)->plaintext, + 'uri' => $article->find('a', 0)->href, + 'content' => $article->find('p', 0)->plaintext, 'timestamp' => strtotime($article->find('time', 0)->datetime), ]; } From 494990086335bfa498f0ba7497765c09f3ff7570 Mon Sep 17 00:00:00 2001 From: July <phantop@tuta.io> Date: Sun, 12 May 2024 15:45:14 -0400 Subject: [PATCH 
74/97] [ScribbleHubBridge] Handle 429 errors and use consistent GUID (#4104) --- bridges/ScribbleHubBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/ScribbleHubBridge.php b/bridges/ScribbleHubBridge.php index 60add80242f..b4f7beaa13c 100644 --- a/bridges/ScribbleHubBridge.php +++ b/bridges/ScribbleHubBridge.php @@ -107,12 +107,13 @@ protected function parseItem(array $item) } $item['comments'] = $item['uri'] . '#comments'; + $item['uid'] = $item['uri']; try { $dom = getSimpleHTMLDOMCached($item['uri']); } catch (HttpException $e) { // 403 Forbidden, This means we got anti-bot response - if ($e->getCode() === 403) { + if ($e->getCode() === 403 || $e->getCode() === 429) { return $item; } throw $e; @@ -134,7 +135,6 @@ protected function parseItem(array $item) //Generate UID $item_pid = $dom->find('#mypostid', 0)->value; - $item['uid'] = $item_sid . "/$item_pid"; return $item; } From 6e2aeda61d4aefb1106be2cef63cc775ee4f300d Mon Sep 17 00:00:00 2001 From: July <phantop@tuta.io> Date: Sun, 12 May 2024 15:46:07 -0400 Subject: [PATCH 75/97] [GameBananaBridge] Include update contents in feed (#4103) * [GameBananaBridge] Include update contents in feed * [GameBananaBridge] Fix dynamic title property --- bridges/GameBananaBridge.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bridges/GameBananaBridge.php b/bridges/GameBananaBridge.php index 591ac0e9647..9a0a068606d 100644 --- a/bridges/GameBananaBridge.php +++ b/bridges/GameBananaBridge.php @@ -28,6 +28,8 @@ public function getIcon() return 'https://images.gamebanana.com/static/img/favicon/favicon.ico'; } + private $title; + public function collectData() { $url = 'https://api.gamebanana.com/Core/List/New?itemtype=Mod&page=1&gameid=' . 
$this->getInput('gid'); @@ -38,7 +40,7 @@ public function collectData() $json_list = json_decode($api_response, true); // Get first page mod list $url = 'https://api.gamebanana.com/Core/Item/Data?itemtype[]=Game&fields[]=name&itemid[]=' . $this->getInput('gid'); - $fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate'; + $fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate,Updates().aLatestUpdates()'; foreach ($json_list as $element) { // Build api request to minimize API calls $mid = $element[1]; $url .= '&itemtype[]=Mod&fields[]=' . $fields . '&itemid[]=' . $mid; @@ -72,6 +74,10 @@ public function collectData() foreach ($img_list as $img_element) { $item['content'] .= '<img src="https://images.gamebanana.com/img/ss/mods/' . $img_element['_sFile'] . '"/>'; } + if ($this->getInput('updates') && sizeof($element[8]) > 0) { + $item['content'] .= '<br><strong>Update: ' . $element[8][0]['_sTitle']; + $item['content'] .= '</strong><br>' . $element[8][0]['_sText'] . '<hr>'; + } $item['content'] .= '<br>' . $element[2]; $item['uid'] = $item['uri'] . $item['title'] . 
$item['timestamp']; From b785a4b64ee7fb295b845a59bd9d573a3e775e3e Mon Sep 17 00:00:00 2001 From: July <phantop@tuta.io> Date: Fri, 17 May 2024 15:29:17 -0400 Subject: [PATCH 76/97] ArsTechnicaBridge: restore categories lost by FeedExpander (#4030) --- bridges/ArsTechnicaBridge.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index 2c631871caf..fcb1bd4fb18 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -39,6 +39,10 @@ protected function parseItem(array $item) $item_html = defaultLinkTo($item_html, self::URI); $item['content'] = $item_html->find('.article-content', 0); + $parsely = $item_html->find('[name="parsely-page"]', 0); + $parsely_json = json_decode(html_entity_decode($parsely->content), true); + $item['categories'] = $parsely_json['tags']; + $pages = $item_html->find('nav.page-numbers > .numbers > a', -2); if (null !== $pages) { for ($i = 2; $i <= $pages->innertext; $i++) { From a7ed3d56f9b8ea3194f78925e3d339f933aa726e Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Sat, 18 May 2024 15:46:53 +0200 Subject: [PATCH 77/97] [ZeitBridge] Prettify author field By removing HTML tags (plaintext) and trimming it. --- bridges/ZeitBridge.php | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bridges/ZeitBridge.php b/bridges/ZeitBridge.php index b9806e5ab45..07b8e70c449 100644 --- a/bridges/ZeitBridge.php +++ b/bridges/ZeitBridge.php @@ -108,12 +108,9 @@ private function parseArticle($item, $article) } // authors - $authors = $article->find('*[itemtype*="schema.org/Person"]'); - if (!$authors) { - $authors = $article->find('.metadata__source'); - } + $authors = $article->find('*[itemtype*="schema.org/Person"]') ?? 
$article->find('.metadata__source'); if ($authors) { - $item['author'] = implode(', ', $authors); + $item['author'] = implode(', ', array_map(function ($e) { return trim($e->plaintext); }, $authors)); } // header image From 4d12aa2a9ee03b32b270a4a0930d7e9cef0515ca Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Sat, 18 May 2024 16:11:26 +0200 Subject: [PATCH 78/97] [ZeitBridge] Remove annoyances, add content Remove navigational elements, podcast images. Add many more header images, article content in <ul> (and for ggod measure in <ol>) and quotes with their content and not only their author. Extreme example: https://www.zeit.de/campus/2024-05/protest-palaestina-universitaet-europa-uebersicht --- bridges/ZeitBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/ZeitBridge.php b/bridges/ZeitBridge.php index 07b8e70c449..d79f7e7cfc6 100644 --- a/bridges/ZeitBridge.php +++ b/bridges/ZeitBridge.php @@ -87,7 +87,7 @@ private function parseArticle($item, $article) // remove known bad elements foreach ( $article->find( - 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, div[data-paywall], .js-embed-consent' + 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, .podcast-player__image, div[data-paywall], .js-embed-consent, script, nav, .article-flexible-toc__subheading-link, .faq-link' ) as $bad ) { $bad->remove(); @@ -114,7 +114,7 @@ private function parseArticle($item, $article) } // header image - $headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('header', 0); + $headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('.article-header', 0) ?? 
$article->find('header', 0); if ($headerimg) { $item['content'] .= implode('', $headerimg->find('img[src], figcaption')); } @@ -124,7 +124,7 @@ private function parseArticle($item, $article) if ($pages) { foreach ($pages as $page) { - $elements = $page->find('p, h2, figcaption, img[src]'); + $elements = $page->find('p, ul, ol, h2, figure.article__media img[src], figure.article__media figcaption, figure.quote'); $item['content'] .= implode('', $elements); } } From 7bde7a56f95a50f0394a1efb45cbbd80af643f9f Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Sat, 18 May 2024 16:18:23 +0200 Subject: [PATCH 79/97] [ZeitBridge] Fix linting --- bridges/ZeitBridge.php | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bridges/ZeitBridge.php b/bridges/ZeitBridge.php index d79f7e7cfc6..ae8a1a666e0 100644 --- a/bridges/ZeitBridge.php +++ b/bridges/ZeitBridge.php @@ -87,7 +87,9 @@ private function parseArticle($item, $article) // remove known bad elements foreach ( $article->find( - 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast, .podcast-player__image, div[data-paywall], .js-embed-consent, script, nav, .article-flexible-toc__subheading-link, .faq-link' + 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, + .article-heading__container--podcast, .podcast-player__image, div[data-paywall], + .js-embed-consent, script, nav, .article-flexible-toc__subheading-link, .faq-link' ) as $bad ) { $bad->remove(); @@ -110,7 +112,9 @@ private function parseArticle($item, $article) // authors $authors = $article->find('*[itemtype*="schema.org/Person"]') ?? 
$article->find('.metadata__source'); if ($authors) { - $item['author'] = implode(', ', array_map(function ($e) { return trim($e->plaintext); }, $authors)); + $item['author'] = implode(', ', array_map(function ($e) { + return trim($e->plaintext); + }, $authors)); } // header image From 75f35391fa3399f230d425a33a6836ccf07340ff Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Sat, 18 May 2024 16:51:00 +0200 Subject: [PATCH 80/97] [HeiseBridge] Add missing <ol> elements (#4110) The following article has <ol> elements that were missing. Adding them to have the full content. https://heise.de/-9714438 --- bridges/HeiseBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index a78b4609fac..9aa7209d4c2 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -214,7 +214,7 @@ private function addArticleToItem($item, $article) $content = $article->find('.article-content', 0); if ($content) { $contentElements = $content->find( - 'p, h3, ul, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption' + 'p, h3, ul, ol, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption' ); $item['content'] .= implode('', $contentElements); } From dc199ebf5c134960c59bceb233afff0379062a77 Mon Sep 17 00:00:00 2001 From: Albert Kiskorov <me@albi.io> Date: Sun, 19 May 2024 19:37:59 +0700 Subject: [PATCH 81/97] Fix: Ensure `$time` is set from `innertext` when `datetime` attribute is not found (#4111) This commit addresses a bug where the $time variable is not set from the innertext of the $time_element when the datetime attribute is not found. The previous implementation only checked if $time was null or an empty string, which did not cover all cases where the datetime attribute might be missing. 
By using the empty() function, we ensure that $time is correctly set from the innertext when the datetime attribute is not present. --- bridges/CssSelectorComplexBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index 632e6b6aa3c..a2e001b27ef 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -442,7 +442,7 @@ protected function parseEntryElement( if (!is_null($time_selector) && $time_selector != '') { $time_element = $entry_html->find($time_selector, 0); $time = $time_element->getAttribute('datetime'); - if (is_null($time)) { + if (empty($time)) { $time = $time_element->innertext; } From 5a68ee0c87bc2392886efd411511e094ae4fe5ac Mon Sep 17 00:00:00 2001 From: tillcash <tillcash@users.noreply.github.com> Date: Sun, 26 May 2024 20:51:14 +0530 Subject: [PATCH 82/97] [HinduTamilBridge] New (#4115) --- bridges/HinduTamilBridge.php | 91 ++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 bridges/HinduTamilBridge.php diff --git a/bridges/HinduTamilBridge.php b/bridges/HinduTamilBridge.php new file mode 100644 index 00000000000..cdbdfe358b0 --- /dev/null +++ b/bridges/HinduTamilBridge.php @@ -0,0 +1,91 @@ +<?php + +class HinduTamilBridge extends FeedExpander +{ + const NAME = 'HinduTamil'; + const URI = 'https://www.hindutamil.in'; + const DESCRIPTION = 'Retrieve full articles from hindutamil.in feeds'; + const MAINTAINER = 'tillcash'; + const PARAMETERS = [ + [ + 'topic' => [ + 'name' => 'topic', + 'type' => 'list', + 'defaultValue' => 'crime', + 'values' => [ + 'Astrology' => 'astrology', + 'Blogs' => 'blogs', + 'Business' => 'business', + 'Cartoon' => 'cartoon', + 'Cinema' => 'cinema', + 'Crime' => 'crime', + 'Discussion' => 'discussion', + 'Education' => 'education', + 'Environment' => 'environment', + 'India' => 'india', + 'Lifestyle' => 'life-style', + 'Literature' => 'literature', + 
'Opinion' => 'opinion', + 'Reporters' => 'reporters-page', + 'Socialmedia' => 'social-media', + 'Spirituals' => 'spirituals', + 'Sports' => 'sports', + 'Supplements' => 'supplements', + 'Tamilnadu' => 'tamilnadu', + 'Technology' => 'technology', + 'Tourism' => 'tourism', + 'World' => 'world', + ], + ], + 'limit' => [ + 'name' => 'limit (max 100)', + 'type' => 'number', + 'defaultValue' => 10, + ], + ], + ]; + + const FEED_BASE_URL = 'https://feeds.feedburner.com/Hindu_Tamil_'; + + public function getName() + { + $topic = $this->getKey('topic'); + return self::NAME . ($topic ? ' - ' . $topic : ''); + } + + public function collectData() + { + $limit = min(100, $this->getInput('limit')); + $url = self::FEED_BASE_URL . $this->getInput('topic'); + $this->collectExpandableDatas($url, $limit); + } + + protected function parseItem($item) + { + $dom = getSimpleHTMLDOMCached($item['uri']); + + $date = $dom->find('p span.date', 1); + if ($date) { + $item['timestamp'] = $date->innertext; + } + + $content = $dom->find('#pgContentPrint', 0); + if (!$content) { + return $item; + } + + $image = $dom->find('#LoadArticle figure', 0); + $item['content'] = $image . 
$this->cleanContent($content); + + return $item; + } + + private function cleanContent($content) + { + foreach ($content->find('div[align="center"], script') as $remove) { + $remove->outertext = ''; + } + + return $content; + } +} From bd90109c70fd3d4b302ed38e7b768e6df7d323d9 Mon Sep 17 00:00:00 2001 From: tillcash <tillcash@users.noreply.github.com> Date: Thu, 30 May 2024 00:46:10 +0530 Subject: [PATCH 83/97] [HarvardHealthBlogBridge] New (#4116) --- bridges/HarvardHealthBlogBridge.php | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 bridges/HarvardHealthBlogBridge.php diff --git a/bridges/HarvardHealthBlogBridge.php b/bridges/HarvardHealthBlogBridge.php new file mode 100644 index 00000000000..75e7e2cfbb9 --- /dev/null +++ b/bridges/HarvardHealthBlogBridge.php @@ -0,0 +1,56 @@ +<?php + +class HarvardHealthBlogBridge extends BridgeAbstract +{ + const NAME = 'Harvard Health Blog'; + const URI = 'https://www.health.harvard.edu/blog'; + const DESCRIPTION = 'Retrieve articles from health.harvard.edu'; + const MAINTAINER = 'tillcash'; + const MAX_ARTICLES = 10; + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI); + $count = 0; + + foreach ($dom->find('div[class="mb-16 md:flex"]') as $element) { + if ($count >= self::MAX_ARTICLES) { + break; + } + + $data = $element->find('a[class="hover:text-red transition-colors duration-200"]', 0); + if (!$data) { + continue; + } + + $url = $data->href; + + $this->items[] = [ + 'content' => $this->constructContent($url), + 'timestamp' => $element->find('time', 0)->datetime, + 'title' => $data->plaintext, + 'uid' => $url, + 'uri' => $url, + ]; + + $count++; + } + } + + private function constructContent($url) + { + $dom = getSimpleHTMLDOMCached($url); + + $article = $dom->find('div[class*="content-repository-content"]', 0); + if (!$article) { + return 'Content Not Found'; + } + + // Remove ads + foreach ($article->find('.inline-ad') as $remove) { + $remove->outertext = ''; + } 
+ + return $article->innertext; + } +} From cfd406861eeb6f90b61df56981cc0337ab504a06 Mon Sep 17 00:00:00 2001 From: tillcash <tillcash@users.noreply.github.com> Date: Thu, 30 May 2024 19:38:08 +0530 Subject: [PATCH 84/97] [HarvardHealthBlogBridge] Update (#4117) Make article image optional as all images are representative --- bridges/HarvardHealthBlogBridge.php | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/bridges/HarvardHealthBlogBridge.php b/bridges/HarvardHealthBlogBridge.php index 75e7e2cfbb9..bb6a5ede419 100644 --- a/bridges/HarvardHealthBlogBridge.php +++ b/bridges/HarvardHealthBlogBridge.php @@ -7,6 +7,15 @@ class HarvardHealthBlogBridge extends BridgeAbstract const DESCRIPTION = 'Retrieve articles from health.harvard.edu'; const MAINTAINER = 'tillcash'; const MAX_ARTICLES = 10; + const PARAMETERS = [ + [ + 'image' => [ + 'name' => 'Article Image', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ], + ], + ]; public function collectData() { @@ -46,9 +55,15 @@ private function constructContent($url) return 'Content Not Found'; } - // Remove ads - foreach ($article->find('.inline-ad') as $remove) { - $remove->outertext = ''; + // remove article image + if (!$this->getInput('image')) { + $image = $article->find('p', 0); + $image->remove(); + } + + // remove ads + foreach ($article->find('.inline-ad') as $ad) { + $ad->outertext = ''; } return $article->innertext; From 36706a3dec464f1ab59b21356bc838ab77c657b4 Mon Sep 17 00:00:00 2001 From: Tim-Florian Feulner <50834839+R3dError@users.noreply.github.com> Date: Mon, 3 Jun 2024 00:55:39 +0200 Subject: [PATCH 85/97] Fix NACSouthGermanyMediaLibraryBridge due to website changes (#4121) --- bridges/NACSouthGermanyMediaLibraryBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/NACSouthGermanyMediaLibraryBridge.php b/bridges/NACSouthGermanyMediaLibraryBridge.php index 030ded8f867..70129b37cef 100644 --- 
a/bridges/NACSouthGermanyMediaLibraryBridge.php +++ b/bridges/NACSouthGermanyMediaLibraryBridge.php @@ -85,12 +85,12 @@ private function collectDataInList($pageURI, $customizeItemCall) foreach ($page->find('div.flex-columns.entry') as $parent) { # Find title - $title = $parent->find('h2', 0)->plaintext; + $title = trim($parent->find('h2')[0]->innertext); # Find content - $contentBlock = $parent->find('ul', 0); + $contentBlock = $parent->find('div')[2]; $content = ''; - foreach ($contentBlock->find('li') as $li) { + foreach ($contentBlock->find('li,p') as $li) { $content .= '<p>' . $li->plaintext . '</p>'; } From 87fa6ea71e8ae8c6e0cddd47e28fd65664a6cb05 Mon Sep 17 00:00:00 2001 From: Tone <66808319+Tone866@users.noreply.github.com> Date: Mon, 10 Jun 2024 19:40:07 +0200 Subject: [PATCH 86/97] [HeiseBridge.php] Prevent Youtube videos from being filtered out (#4125) --- bridges/HeiseBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 9aa7209d4c2..e26a46076f5 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -214,7 +214,7 @@ private function addArticleToItem($item, $article) $content = $article->find('.article-content', 0); if ($content) { $contentElements = $content->find( - 'p, h3, ul, ol, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption' + 'p, h3, ul, ol, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption, noscript iframe' ); $item['content'] .= implode('', $contentElements); } From d3d33c72bda0cb6f222b415da075b32f2a60a61d Mon Sep 17 00:00:00 2001 From: tillcash <tillcash@users.noreply.github.com> Date: Tue, 11 Jun 2024 19:10:49 +0530 Subject: [PATCH 87/97] [HinduTamilBridge] fix timestamp (#4127) --- bridges/HinduTamilBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/HinduTamilBridge.php b/bridges/HinduTamilBridge.php index 
cdbdfe358b0..cab04171acf 100644 --- a/bridges/HinduTamilBridge.php +++ b/bridges/HinduTamilBridge.php @@ -66,7 +66,7 @@ protected function parseItem($item) $date = $dom->find('p span.date', 1); if ($date) { - $item['timestamp'] = $date->innertext; + $item['timestamp'] = $date->innertext . ' IST'; } $content = $dom->find('#pgContentPrint', 0); From e1b74aeb1bd120ac0c337fae9f55943cf7d00a0d Mon Sep 17 00:00:00 2001 From: July <phantop@tuta.io> Date: Wed, 12 Jun 2024 23:02:17 -0400 Subject: [PATCH 88/97] [GameBananaBridge] Add categories and more detailed updates (#4129) * [GameBananaBridge] Add mod categorie(s) * [GameBananaBridge] Include full update changelog details --- bridges/GameBananaBridge.php | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/bridges/GameBananaBridge.php b/bridges/GameBananaBridge.php index 9a0a068606d..88b19ef0cc7 100644 --- a/bridges/GameBananaBridge.php +++ b/bridges/GameBananaBridge.php @@ -40,7 +40,7 @@ public function collectData() $json_list = json_decode($api_response, true); // Get first page mod list $url = 'https://api.gamebanana.com/Core/Item/Data?itemtype[]=Game&fields[]=name&itemid[]=' . $this->getInput('gid'); - $fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate,Updates().aLatestUpdates()'; + $fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate,Updates().aLatestUpdates(),Category().name,RootCategory().name'; foreach ($json_list as $element) { // Build api request to minimize API calls $mid = $element[1]; $url .= '&itemtype[]=Mod&fields[]=' . $fields . '&itemid[]=' . 
$mid; @@ -52,11 +52,18 @@ public function collectData() array_shift($json_list); // Take title from API request and remove from json foreach ($json_list as $element) { + // Trashed mod IDs are still picked up and return null; skip + if ($element[0] == null) { + continue; + } + $item = []; $item['uri'] = $element[6]; $item['comments'] = $item['uri'] . '#PostsListModule'; $item['title'] = $element[0]; $item['author'] = $element[1]; + $item['categories'][] = $element[9]; + $item['categories'][] = $element[10]; $item['timestamp'] = $element[5]; if ($this->getInput('updates')) { @@ -74,9 +81,21 @@ public function collectData() foreach ($img_list as $img_element) { $item['content'] .= '<img src="https://images.gamebanana.com/img/ss/mods/' . $img_element['_sFile'] . '"/>'; } + + // Get updates from element[8], if applicable if ($this->getInput('updates') && sizeof($element[8]) > 0) { - $item['content'] .= '<br><strong>Update: ' . $element[8][0]['_sTitle']; - $item['content'] .= '</strong><br>' . $element[8][0]['_sText'] . '<hr>'; + $update = $element[8][0]; + $item['content'] .= '<br><strong>Update:</strong> ' . $update['_sTitle']; + if ($update['_sText'] != '') { + $item['content'] .= '<br>' . $update['_sText']; + } + foreach ($update['_aChangeLog'] as $change) { + if ($change['cat'] == '') { + $change['cat'] = 'Change'; + } + $item['content'] .= '<br><em>' . $change['cat'] . '</em>: ' . $change['text']; + } + $item['content'] .= '<br><hr>'; } $item['content'] .= '<br>' . $element[2]; From bb1e308057b7e8740d67d0fb0fa9c6298e9b9730 Mon Sep 17 00:00:00 2001 From: sysadminstory <sysadminstory@users.noreply.github.com> Date: Thu, 13 Jun 2024 05:03:20 +0200 Subject: [PATCH 89/97] [IdealoBridge] Fix price comparison and some PHP Notice (#4130) * [IdealoBridge] Fix price comparison and some PHP Notice - The prices were compared as String and the comparison was wrong in some case : now the price are converted to float before the comparison, so the logic works really. 
- Don't show a new or used product price if it does not exist : this prevents a PHP Notice to be thrown * [IdealoBridge] Fix price conversion in case the price is null The conversion as float of the text price won't work if the price is null : we retunr null in this case now. --- bridges/IdealoBridge.php | 43 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php index 4eb66dcb5d1..f426a45c326 100644 --- a/bridges/IdealoBridge.php +++ b/bridges/IdealoBridge.php @@ -81,6 +81,25 @@ private function getFeedTitle() return $title . ' - ' . $this::NAME; } + /** + * Returns the Price as float + * @return float rhe price converted in float + */ + private function convertPriceToFloat($price) + { + // Every price is stored / displayed as "xxx,xx €", but PHP can't convert it as float + + if ($price !== null) { + // Convert comma as dot + $price = str_replace(',', '.', $price); + // Remove the '€' char + $price = str_replace('€', '', $price); + // Convert to float + return floatval($price); + } else { + return $price; + } + } /** * Returns the Price Trend emoji @@ -88,8 +107,10 @@ private function getFeedTitle() */ private function getPriceTrend($NewPrice, $OldPrice) { - // In case there is no old PRice, then show no trend - if ($OldPrice === null) { + $NewPrice = $this->convertPriceToFloat($NewPrice); + $OldPrice = $this->convertPriceToFloat($OldPrice); + // In case there is no old Price, then show no trend + if ($OldPrice === null || $OldPrice == 0) { $trend = ''; } else if ($NewPrice > $OldPrice) { $trend = '↗'; @@ -125,7 +146,7 @@ public function collectData() $OldPriceNew = $this->loadCacheValue($KeyNEW); $OldPriceUsed = $this->loadCacheValue($KeyUSED); - // First button is new. Found at oopStage-conditionButton-wrapper-text class (.) + // First button contains the new price. Found at oopStage-conditionButton-wrapper-text class (.) 
$FirstButton = $html->find('.oopStage-conditionButton-wrapper-text', 0); if ($FirstButton) { $PriceNew = $FirstButton->find('strong', 0)->plaintext; @@ -133,7 +154,7 @@ public function collectData() $this->saveCacheValue($KeyNEW, $PriceNew); } - // Second Button is used + // Second Button contains the used product price $SecondButton = $html->find('.oopStage-conditionButton-wrapper-text', 1); if ($SecondButton) { $PriceUsed = $SecondButton->find('strong', 0)->plaintext; @@ -149,7 +170,7 @@ public function collectData() $content = ''; // Generate Content - if (isset($PriceNew) && $PriceNew > 1) { + if (isset($PriceNew) && $this->convertPriceToFloat($PriceNew) > 0) { $content .= sprintf('<p><b>Price New:</b><br>%s %s</p>', $PriceNew, $this->getPriceTrend($PriceNew, $OldPriceNew)); $content .= "<p><b>Price New before:</b><br>$OldPriceNew</p>"; } @@ -158,7 +179,7 @@ public function collectData() $content .= sprintf('<p><b>Max Price New:</b><br>%s,00 €</p>', $this->getInput('MaxPriceNew')); } - if (isset($PriceUsed) && $PriceUsed > 1) { + if (isset($PriceUsed) && $this->convertPriceToFloat($PriceUsed) > 0) { $content .= sprintf('<p><b>Price Used:</b><br>%s %s</p>', $PriceUsed, $this->getPriceTrend($PriceUsed, $OldPriceUsed)); $content .= "<p><b>Price Used before:</b><br>$OldPriceUsed</p>"; } @@ -176,7 +197,7 @@ public function collectData() // Currently under Max new price if ($this->getInput('MaxPriceNew') != '') { - if (isset($PriceNew) && $PriceNew < $this->getInput('MaxPriceNew')) { + if (isset($PriceNew) && $this->convertPriceToFloat($PriceNew) < $this->getInput('MaxPriceNew')) { $title = sprintf($Pricealarm, 'New', $PriceNew, $Productname, $now); $item = [ 'title' => $title, @@ -190,7 +211,7 @@ public function collectData() // Currently under Max used price if ($this->getInput('MaxPriceUsed') != '') { - if (isset($PriceUsed) && $PriceUsed < $this->getInput('MaxPriceUsed')) { + if (isset($PriceUsed) && $this->convertPriceToFloat($PriceUsed) < 
$this->getInput('MaxPriceUsed')) { $title = sprintf($Pricealarm, 'Used', $PriceUsed, $Productname, $now); $item = [ 'title' => $title, @@ -202,7 +223,7 @@ public function collectData() } } - // General Priceupdate + // General Priceupdate Without any Max Price for new and Used product if ($this->getInput('MaxPriceUsed') == '' && $this->getInput('MaxPriceNew') == '') { // check if a relevant pricechange happened if ( @@ -211,11 +232,11 @@ public function collectData() ) { $title = 'Priceupdate! '; - if (!$this->getInput('ExcludeNew')) { + if (!$this->getInput('ExcludeNew') && isset($PriceNew)) { $title .= 'NEW' . $this->getPriceTrend($PriceNew, $OldPriceNew) . ' '; } - if (!$this->getInput('ExcludeUsed')) { + if (!$this->getInput('ExcludeUsed') && isset($PriceUsed)) { $title .= 'USED' . $this->getPriceTrend($PriceUsed, $OldPriceUsed) . ' '; } $title .= $Productname; From 649dfa72929c8aabdbbf4c693b35c484ffa135e9 Mon Sep 17 00:00:00 2001 From: Ftonans <77411099+Ftonans@users.noreply.github.com> Date: Thu, 13 Jun 2024 18:11:02 +0000 Subject: [PATCH 90/97] Update instance list (#4131) vern's instance seems to be working, I changed the url to https since they have automatic redirect. I removed trailing slashes from the urls so they look the same. I removed [rss.m3wz.su](https://rss.m3wz.su) since I didn't see the website online and the owner last posted on Fediverse two months ago. I'm not sure maybe it should be in "Inactive" category, I can try to contact m3wz for information about his instance. I removed rss.foxhaven.cyou because of [this](https://shitpost.poridge.club/notes/9lumb2gll8) (TL;DR the owner lost access to the domain) bus-hit is offline but the main website is working. I guess the rss-bridge just crashed and the owner will restart it. 
--- docs/01_General/06_Public_Hosts.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md index 1bc8fc205da..fa8d5fddd53 100644 --- a/docs/01_General/06_Public_Hosts.md +++ b/docs/01_General/06_Public_Hosts.md @@ -3,12 +3,12 @@ | Country | Address | Status | Contact | Comment | |:-------:|---------|--------|----------|---------| | ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.org/bridge01 | ![](https://img.shields.io/website/https/rss-bridge.org/bridge01.svg) | [@dvikan](https://github.com/dvikan) | London, Digital Ocean| -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in/ | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with OVH SAS (Maintained in India) | -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.cheredeprince.net/ | ![](https://img.shields.io/website/https/rss-bridge.cheredeprince.net) | [@La_Bécasse](https://cheredeprince.net/contact) | Self-Hosted at home in France | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with OVH SAS (Maintained in India) | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.cheredeprince.net | ![](https://img.shields.io/website/https/rss-bridge.cheredeprince.net) | [@La_Bécasse](https://cheredeprince.net/contact) | Self-Hosted at home in France | | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.sans-nuage.fr | ![](https://img.shields.io/website/https/rss-bridge.sans-nuage.fr) | [@Alsace Réseau Neutre](https://arn-fai.net/contact) | Hosted in Alsace, France | | ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.lewd.tech | 
![](https://img.shields.io/website/https/rss-bridge.lewd.tech.svg) | [@Erisa](https://github.com/Erisa) | Hosted in London, protected by Cloudflare Rate Limiting | | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://bridge.easter.fr | ![](https://img.shields.io/website/https/bridge.easter.fr.svg) | [@chatainsim](https://github.com/chatainsim) | Hosted in Isère, France | -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://wtf.roflcopter.fr/rss-bridge/ | ![](https://img.shields.io/website/https/wtf.roflcopter.fr/rss-bridge.svg) | [roflcopter.fr](https://wtf.roflcopter.fr/) | Hosted in France | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://wtf.roflcopter.fr/rss-bridge | ![](https://img.shields.io/website/https/wtf.roflcopter.fr/rss-bridge.svg) | [roflcopter.fr](https://wtf.roflcopter.fr/) | Hosted in France | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss.nixnet.services | ![](https://img.shields.io/website/https/rss.nixnet.services.svg) | [@amolith](https://nixnet.services/contact) | Hosted in Wunstorf, Germany | | ![](https://iplookup.flagfox.net/images/h16/AT.png) | https://rss-bridge.ggc-project.de | ![](https://img.shields.io/website/https/rss-bridge.ggc-project.de) | [@ggc-project.de](https://social.dev-wiki.de/@ggc_project) | Hosted in Steyr, Austria | | ![](https://iplookup.flagfox.net/images/h16/CA.png) | https://rssbridge.bus-hit.me | ![](https://img.shields.io/website/https/rssbridge.bus-hit.me.svg)| [@austinhuang0131](https://austinhuang.me/) | Hosted with Oracle in Québec, Canada | @@ -16,15 +16,15 @@ | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.boldair.dev | ![](https://img.shields.io/website?down_color=red&down_message=down&up_color=lime&up_message=up&url=https%3A%2F%2Frssbridge.boldair.dev) | [@Boldairdev](https://github.com/Boldairdev) | Latest Github release, Hosted on PHP 8.0 in Roubaix, France | | ![](https://iplookup.flagfox.net/images/h16/IN.png) | 
https://rss-bridge.bb8.fun | ![](https://img.shields.io/website/https/rss-bridge.bb8.fun.svg) | [@captn3m0](https://github.com/captn3m0) | Hosted in Bengaluru, India | | ![](https://iplookup.flagfox.net/images/h16/RU.png) | https://ololbu.ru/rss-bridge | ![](https://img.shields.io/website/https/ololbu.ru) | [@Ololbu](https://github.com/Ololbu) | Hosted in Moscow, Russia | -| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://tools.bheil.net/rss-bridge/ | ![](https://img.shields.io/website/https/tools.bheil.net.svg) | [@bheil](https://www.bheil.net) | Hosted in Germany | +| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://tools.bheil.net/rss-bridge | ![](https://img.shields.io/website/https/tools.bheil.net.svg) | [@bheil](https://www.bheil.net) | Hosted in Germany | | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://bridge.suumitsu.eu | ![](https://img.shields.io/website/https/bridge.suumitsu.eu.svg) | [@mitsukarenai](https://github.com/mitsukarenai) | Hosted in Paris, France | | ![](https://iplookup.flagfox.net/images/h16/NL.png) | https://feed.eugenemolotov.ru | ![](https://img.shields.io/website/https/feed.eugenemolotov.ru.svg) | [@em92](https://github.com/em92) | Hosted in Amsterdam, Netherlands | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss-bridge.mediani.de | ![](https://img.shields.io/website/https/rss-bridge.mediani.de.svg) | [@sokai](https://github.com/sokai) | Hosted with Netcup, Germany | -| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.foxhaven.cyou| ![](https://img.shields.io/badge/website-up-brightgreen) | [@Aysilu](https://foxhaven.cyou) | Hosted with Timeweb (Maintained in Poland) | -| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.m3wz.su| ![](https://img.shields.io/badge/website-up-brightgreen) | [@m3oweezed](https://m3wz.su/en/about) | Poland, Hosted with Timeweb Cloud | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | 
![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | Hosted with Hostaris, Germany | ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine | ![](https://iplookup.flagfox.net/images/h16/IN.png) | https://rssbridge.projectsegfau.lt | ![](https://img.shields.io/website/https/rssbridge.projectsegfau.lt) | [@gi-yt](https://aryak.me) | Self-Hosted at Mumbai, India with Airtel (ISP) | +| ![](https://iplookup.flagfox.net/images/h16/US.png) | https://rb.vern.cc | ![](https://img.shields.io/website/https/rb.vern.cc.svg) | [@vern.cc](https://vern.cc/en/admin) | Hosted with Hetzner, US | +| ![](https://iplookup.flagfox.net/images/h16/RO.png) | https://rss.bloat.cat | ![](https://img.shields.io/website/https/rss.bloat.cat) | [@vlnst](https://bloat.cat/contact) | Hosted with Kyun, Romania | ## Inactive instances @@ -32,4 +32,3 @@ | Country | Address | Status | Contact | Comment | |:-------:|---------|--------|----------|---------| | ![](https://iplookup.flagfox.net/images/h16/FI.png) | https://rss-bridge.snopyta.org | ![](https://img.shields.io/website/https/rss-bridge.snopyta.org.svg) | [@Perflyst](https://github.com/Perflyst) | Hosted in Helsinki, Finland | -| ![](https://iplookup.flagfox.net/images/h16/US.png) | http://rb.vern.cc/ | ![](https://img.shields.io/website/https/rb.vern.cc.svg) | [@vern.cc](https://vern.cc/en/admin) | Hosted with Hetzner, US | From 0eac7a078479eac48be6c841a79f77ff04f429a8 Mon Sep 17 00:00:00 2001 From: Mynacol <Mynacol@users.noreply.github.com> Date: Sun, 16 Jun 2024 13:16:42 +0200 Subject: [PATCH 91/97] [HeiseBridge] Remove lost+found icon Remove the icon visible in l+f articles, e.g. 
https://www.heise.de/news/l-f-DISGOMOJI-die-Linux-Malware-die-auf-Emojis-steht-9765024.html Using a css selector in the form img[alt*="l+f"] was tried, but is not supported by the used library. --- bridges/HeiseBridge.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index e26a46076f5..82c1f1aa4ff 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -166,6 +166,11 @@ private function addArticleToItem($item, $article) ) { $element->remove(); } + foreach ($article->find('img') as $element) { + if (str_contains($element->alt, 'l+f')) { + $element->remove(); + } + } // reload html, as remove() is buggy $article = str_get_html($article->outertext); From 206bebc7bdf90b00b2457a5693fe07079b3df40a Mon Sep 17 00:00:00 2001 From: Dag <me@dvikan.no> Date: Tue, 18 Jun 2024 20:22:46 +0200 Subject: [PATCH 92/97] ci: disallow the sizeof function in linter (#4134) --- bridges/DiarioDoAlentejoBridge.php | 2 +- bridges/GameBananaBridge.php | 2 +- bridges/ItakuBridge.php | 28 ++++++++++++++-------------- bridges/MagellantvBridge.php | 2 +- bridges/PatreonBridge.php | 2 +- bridges/PresidenciaPTBridge.php | 2 +- bridges/VieDeMerdeBridge.php | 2 +- phpcs.xml | 8 ++++++++ 8 files changed, 28 insertions(+), 20 deletions(-) diff --git a/bridges/DiarioDoAlentejoBridge.php b/bridges/DiarioDoAlentejoBridge.php index 9b82b49fc6d..0bd0f1d4e7b 100644 --- a/bridges/DiarioDoAlentejoBridge.php +++ b/bridges/DiarioDoAlentejoBridge.php @@ -47,7 +47,7 @@ public function collectData() }, self::PT_MONTH_NAMES), array_map(function ($num) { return sprintf('-%02d-', $num); - }, range(1, sizeof(self::PT_MONTH_NAMES))), + }, range(1, count(self::PT_MONTH_NAMES))), $element->find('span.date', 0)->innertext ); diff --git a/bridges/GameBananaBridge.php b/bridges/GameBananaBridge.php index 88b19ef0cc7..0f04f56b8e4 100644 --- a/bridges/GameBananaBridge.php +++ b/bridges/GameBananaBridge.php @@ -83,7 +83,7 @@ public function collectData() } 
// Get updates from element[8], if applicable - if ($this->getInput('updates') && sizeof($element[8]) > 0) { + if ($this->getInput('updates') && count($element[8]) > 0) { $update = $element[8][0]; $item['content'] .= '<br><strong>Update:</strong> ' . $update['_sTitle']; if ($update['_sText'] != '') { diff --git a/bridges/ItakuBridge.php b/bridges/ItakuBridge.php index 0577752cc55..4f4145742a4 100644 --- a/bridges/ItakuBridge.php +++ b/bridges/ItakuBridge.php @@ -347,17 +347,17 @@ private function getImagesSearch(array $opt) $url = self::URI . "/api/galleries/images/?by_following=false&date_range={$opt['range']}&ordering={$opt['order']}&is_video={$opt['video_only']}"; $url .= "&text={$opt['text']}&visibility=PUBLIC&visibility=PROFILE_ONLY&page=1&page_size=30&format=json"; - if (sizeof($opt['optional_tags']) > 0) { + if (count($opt['optional_tags']) > 0) { foreach ($opt['optional_tags'] as $tag) { $url .= "&optional_tags=$tag"; } } - if (sizeof($opt['negative_tags']) > 0) { + if (count($opt['negative_tags']) > 0) { foreach ($opt['negative_tags'] as $tag) { $url .= "&negative_tags=$tag"; } } - if (sizeof($opt['required_tags']) > 0) { + if (count($opt['required_tags']) > 0) { foreach ($opt['required_tags'] as $tag) { $url .= "&required_tags=$tag"; } @@ -381,17 +381,17 @@ private function getPostsSearch(array $opt) $url = self::URI . 
"/api/posts/?by_following=false&date_range={$opt['range']}&ordering={$opt['order']}"; $url .= '&visibility=PUBLIC&visibility=PROFILE_ONLY&page=1&page_size=30&format=json'; - if (sizeof($opt['optional_tags']) > 0) { + if (count($opt['optional_tags']) > 0) { foreach ($opt['optional_tags'] as $tag) { $url .= "&optional_tags=$tag"; } } - if (sizeof($opt['negative_tags']) > 0) { + if (count($opt['negative_tags']) > 0) { foreach ($opt['negative_tags'] as $tag) { $url .= "&negative_tags=$tag"; } } - if (sizeof($opt['required_tags']) > 0) { + if (count($opt['required_tags']) > 0) { foreach ($opt['required_tags'] as $tag) { $url .= "&required_tags=$tag"; } @@ -446,7 +446,7 @@ private function getOwnerID($username) private function getPost($id, array $metadata = null) { - if (isset($metadata) && sizeof($metadata['gallery_images']) < $metadata['num_images']) { + if (isset($metadata) && count($metadata['gallery_images']) < $metadata['num_images']) { $metadata = null; //force re-fetch of metadata } $uri = self::URI . '/posts/' . $id; @@ -457,7 +457,7 @@ private function getPost($id, array $metadata = null) $content_str = nl2br($data['content']); $content = "<p>{$content_str}</p><br/>"; //TODO: Add link and itaku user mention detection and convert into links. - if (array_key_exists('tags', $data) && sizeof($data['tags']) > 0) { + if (array_key_exists('tags', $data) && count($data['tags']) > 0) { $tag_types = [ 'ARTIST' => '', 'COPYRIGHT' => '', @@ -479,7 +479,7 @@ private function getPost($id, array $metadata = null) } } - if (sizeof($data['folders']) > 0) { + if (count($data['folders']) > 0) { $content .= '📁 In Folder(s): '; foreach ($data['folders'] as $folder) { $url = self::URI . '/profile/' . $data['owner_username'] . '/posts/' . 
$folder['id']; @@ -488,7 +488,7 @@ private function getPost($id, array $metadata = null) } $content .= '<hr/>'; - if (sizeof($data['gallery_images']) > 0) { + if (count($data['gallery_images']) > 0) { foreach ($data['gallery_images'] as $media) { $title = $media['title']; $url = self::URI . '/images/' . $media['id']; @@ -529,7 +529,7 @@ private function getCommission($id, array $metadata = null) $content_str = nl2br($data['description']); $content = "<p>{$content_str}</p><br>"; //TODO: Add link and itaku user mention detection and convert into links. - if (array_key_exists('tags', $data) && sizeof($data['tags']) > 0) { + if (array_key_exists('tags', $data) && count($data['tags']) > 0) { // $content .= "🏷 Tag(s): "; $tag_types = [ 'ARTIST' => '', @@ -552,7 +552,7 @@ private function getCommission($id, array $metadata = null) } } - if (array_key_exists('reference_gallery_sections', $data) && sizeof($data['reference_gallery_sections']) > 0) { + if (array_key_exists('reference_gallery_sections', $data) && count($data['reference_gallery_sections']) > 0) { $content .= '📁 Example folder(s): '; foreach ($data['folders'] as $folder) { $url = self::URI . '/profile/' . $data['owner_username'] . '/gallery/' . $folder['id']; @@ -601,7 +601,7 @@ private function getImage($id /* array $metadata = null */) //$metadata disabled $content_str = nl2br($data['description']); $content = "<p>{$content_str}</p><br/>"; //TODO: Add link and itaku user mention detection and convert into links. 
- if (array_key_exists('tags', $data) && sizeof($data['tags']) > 0) { + if (array_key_exists('tags', $data) && count($data['tags']) > 0) { // $content .= "🏷 Tag(s): "; $tag_types = [ 'ARTIST' => '', @@ -624,7 +624,7 @@ private function getImage($id /* array $metadata = null */) //$metadata disabled } } - if (array_key_exists('sections', $data) && sizeof($data['sections']) > 0) { + if (array_key_exists('sections', $data) && count($data['sections']) > 0) { $content .= '📁 In Folder(s): '; foreach ($data['sections'] as $folder) { $url = self::URI . '/profile/' . $data['owner_username'] . '/gallery/' . $folder['id']; diff --git a/bridges/MagellantvBridge.php b/bridges/MagellantvBridge.php index b1f0403e105..0a225160d8f 100644 --- a/bridges/MagellantvBridge.php +++ b/bridges/MagellantvBridge.php @@ -63,7 +63,7 @@ public function collectData() // Check whether items exists $article_list = $dom->find('div.articlePreview_preview-card__mLMOm'); - if (sizeof($article_list) == 0) { + if (count($article_list) == 0) { throw new Exception(sprintf('Unable to find css selector on `%s`', $url)); } diff --git a/bridges/PatreonBridge.php b/bridges/PatreonBridge.php index a21624253c1..895a9306fa2 100644 --- a/bridges/PatreonBridge.php +++ b/bridges/PatreonBridge.php @@ -228,7 +228,7 @@ public function collectData() //post attachments if ( isset($post->relationships->attachments->data) && - sizeof($post->relationships->attachments->data) > 0 + count($post->relationships->attachments->data) > 0 ) { $item['enclosures'] = []; $item['content'] .= '<hr><p><b>Attachments:</b><ul>'; diff --git a/bridges/PresidenciaPTBridge.php b/bridges/PresidenciaPTBridge.php index 052b2751d4a..247e8fce5ad 100644 --- a/bridges/PresidenciaPTBridge.php +++ b/bridges/PresidenciaPTBridge.php @@ -76,7 +76,7 @@ public function collectData() }, self::PT_MONTH_NAMES), array_map(function ($num) { return sprintf('-%02d-', $num); - }, range(1, sizeof(self::PT_MONTH_NAMES))), + }, range(1, count(self::PT_MONTH_NAMES))), 
$edt ); diff --git a/bridges/VieDeMerdeBridge.php b/bridges/VieDeMerdeBridge.php index 9e6166fbfd7..be3841577ac 100644 --- a/bridges/VieDeMerdeBridge.php +++ b/bridges/VieDeMerdeBridge.php @@ -26,7 +26,7 @@ public function collectData() $html = getSimpleHTMLDOM(self::URI, []); $quotes = $html->find('article.bg-white'); - if (sizeof($quotes) === 0) { + if (count($quotes) === 0) { return; } diff --git a/phpcs.xml b/phpcs.xml index bd1aca28489..9e393a137a0 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -35,6 +35,14 @@ </properties> </rule> + <rule ref="Generic.PHP.ForbiddenFunctions"> + <properties> + <property name="forbiddenFunctions" type="array"> + <element key="sizeof" value="count"/> + </property> + </properties> + </rule> + <!-- Duplicate class names are not allowed --> <rule ref="Generic.Classes.DuplicateClassName"/> From 00074b9bfc4c1ed3174b83d2f33015a1dc240464 Mon Sep 17 00:00:00 2001 From: Dag <me@dvikan.no> Date: Tue, 18 Jun 2024 20:55:05 +0200 Subject: [PATCH 93/97] fix: dont remove www from anchors in DOM, fix #4114 (#4135) --- static/rss-bridge.js | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/static/rss-bridge.js b/static/rss-bridge.js index b9b466d6cb8..9cd004cb00c 100644 --- a/static/rss-bridge.js +++ b/static/rss-bridge.js @@ -1,21 +1,14 @@ function rssbridge_list_search() { - function remove_www_from_url(url) { - if (url.hostname.indexOf('www.') === 0) { - url.hostname = url.hostname.substr(4); - } - } - var search = document.getElementById('searchfield').value; - var searchAsUrl = document.createElement('a'); - searchAsUrl.href = search; - remove_www_from_url(searchAsUrl); + var bridgeCards = document.querySelectorAll('section.bridge-card'); for (var i = 0; i < bridgeCards.length; i++) { var bridgeName = bridgeCards[i].getAttribute('data-ref'); var bridgeShortName = bridgeCards[i].getAttribute('data-short-name'); var bridgeDescription = bridgeCards[i].querySelector('.description'); - var bridgeUrl = 
bridgeCards[i].getElementsByTagName('a')[0]; - remove_www_from_url(bridgeUrl); + var bridgeUrlElement = bridgeCards[i].getElementsByTagName('a')[0]; + var bridgeUrl = bridgeUrlElement.toString(); + bridgeCards[i].style.display = 'none'; if (!bridgeName || !bridgeUrl) { continue; @@ -30,10 +23,7 @@ function rssbridge_list_search() { if (bridgeDescription.textContent.match(searchRegex)) { bridgeCards[i].style.display = 'block'; } - if (bridgeUrl.toString().match(searchRegex)) { - bridgeCards[i].style.display = 'block'; - } - if (bridgeUrl.hostname === searchAsUrl.hostname) { + if (bridgeUrl.match(searchRegex)) { bridgeCards[i].style.display = 'block'; } } From d60f0b0e74278db9d30b50e40533d92d92c079ec Mon Sep 17 00:00:00 2001 From: Dag <me@dvikan.no> Date: Tue, 18 Jun 2024 21:12:29 +0200 Subject: [PATCH 94/97] feat(FilterBridge): custom feed name parameter (#4136) fix #4100 --- bridges/FilterBridge.php | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/bridges/FilterBridge.php b/bridges/FilterBridge.php index 3448a8c7cfe..a1066fb5cfb 100644 --- a/bridges/FilterBridge.php +++ b/bridges/FilterBridge.php @@ -15,6 +15,12 @@ class FilterBridge extends FeedExpander 'exampleValue' => 'https://lorem-rss.herokuapp.com/feed?unit=day', 'required' => true, ], + 'name' => [ + 'name' => 'Feed name (optional)', + 'type' => 'text', + 'exampleValue' => 'My feed', + 'required' => false, + ], 'filter' => [ 'name' => 'Filter (regular expression!!!)', 'required' => false, @@ -158,11 +164,18 @@ protected function parseItem(array $item) public function getURI() { $url = $this->getInput('url'); - - if (empty($url)) { - $url = parent::getURI(); + if ($url) { + return $url; } + return parent::getURI(); + } - return $url; + public function getName() + { + $name = $this->getInput('name'); + if ($name) { + return $name; + } + return parent::getName(); } } From 2a84350cb2bf4d62111c0294e71d56302714ee63 Mon Sep 17 00:00:00 2001 From: July <phantop@tuta.io> Date: 
Fri, 21 Jun 2024 09:47:34 -0400 Subject: [PATCH 95/97] [HumbleBundleBridge] Create new bridge (#4139) * [HumbleBundleBridge] Create new bridge * [HumbleBundleBridge] Use less redundant bundle type handling --- bridges/HumbleBundleBridge.php | 68 ++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 bridges/HumbleBundleBridge.php diff --git a/bridges/HumbleBundleBridge.php b/bridges/HumbleBundleBridge.php new file mode 100644 index 00000000000..42e025a58db --- /dev/null +++ b/bridges/HumbleBundleBridge.php @@ -0,0 +1,68 @@ +<?php + +class HumbleBundleBridge extends BridgeAbstract +{ + const NAME = 'Humble Bundle'; + const MAINTAINER = 'phantop'; + const URI = 'https://humblebundle.com/'; + const DESCRIPTION = 'Returns bundles from Humble Bundle.'; + const PARAMETERS = [[ + 'type' => [ + 'name' => 'Bundle type', + 'type' => 'list', + 'defaultValue' => 'bundles', + 'values' => [ + 'All' => 'bundles', + 'Books' => 'books', + 'Games' => 'games', + 'Software' => 'software', + ] + ] + ]]; + + public function collectData() + { + $page = getSimpleHTMLDOMCached($this->getURI()); + $json_text = $page->find('#landingPage-json-data', 0)->innertext; + $json = json_decode(html_entity_decode($json_text), true)['data']; + + $products = []; + $types = ['books', 'games', 'software']; + $types = $this->getInput('type') === 'bundles' ? $types : [$this->getInput('type')]; + foreach ($types as $type) { + $products = array_merge($products, $json[$type]['mosaic'][0]['products']); + } + + foreach ($products as $element) { + $item = []; + $item['author'] = $element['author']; + $item['timestamp'] = $element['start_date|datetime']; + $item['title'] = $element['tile_short_name']; + $item['uid'] = $element['machine_name']; + $item['uri'] = parent::getURI() . $element['product_url']; + + $item['content'] = $element['marketing_blurb']; + $item['content'] .= '<br>' . 
$element['detailed_marketing_blurb']; + + $item['categories'] = $element['hover_highlights']; + array_unshift($item['categories'], explode(':', $element['tile_name'])[0]); + array_unshift($item['categories'], $element['tile_stamp']); + + $item['enclosures'] = [$element['tile_logo'], $element['high_res_tile_image']]; + $this->items[] = $item; + } + } + + public function getName() + { + $name = parent::getName(); + $name .= $this->getInput('type') ? ' - ' . $this->getInput('type') : ''; + return $name; + } + + public function getURI() + { + $uri = parent::getURI() . $this->getInput('type'); + return $uri; + } +} From adad9d6405efb1b19987bf6933ea8309b5f3c28e Mon Sep 17 00:00:00 2001 From: Thomas <mightymt@users.noreply.github.com> Date: Mon, 24 Jun 2024 22:32:03 +0200 Subject: [PATCH 96/97] [YouTubeCommunityTabBridge] Improve JSON extraction (#4140) Small change that should make the extraction of JSON from HTML work more reliably --- bridges/YouTubeCommunityTabBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/YouTubeCommunityTabBridge.php b/bridges/YouTubeCommunityTabBridge.php index 0c145c02675..284b81f924d 100644 --- a/bridges/YouTubeCommunityTabBridge.php +++ b/bridges/YouTubeCommunityTabBridge.php @@ -32,7 +32,7 @@ class YouTubeCommunityTabBridge extends BridgeAbstract private $itemTitle = ''; private $urlRegex = '/youtube\.com\/(channel|user|c)\/([\w]+)\/community/'; - private $jsonRegex = '/var ytInitialData = (.*);<\/script>/'; + private $jsonRegex = '/var ytInitialData = ([^<]*);<\/script>/'; public function detectParameters($url) { @@ -70,7 +70,7 @@ public function collectData() $html = getSimpleHTMLDOM($this->feedUrl); } - $json = $this->extractJson($html->find('body', 0)->innertext); + $json = $this->extractJson($html->find('html', 0)->innertext); $this->feedName = $json->header->c4TabbedHeaderRenderer->title; From d0c35146dd9b8b0625eacefb59402a16519b5c5f Mon Sep 17 00:00:00 2001 From: tillcash 
<tillcash@users.noreply.github.com> Date: Sat, 29 Jun 2024 00:21:59 +0530 Subject: [PATCH 97/97] [HinduTamilBridge] Fix timestamp again (#4142) --- bridges/HinduTamilBridge.php | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/bridges/HinduTamilBridge.php b/bridges/HinduTamilBridge.php index cab04171acf..1b556ed80b6 100644 --- a/bridges/HinduTamilBridge.php +++ b/bridges/HinduTamilBridge.php @@ -66,7 +66,7 @@ protected function parseItem($item) $date = $dom->find('p span.date', 1); if ($date) { - $item['timestamp'] = $date->innertext . ' IST'; + $item['timestamp'] = $this->convertToRFC3339($date->plaintext); } $content = $dom->find('#pgContentPrint', 0); @@ -88,4 +88,16 @@ private function cleanContent($content) return $content; } + + private function convertToRFC3339($DateString) + { + $timestamp = strtotime(trim($DateString)); + + if ($timestamp !== false) { + $rfc3339DateTime = date('Y-m-d\TH:i:s', $timestamp) . '+05:30'; + return $rfc3339DateTime; + } else { + return null; + } + } }