From c5f586497f3d23be61a6e8a5fe0f948f98a5b2f6 Mon Sep 17 00:00:00 2001 From: Mynacol Date: Sat, 16 Dec 2023 11:21:19 +0100 Subject: [PATCH 01/88] [GolemBridge] Remove multi-page page headers On multi-page articles like [1], all the pages after the first one have a page header that we add in the article content. When we tack the pages together again, we don't need those extra page headers. [1] https://www.golem.de/news/science-fiction-die-zehn-besten-filme-aus-den-spannenden-70ern-2312-179557.html --- bridges/GolemBridge.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index c1b03433af9..599d713a0ee 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -116,9 +116,6 @@ private function extractContent($page) // reload html, as remove() is buggy $article = str_get_html($article->outertext); - if ($pageHeader = $article->find('header.paged-cluster-header h1', 0)) { - $item .= $pageHeader; - } $header = $article->find('header', 0); foreach ($header->find('p, figure') as $element) { From 3944ae68cbe8b8dd4fd653a288cffdb42cd3802e Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 19 Dec 2023 07:53:25 +0100 Subject: [PATCH 02/88] fix(reddit): use old.reddit.com instead of www.reddit.com (#3848) --- bridges/RedditBridge.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index 2b7fe84f9a5..bb3e7afcf38 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -1,10 +1,15 @@ Date: Tue, 19 Dec 2023 08:46:37 +0100 Subject: [PATCH 03/88] fix(gatesnotes): the unfucked their json (#3849) --- bridges/GatesNotesBridge.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index 24ba9b2ec17..0d9199680f2 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -23,12 +23,14 @@ public function collectData() $cleanedContent = str_replace([ '', '', - '\r\n', ], '', $rawContent); - $cleanedContent = str_replace('\"', '"', $cleanedContent); - $cleanedContent = trim($cleanedContent, '"'); + // $cleanedContent = str_replace('\"', '"', $cleanedContent); + // $cleanedContent = trim($cleanedContent, '"'); $json = Json::decode($cleanedContent, false); + if (is_string($json)) { + throw new \Exception('wtf? ' . $json); + } foreach ($json as $article) { $item = []; From 98a94855dc6b909b75629c6630c3795c68e7d560 Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 20 Dec 2023 03:16:25 +0100 Subject: [PATCH 04/88] feat: embed response in http exception (#3847) --- bridges/GettrBridge.php | 10 +++++++++- config.default.ini.php | 3 ++- lib/contents.php | 15 ++------------- lib/http.php | 24 +++++++++++++++++++++++- templates/exception.html.php | 23 +++++++++++++++++++++++ 5 files changed, 59 insertions(+), 16 deletions(-) diff --git a/bridges/GettrBridge.php b/bridges/GettrBridge.php index 74804043049..d3b9b899aa3 100644 --- a/bridges/GettrBridge.php +++ b/bridges/GettrBridge.php @@ -33,7 +33,15 @@ public function collectData() $user, min($this->getInput('limit'), 20) ); - $data = json_decode(getContents($api), false); + try { + $json = getContents($api); + } catch (HttpException $e) { + if ($e->getCode() === 400 && str_contains($e->response->getBody(), 'E_USER_NOTFOUND')) { + throw new \Exception('User not found: ' . $user); + } + throw $e; + } + $data = json_decode($json, false); foreach ($data->result->aux->post as $post) { $this->items[] = [ diff --git a/config.default.ini.php b/config.default.ini.php index 52786aefbe4..201b1414fcd 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -47,7 +47,8 @@ enable_maintenance_mode = false [http] -timeout = 60 +; Operation timeout in seconds +timeout = 30 useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" ; Max http response size in MB diff --git a/lib/contents.php b/lib/contents.php index a4def21ae68..8676a2a8df8 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -101,19 +101,8 @@ function getContents( $response = $response->withBody($cachedResponse->getBody()); break; default: - $exceptionMessage = sprintf( - '%s resulted in %s %s %s', - $url, - $response->getCode(), - $response->getStatusLine(), - // If debug, include a part of the response body in the exception message - Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', - ); - - if (CloudFlareException::isCloudFlareResponse($response)) { - throw new CloudFlareException($exceptionMessage, $response->getCode()); - } - throw new HttpException(trim($exceptionMessage), $response->getCode()); + $e = HttpException::fromResponse($response, $url); + throw $e; } if ($returnFull === true) { // todo: return the actual response object diff --git a/lib/http.php b/lib/http.php index eb70705f600..bfa6b6bff7f 100644 --- a/lib/http.php +++ b/lib/http.php @@ -2,7 +2,29 @@ class HttpException extends \Exception { - // todo: should include the failing http response (if present) + public ?Response $response; + + public function __construct(string $message = '', int $statusCode = 0, ?Response $response = null) + { + parent::__construct($message, $statusCode); + $this->response = $response ?? new Response('', 0); + } + + public static function fromResponse(Response $response, string $url): HttpException + { + $message = sprintf( + '%s resulted in %s %s %s', + $url, + $response->getCode(), + $response->getStatusLine(), + // If debug, include a part of the response body in the exception message + Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', + ); + if (CloudFlareException::isCloudFlareResponse($response)) { + return new CloudFlareException($message, $response->getCode(), $response); + } + return new HttpException(trim($message), $response->getCode(), $response); + } } final class CloudFlareException extends HttpException diff --git a/templates/exception.html.php b/templates/exception.html.php index dac0ad26a7a..e1dd97c112e 100644 --- a/templates/exception.html.php +++ b/templates/exception.html.php @@ -16,6 +16,13 @@

+ getCode() === 400): ?> +

400 Bad Request

+

+ This is usually caused by an incorrectly constructed http request. +

+ + getCode() === 404): ?>

404 Page Not Found

@@ -40,6 +47,22 @@

+ getCode() === 0): ?> +

+ See + + https://curl.haxx.se/libcurl/c/libcurl-errors.html + + for description of the curl error code. +

+ +

+ + https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/getCode()) ?> + +

+ + getCode() === 10): ?>

The rss feed is completely empty

From 4e40e032b0fcac52bc74ba5994cefe1d00debf45 Mon Sep 17 00:00:00 2001 From: Mynacol Date: Wed, 20 Dec 2023 22:18:10 +0100 Subject: [PATCH 05/88] Remove matrix reference The main communications platform is still Libera.chat, matrix was only provided by the hosted IRC-Matrix bridge. The bridge was turned off already and won't come back. --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 570fb87d89a..2a762d45763 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ Officially hosted instance: https://rss-bridge.org/bridge01/ [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) -[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#rssbridge:libera.chat) [![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions) ||| From 4c5cf89725e7ebd975eb6ec5136b5e3927df07fe Mon Sep 17 00:00:00 2001 From: Dag Date: Thu, 21 Dec 2023 09:18:21 +0100 Subject: [PATCH 06/88] fix(rumble): not all videos have a datetime (#3852) --- bridges/RumbleBridge.php | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/bridges/RumbleBridge.php b/bridges/RumbleBridge.php index d5b82136ad1..f6bfca7d193 100644 --- a/bridges/RumbleBridge.php +++ b/bridges/RumbleBridge.php @@ -40,15 +40,18 @@ public function collectData() $dom = getSimpleHTMLDOM($url); foreach ($dom->find('ol.thumbnail__grid div.thumbnail__grid--item') as $video) { - $datetime = $video->find('time', 0)->getAttribute('datetime'); - - $this->items[] = [ + $item = [ 'title' => $video->find('h3', 0)->plaintext, 'uri' => self::URI . $video->find('a', 0)->href, - 'timestamp' => (new \DateTimeImmutable($datetime))->getTimestamp(), 'author' => $account . '@rumble.com', 'content' => defaultLinkTo($video, self::URI)->innertext, ]; + $time = $video->find('time', 0); + if ($time) { + $publishedAt = new \DateTimeImmutable($time->getAttribute('datetime')); + $item['timestamp'] = $publishedAt->getTimestamp(); + } + $this->items[] = $item; } } From f40f99740588b09033917fd38132a99875495540 Mon Sep 17 00:00:00 2001 From: Dag Date: Thu, 21 Dec 2023 09:24:22 +0100 Subject: [PATCH 07/88] fix: various small fixes (#3853) --- bridges/ARDAudiothekBridge.php | 20 +++++++++++++------- bridges/CarThrottleBridge.php | 6 ++---- bridges/EZTVBridge.php | 2 +- bridges/TrelloBridge.php | 2 +- bridges/YoutubeBridge.php | 6 +++++- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/bridges/ARDAudiothekBridge.php b/bridges/ARDAudiothekBridge.php index 2c1958f3d0e..619c0911f06 100644 --- a/bridges/ARDAudiothekBridge.php +++ b/bridges/ARDAudiothekBridge.php @@ -63,11 +63,13 @@ class ARDAudiothekBridge extends BridgeAbstract public function collectData() { - $oldTz = date_default_timezone_get(); + $path = $this->getInput('path'); + $limit = $this->getInput('limit'); + $oldTz = date_default_timezone_get(); date_default_timezone_set('Europe/Berlin'); - $pathComponents = explode('/', $this->getInput('path')); + $pathComponents = explode('/', $path); if (empty($pathComponents)) { returnClientError('Path may not be empty'); } @@ -82,17 +84,21 @@ public function collectData() } $url = self::APIENDPOINT . 'programsets/' . $showID . '/'; - $rawJSON = getContents($url); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json1 = getContents($url); + $data1 = Json::decode($json1, false); + $processedJSON = $data1->data->programSet; + if (!$processedJSON) { + throw new \Exception('Unable to find show id: ' . $showID); + } - $limit = $this->getInput('limit'); $answerLength = 1; $offset = 0; $numberOfElements = 1; while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) { - $rawJSON = getContents($url . '?offset=' . $offset); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json2 = getContents($url . '?offset=' . $offset); + $data2 = Json::decode($json2, false); + $processedJSON = $data2->data->programSet; $answerLength = count($processedJSON->items->nodes); $offset = $offset + $answerLength; diff --git a/bridges/CarThrottleBridge.php b/bridges/CarThrottleBridge.php index 913b686caec..70d7b54e140 100644 --- a/bridges/CarThrottleBridge.php +++ b/bridges/CarThrottleBridge.php @@ -9,8 +9,7 @@ class CarThrottleBridge extends BridgeAbstract public function collectData() { - $news = getSimpleHTMLDOMCached(self::URI . 'news') - or returnServerError('could not retrieve page'); + $news = getSimpleHTMLDOMCached(self::URI . 'news'); $this->items[] = []; @@ -22,8 +21,7 @@ public function collectData() $item['uri'] = self::URI . $titleElement->getAttribute('href'); $item['title'] = $titleElement->innertext; - $articlePage = getSimpleHTMLDOMCached($item['uri']) - or returnServerError('could not retrieve page'); + $articlePage = getSimpleHTMLDOMCached($item['uri']); $authorDiv = $articlePage->find('div.author div'); if ($authorDiv) { diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 73318f0c713..25a88124266 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -96,7 +96,7 @@ protected function getEztvUri() protected function getItemFromTorrent($torrent) { $item = []; - $item['uri'] = $torrent->episode_url; + $item['uri'] = $torrent->episode_url ?? $torrent->torrent_url; $item['author'] = $torrent->imdb_id; $item['timestamp'] = $torrent->date_released_unix; $item['title'] = $torrent->title; diff --git a/bridges/TrelloBridge.php b/bridges/TrelloBridge.php index a1b5cfb8567..cab2bde2880 100644 --- a/bridges/TrelloBridge.php +++ b/bridges/TrelloBridge.php @@ -648,7 +648,7 @@ public function collectData() $action->type ]; if (isset($action->data->card)) { - $item['categories'][] = $action->data->card->name; + $item['categories'][] = $action->data->card->name ?? $action->data->card->id; $item['uri'] = 'https://trello.com/c/' . $action->data->card->shortLink . '#action-' diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 993f8c90663..6a29e387158 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -164,7 +164,11 @@ private function collectDataInternal() $jsonData = $this->extractJsonFromHtml($html); // TODO: this method returns only first 100 video items // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0] ?? null; + if (!$jsonData) { + // playlist probably doesnt exists + throw new \Exception('Unable to find playlist: ' . $url_listing); + } $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; $item_count = count($jsonData); From ea2b4d7506f0feded2899cb0aab351fa7dca3194 Mon Sep 17 00:00:00 2001 From: July Date: Sat, 23 Dec 2023 03:42:37 -0500 Subject: [PATCH 08/88] [ArsTechnicaBridge] Properly handle paged content (#3855) * [ArsTechnicaBridge] Properly handle paged content * [ArsTechnicaBridge] Remove normal site ad wrapper --- bridges/ArsTechnicaBridge.php | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index 613c1c58ca4..2c631871caf 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -35,39 +35,34 @@ public function collectData() protected function parseItem(array $item) { - $item_html = getSimpleHTMLDOMCached($item['uri'] . '&'); + $item_html = getSimpleHTMLDOMCached($item['uri']); $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] = $item_html->find('.article-content', 0); - $item_content = $item_html->find('.article-content.post-page', 0); - if (!$item_content) { - // The dom selector probably broke. Let's just return the item as-is - return $item; + $pages = $item_html->find('nav.page-numbers > .numbers > a', -2); + if (null !== $pages) { + for ($i = 2; $i <= $pages->innertext; $i++) { + $page_url = $item['uri'] . '&page=' . $i; + $page_html = getSimpleHTMLDOMCached($page_url); + $page_html = defaultLinkTo($page_html, self::URI); + $item['content'] .= $page_html->find('.article-content', 0); + } + $item['content'] = str_get_html($item['content']); } - $item['content'] = $item_content; - // remove various ars advertising $item['content']->find('#social-left', 0)->remove(); foreach ($item['content']->find('.ars-component-buy-box') as $ad) { $ad->remove(); } - foreach ($item['content']->find('i-amphtml-sizer') as $ad) { + foreach ($item['content']->find('.ad_wrapper') as $ad) { $ad->remove(); } foreach ($item['content']->find('.sidebar') as $ad) { $ad->remove(); } - foreach ($item['content']->find('a') as $link) { //remove amp redirect links - $url = $link->getAttribute('href'); - if (str_contains($url, 'go.redirectingat.com')) { - $url = extractFromDelimiters($url, 'url=', '&'); - $url = urldecode($url); - $link->setAttribute('href', $url); - } - } - - $item['content'] = backgroundToImg(str_replace('data-amp-original-style="background-image', 'style="background-image', $item['content'])); + $item['content'] = backgroundToImg($item['content']); $item['uid'] = explode('=', $item['uri'])[1]; From 98dafb61ae5519b7c6c4be2d7dd4d66b6bd6a4eb Mon Sep 17 00:00:00 2001 From: xduugu Date: Sat, 23 Dec 2023 08:43:01 +0000 Subject: [PATCH 09/88] [ARDAudiothekBridge] add duration to feed items (#3854) --- bridges/ARDAudiothekBridge.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bridges/ARDAudiothekBridge.php b/bridges/ARDAudiothekBridge.php index 619c0911f06..02b6b00778d 100644 --- a/bridges/ARDAudiothekBridge.php +++ b/bridges/ARDAudiothekBridge.php @@ -125,6 +125,10 @@ public function collectData() $item['categories'] = [$category]; } + $item['itunes'] = [ + 'duration' => $audio->duration, + ]; + $this->items[] = $item; } } From 9f163ab7c651f44c1d6266ca817aca2c0f208f51 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Mon, 25 Dec 2023 14:51:51 +0100 Subject: [PATCH 10/88] [FreeTelechargerBridge] Update to the new URL (#3856) * [FreeTelechargerBridge] Update to the new URL Website has changed URL and some design : this bridge is now adapted to thoses changes * [FreeTelechargerBridge] Fix example value Example valuse seems to use an "old" template, switch to a newer example that use the new template * [FreeTelechargerBridge] Fix notice Fix notice --- bridges/FreeTelechargerBridge.php | 61 ++++++++++++++++--------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/bridges/FreeTelechargerBridge.php b/bridges/FreeTelechargerBridge.php index 8362b4ff74c..f0e5d35a5bb 100644 --- a/bridges/FreeTelechargerBridge.php +++ b/bridges/FreeTelechargerBridge.php @@ -3,7 +3,7 @@ class FreeTelechargerBridge extends BridgeAbstract { const NAME = 'Free-Telecharger'; - const URI = 'https://www.free-telecharger.live/'; + const URI = 'https://www.free-telecharger.art/'; const DESCRIPTION = 'Suivi de série sur Free-Telecharger'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = [ @@ -12,43 +12,46 @@ class FreeTelechargerBridge extends BridgeAbstract 'name' => 'URL de la série', 'type' => 'text', 'required' => true, - 'title' => 'URL d\'une série sans le https://www.free-telecharger.live/', + 'title' => 'URL d\'une série sans le https://www.free-telecharger.art/', 'pattern' => 'series.*\.html', - 'exampleValue' => 'series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html' + 'exampleValue' => 'series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html' ], ] ]; const CACHE_TIMEOUT = 3600; + private string $showTitle; + private string $showTechDetails; + public function collectData() { - $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')); + $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')); - // Find all block content of the page - $blocks = $html->find('div[class=block1]'); + // Find all block content of the page + $blocks = $html->find('div[class=block1]'); - // Global Infos block - $infosBlock = $blocks[0]; - // Links block - $linksBlock = $blocks[2]; + // Global Infos block + $infosBlock = $blocks[0]; + // Links block + $linksBlock = $blocks[2]; - // Extract Global Show infos - $this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext); - $this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext); + // Extract Global Show infos + $this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext); + $this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext); - // Get Episodes names and links - $episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#ff6600]'); - $links = $linksBlock->find('div[id=link]', 0)->find('a'); + // Get Episodes names and links + $episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#e93100]'); + $links = $linksBlock->find('div[id=link]', 0)->find('a'); foreach ($episodes as $index => $episode) { - $item = []; // Create an empty item - $item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-'); - $item['uri'] = $links[$index]->href; - $item['content'] = '' . $item['title'] . ''; - $item['uid'] = hash('md5', $item['uri']); + $item = []; // Create an empty item + $item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-'); + $item['uri'] = $links[$index]->href; + $item['content'] = '' . $item['title'] . ''; + $item['uid'] = hash('md5', $item['uri']); - $this->items[] = $item; // Add this item to the list + $this->items[] = $item; // Add this item to the list } } @@ -57,7 +60,7 @@ public function getName() switch ($this->queriedContext) { case 'Suivi de publication de série': return $this->showTitle . ' ' . $this->showTechDetails . ' - ' . self::NAME; - break; + break; default: return self::NAME; } @@ -68,7 +71,7 @@ public function getURI() switch ($this->queriedContext) { case 'Suivi de publication de série': return self::URI . $this->getInput('url'); - break; + break; default: return self::URI; } @@ -76,14 +79,14 @@ public function getURI() public function detectParameters($url) { - // Example: https://www.free-telecharger.live/series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html + // Example: https://www.free-telecharger.art/series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html $params = []; - $regex = '/^https:\/\/www.*\.free-telecharger\.live\/(series.*\.html)/'; + $regex = '/^https:\/\/www.*\.free-telecharger\.art\/(series.*\.html)/'; if (preg_match($regex, $url, $matches) > 0) { - $params['context'] = 'Suivi de publication de série'; - $params['url'] = urldecode($matches[1]); - return $params; + $params['context'] = 'Suivi de publication de série'; + $params['url'] = urldecode($matches[1]); + return $params; } return null; From c9074facfed51371a59dd189648c5a80751feb4e Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Tue, 26 Dec 2023 12:18:42 +0100 Subject: [PATCH 11/88] [GreatFonBridge] Remove bridge (#3857) Website is unreliable, it's not useful to keep this bridge. --- bridges/GreatFonBridge.php | 140 ------------------------------------- 1 file changed, 140 deletions(-) delete mode 100644 bridges/GreatFonBridge.php diff --git a/bridges/GreatFonBridge.php b/bridges/GreatFonBridge.php deleted file mode 100644 index 2951634c15f..00000000000 --- a/bridges/GreatFonBridge.php +++ /dev/null @@ -1,140 +0,0 @@ - [ - 'u' => [ - 'name' => 'username', - 'type' => 'text', - 'title' => 'Instagram username you want to follow', - 'exampleValue' => 'aesoprockwins', - 'required' => true, - ], - ] - ]; - const TEST_DETECT_PARAMETERS = [ - 'https://www.instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram'], - 'https://instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram'], - 'https://greatfon.com/v/instagram' => ['context' => 'Username', 'u' => 'instagram'], - 'https://www.greatfon.com/v/instagram' => ['context' => 'Username', 'u' => 'instagram'], - ]; - - public function collectData() - { - $username = $this->getInput('u'); - $html = getSimpleHTMLDOMCached(self::URI . '/v/' . $username); - $html = defaultLinkTo($html, self::URI); - - foreach ($html->find('div[class*=content__item]') as $post) { - // Skip the ads - if (!str_contains($post->class, 'ads')) { - $url = $post->find('a[href^=https://greatfon.com/c/]', 0)->href; - $date = $this->parseDate($post->find('div[class=content__time-text]', 0)->plaintext); - $description = $post->find('img', 0)->alt; - $imageUrl = $post->find('img', 0)->src; - $author = $username; - $uid = $url; - $title = 'Post - ' . $username . ' - ' . $this->descriptionToTitle($description); - - // Checking post type - $isVideo = (bool) $post->find('div[class=content__camera]', 0); - $videoNote = $isVideo ? '

(video)

' : ''; - - $this->items[] = [ - 'uri' => $url, - 'author' => $author, - 'timestamp' => $date, - 'title' => $title, - 'thumbnail' => $imageUrl, - 'enclosures' => [$imageUrl], - 'content' => << - {$description} - -{$videoNote} -

{$description}

-HTML, - 'uid' => $uid - ]; - } - } - } - - private function parseDate($content) - { - // Parse date, and transform the date into a timetamp, even in a case of a relative date - $date = date_create(); - - // Content trimmed to be sure that the "article" is at the beginning of the string and remove "ago" to make it a valid PHP date interval - $dateString = trim(str_replace(' ago', '', $content)); - - // Replace the article "an" or "a" by the number "1" to be a valid PHP date interval - $dateString = preg_replace('/^((an|a) )/m', '1 ', $dateString); - - $relativeDate = date_interval_create_from_date_string($dateString); - if ($relativeDate) { - date_sub($date, $relativeDate); - // As the relative interval has the precision of a day for date older than 24 hours, we can remove the hour of the date, as it is not relevant - date_time_set($date, 0, 0, 0, 0); - } else { - $this->logger->info(sprintf('Unable to parse date string: %s', $dateString)); - } - return date_format($date, 'r'); - } - - public function getURI() - { - if (!is_null($this->getInput('u'))) { - return urljoin(self::URI, '/v/' . $this->getInput('u')); - } - - return parent::getURI(); - } - - public function getIcon() - { - return static::URI . '/images/favicon-hub-3ede543aa6d1225e8dc016ccff6879c8.ico?vsn=d'; - } - - private function descriptionToTitle($description) - { - return strlen($description) > 60 ? mb_substr($description, 0, 57) . '...' : $description; - } - - public function getName() - { - if (!is_null($this->getInput('u'))) { - return 'Username ' . $this->getInput('u') . ' - GreatFon Bridge'; - } - return parent::getName(); - } - - public function detectParameters($url) - { - $regex = '/^http(s|):\/\/((www\.|)(instagram.com)\/([a-zA-Z0-9_\.]{1,30})(\/reels\/|\/tagged\/|\/|)|(www\.|)(greatfon.com)\/v\/([a-zA-Z0-9_\.]{1,30}))/'; - if (preg_match($regex, $url, $matches) > 0) { - $params['context'] = 'Username'; - // Extract detected domain using the regex - $domain = $matches[8] ?? $matches[4]; - if ($domain == 'greatfon.com') { - $params['u'] = $matches[9]; - return $params; - } elseif ($domain == 'instagram.com') { - $params['u'] = $matches[5]; - return $params; - } else { - return null; - } - } else { - return null; - } - } -} From 19384463857c35b1d3ef0a7dbbbcc40d2f0cba0c Mon Sep 17 00:00:00 2001 From: Florent V Date: Tue, 26 Dec 2023 12:19:08 +0100 Subject: [PATCH 12/88] [EdfPricesBridge] add new bridge (#3846) * [EdfPricesBridge] add new brige * [EdfPricesBridge] bad refactor * [EdfPricesBridge] support php 7.4 --------- Co-authored-by: Florent VIOLLEAU --- bridges/EdfPricesBridge.php | 106 ++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 bridges/EdfPricesBridge.php diff --git a/bridges/EdfPricesBridge.php b/bridges/EdfPricesBridge.php new file mode 100644 index 00000000000..f67ed30b1c7 --- /dev/null +++ b/bridges/EdfPricesBridge.php @@ -0,0 +1,106 @@ + [ + 'name' => 'Choisir un contrat', + 'type' => 'list', + // we can add later HCHP, EJP, base + 'values' => ['Tempo' => '/energie/edf/tarifs/tempo'], + ] + ] + ]; + const CACHE_TIMEOUT = 7200; // 2h + + /** + * @param simple_html_dom $html + * @param string $contractUri + * @return void + */ + private function tempo(simple_html_dom $html, string $contractUri): void + { + // current color and next + $daysDom = $html->find('#calendrier', 0)->nextSibling()->find('.card--ejp'); + if ($daysDom && count($daysDom) === 2) { + foreach ($daysDom as $dayDom) { + $day = trim($dayDom->find('.card__title', 0)->innertext) . '/' . (new \DateTime('now'))->format(('Y')); + $dayColor = $dayDom->find('.card-ejp__icon span', 0)->innertext; + + $text = $day . ' - ' . $dayColor; + $item['uri'] = self::URI . $contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + + // colors + $ulDom = $html->find('#tarif-de-l-offre-edf-tempo-current-date-html-year', 0)->nextSibling()->nextSibling()->nextSibling(); + $elementsDom = $ulDom->find('li'); + if ($elementsDom && count($elementsDom) === 3) { + foreach ($elementsDom as $elementDom) { + $item = []; + + $matches = []; + preg_match_all('/Jour (.*) : Heures (.*) : (.*) € \/ Heures (.*) : (.*) €/um', $elementDom->innertext, $matches, PREG_SET_ORDER, 0); + + if ($matches && count($matches[0]) === 6) { + for ($i = 0; $i < 2; $i++) { + $text = 'Jour ' . $matches[0][1] . ' - Heures ' . $matches[0][2 + 2 * $i] . ' : ' . $matches[0][3 + 2 * $i] . '€'; + $item['uri'] = self::URI . $contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + } + } + + // powers + $ulPowerContract = $ulDom->nextSibling()->nextSibling(); + $elementsPowerContractDom = $ulPowerContract->find('li'); + if ($elementsPowerContractDom && count($elementsPowerContractDom) === 4) { + foreach ($elementsPowerContractDom as $elementPowerContractDom) { + $item = []; + + $matches = []; + preg_match_all('/(.*) kVA : (.*) €/um', $elementPowerContractDom->innertext, $matches, PREG_SET_ORDER, 0); + + if ($matches && count($matches[0]) === 3) { + $text = $matches[0][1] . ' kVA : ' . $matches[0][2] . '€'; + $item['uri'] = self::URI . $contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + } + } + + public function collectData() + { + $contract = $this->getKey('contract'); + $contractUri = $this->getInput('contract'); + $html = getSimpleHTMLDOM(self::URI . $contractUri); + + if ($contract === 'Tempo') { + $this->tempo($html, $contractUri); + } + } +} From ad2d4c7b1b538868070e0264f3692542883cac50 Mon Sep 17 00:00:00 2001 From: Florent V Date: Tue, 26 Dec 2023 12:20:49 +0100 Subject: [PATCH 13/88] [BridgeAbstract] use getParameters instead of static to allow overriding it from bridges (#3858) --- lib/BridgeAbstract.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index a7b811a84da..0f86f454c0d 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -154,8 +154,8 @@ private function setInputWithContext(array $input, $queriedContext) { // Import and assign all inputs to their context foreach ($input as $name => $value) { - foreach (static::PARAMETERS as $context => $set) { - if (array_key_exists($name, static::PARAMETERS[$context])) { + foreach ($this->getParameters() as $context => $set) { + if (array_key_exists($name, $this->getParameters()[$context])) { $this->inputs[$context][$name]['value'] = $value; } } @@ -163,16 +163,16 @@ private function setInputWithContext(array $input, $queriedContext) // Apply default values to missing data $contexts = [$queriedContext]; - if (array_key_exists('global', static::PARAMETERS)) { + if (array_key_exists('global', $this->getParameters())) { $contexts[] = 'global'; } foreach ($contexts as $context) { - if (!isset(static::PARAMETERS[$context])) { + if (!isset($this->getParameters()[$context])) { // unknown context provided by client, throw exception here? or continue? } - foreach (static::PARAMETERS[$context] as $name => $properties) { + foreach ($this->getParameters()[$context] as $name => $properties) { if (isset($this->inputs[$context][$name]['value'])) { continue; } @@ -204,8 +204,8 @@ private function setInputWithContext(array $input, $queriedContext) } // Copy global parameter values to the guessed context - if (array_key_exists('global', static::PARAMETERS)) { - foreach (static::PARAMETERS['global'] as $name => $properties) { + if (array_key_exists('global', $this->getParameters())) { + foreach ($this->getParameters()['global'] as $name => $properties) { if (isset($input[$name])) { $value = $input[$name]; } else { @@ -246,8 +246,8 @@ public function getKey($input) if (!isset($this->inputs[$this->queriedContext][$input]['value'])) { return null; } - if (array_key_exists('global', static::PARAMETERS)) { - if (array_key_exists($input, static::PARAMETERS['global'])) { + if (array_key_exists('global', $this->getParameters())) { + if (array_key_exists($input, $this->getParameters()['global'])) { $context = 'global'; } } @@ -256,7 +256,7 @@ public function getKey($input) } $needle = $this->inputs[$this->queriedContext][$input]['value']; - foreach (static::PARAMETERS[$context][$input]['values'] as $first_level_key => $first_level_value) { + foreach ($this->getParameters()[$context][$input]['values'] as $first_level_key => $first_level_value) { if (!is_array($first_level_value) && $needle === (string)$first_level_value) { return $first_level_key; } elseif (is_array($first_level_value)) { @@ -273,7 +273,7 @@ public function detectParameters($url) { $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; if ( - empty(static::PARAMETERS) + empty($this->getParameters()) && preg_match($regex, $url, $urlMatches) > 0 && preg_match($regex, static::URI, $bridgeUriMatches) > 0 && $urlMatches[3] === $bridgeUriMatches[3] From c8178e1fc409635af1a40167c4f511feb8d3df7f Mon Sep 17 00:00:00 2001 From: Damien Calesse <2787828+kranack@users.noreply.github.com> Date: Wed, 27 Dec 2023 13:17:49 +0100 Subject: [PATCH 14/88] [SensCritique] Fix bridge (#3860) --- bridges/SensCritiqueBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index b823b55c23a..005704e169e 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -57,7 +57,7 @@ public function collectData() } $html = getSimpleHTMLDOM($uri); // This selector name looks like it's automatically generated - $list = $html->find('div.Universes__WrapperProducts-sc-1qa2w66-0.eVdcAv', 0); + $list = $html->find('div[data-testid="row"]', 0); $this->extractDataFromList($list); } @@ -69,6 +69,7 @@ private function extractDataFromList($list) if ($list === null) { returnClientError('Cannot extract data from list'); } + foreach ($list->find('div[data-testid="product-list-item"]') as $movie) { $item = []; $item['title'] = $movie->find('h2 a', 0)->plaintext; From 5ab1924c4f96937885e12bcbd16b7bfb83a3c15b Mon Sep 17 00:00:00 2001 From: tillcash Date: Thu, 28 Dec 2023 18:20:34 +0530 Subject: [PATCH 15/88] Add WorldbankBridge and OglafBridge (#3862) * Add WorldbankBridge and OglafBridge * Update OglafBridge.php Remove redundant parent call to parseItem and rename formal argument to improve code clarity. * Update WorldbankBridge.php fix lint --- bridges/OglafBridge.php | 35 +++++++++++++++++++++++++ bridges/WorldbankBridge.php | 52 +++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 bridges/OglafBridge.php create mode 100644 bridges/WorldbankBridge.php diff --git a/bridges/OglafBridge.php b/bridges/OglafBridge.php new file mode 100644 index 00000000000..1f4bc1aff9e --- /dev/null +++ b/bridges/OglafBridge.php @@ -0,0 +1,35 @@ + [ + 'name' => 'limit (max 20)', + 'type' => 'number', + 'defaultValue' => 10, + 'required' => true, + ] + ] + ]; + + public function collectData() + { + $url = self::URI . 'feeds/rss/'; + $limit = min(20, $this->getInput('limit')); + $this->collectExpandableDatas($url, $limit); + } + + protected function parseItem($item) + { + $html = getSimpleHTMLDOMCached($item['uri']); + $comicImage = $html->find('img[id="strip"]', 0); + $item['content'] = $comicImage; + + return $item; + } +} diff --git a/bridges/WorldbankBridge.php b/bridges/WorldbankBridge.php new file mode 100644 index 00000000000..9b40e86e5da --- /dev/null +++ b/bridges/WorldbankBridge.php @@ -0,0 +1,52 @@ + [ + 'name' => 'Language', + 'type' => 'list', + 'defaultValue' => 'English', + 'values' => [ + 'English' => 'English', + 'French' => 'French', + ] + ], + 'limit' => [ + 'name' => 'limit (max 100)', + 'type' => 'number', + 'defaultValue' => 5, + 'required' => true, + ] + ] + ]; + + public function collectData() + { + $apiUrl = 'https://search.worldbank.org/api/v2/news?format=json&rows=' + . min(100, $this->getInput('limit')) + . '&lang_exact=' . $this->getInput('lang'); + + $jsonData = json_decode(getContents($apiUrl)); + + // Remove unnecessary data from the original object + if (isset($jsonData->documents->facets)) { + unset($jsonData->documents->facets); + } + + foreach ($jsonData->documents as $element) { + $this->items[] = [ + 'uid' => $element->id, + 'timestamp' => $element->lnchdt, + 'title' => $element->title->{'cdata!'}, + 'uri' => $element->url, + 'content' => $element->descr->{'cdata!'}, + ]; + } + } +} From f67d2eb88adc597cc57fbfc402c28725b671e5a3 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Thu, 28 Dec 2023 13:53:06 +0100 Subject: [PATCH 16/88] [TikTokBridge] Use embed iframe to bypass scraping protection (#3864) The Tiktok Website was totally changed using some "scraping" protection (passing as parameter value generated somewhere in the bunch of javascript to the "API URL" that was before). The iframe embed does not have such protection. It has less information (no date, ...) but it's better than nothing ! --- bridges/TikTokBridge.php | 66 ++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php index 73a18b0468c..6590df66808 100644 --- a/bridges/TikTokBridge.php +++ b/bridges/TikTokBridge.php @@ -8,12 +8,12 @@ class TikTokBridge extends BridgeAbstract const MAINTAINER = 'VerifiedJoseph'; const PARAMETERS = [ 'By user' => [ - 'username' => [ - 'name' => 'Username', - 'type' => 'text', - 'required' => true, - 'exampleValue' => '@tiktok', - ] + 'username' => [ + 'name' => 'Username', + 'type' => 'text', + 'required' => true, + 'exampleValue' => '@tiktok', + ] ]]; const TEST_DETECT_PARAMETERS = [ @@ -24,53 +24,33 @@ class TikTokBridge extends BridgeAbstract const CACHE_TIMEOUT = 900; // 15 minutes - private $feedName = ''; - public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()); + $html = getSimpleHTMLDOMCached('https://www.tiktok.com/embed/' . $this->processUsername()); - $title = $html->find('h1', 0)->plaintext ?? self::NAME; - $this->feedName = htmlspecialchars_decode($title); + $author = $html->find('span[data-e2e=creator-profile-userInfo-TUXText]', 0)->plaintext ?? self::NAME; - $var = $html->find('script[id=SIGI_STATE]', 0); - if (!$var) { - throw new \Exception('Unable to find tiktok user data for ' . $this->processUsername()); - } - $SIGI_STATE_RAW = $var->innertext; - $SIGI_STATE = Json::decode($SIGI_STATE_RAW, false); + $videos = $html->find('div[data-e2e=common-videoList-VideoContainer]'); - if (!isset($SIGI_STATE->ItemModule)) { - return; - } - - foreach ($SIGI_STATE->ItemModule as $key => $value) { + foreach ($videos as $video) { $item = []; - $link = 'https://www.tiktok.com/@' . $value->author . '/video/' . $value->id; - $image = $value->video->dynamicCover; - if (empty($image)) { - $image = $value->video->cover; - } - $views = $value->stats->playCount; - $hastags = []; - foreach ($value->textExtra as $tag) { - $hastags[] = $tag->hashtagName; - } - $hastags_str = ''; - foreach ($hastags as $tag) { - $hastags_str .= '#' . $tag . ' '; - } + // Handle link "untracking" + $linkParts = parse_url($video->find('a', 0)->href); + $link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . $linkParts['path']; + + $image = $video->find('video', 0)->poster; + $views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext; + + $enclosures = [$image]; $item['uri'] = $link; - $item['title'] = $value->desc; - $item['timestamp'] = $value->createTime; - $item['author'] = '@' . $value->author; - $item['enclosures'][] = $image; - $item['categories'] = $hastags; + $item['title'] = 'Video'; + $item['author'] = '@' . $author; + $item['enclosures'] = $enclosures; $item['content'] = << -

{$views} views


Hashtags: {$hastags_str} +

{$views} views


EOD; $this->items[] = $item; @@ -91,7 +71,7 @@ public function getName() { switch ($this->queriedContext) { case 'By user': - return $this->feedName . ' (' . $this->processUsername() . ') - TikTok'; + return $this->processUsername() . ' - TikTok'; default: return parent::getName(); } From 2032ed18c49a82fc2e634dfa6f2b91e652228876 Mon Sep 17 00:00:00 2001 From: Damien Calesse <2787828+kranack@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:51:15 +0100 Subject: [PATCH 17/88] [SensCritique] Update the content to add the image (#3865) --- bridges/SensCritiqueBridge.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index 005704e169e..f6a2ea16142 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -71,10 +71,17 @@ private function extractDataFromList($list) } foreach ($list->find('div[data-testid="product-list-item"]') as $movie) { + $synopsis = $movie->find('p[data-testid="synopsis"]', 0); + $item = []; $item['title'] = $movie->find('h2 a', 0)->plaintext; - // todo: fix image - $item['content'] = $movie->innertext; + $item['content'] = sprintf( + '

%s

%s

%s', + $movie->find('span[data-testid="poster-img-wrapper"]', 0)->{'data-srcname'}, + $movie->find('p[data-testid="other-infos"]', 0)->innertext, + $movie->find('p[data-testid="creators"]', 0)->innertext, + $synopsis ? sprintf('

%s

', $synopsis->innertext) : '' + ); $item['id'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); $item['uri'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); $this->items[] = $item; From 7dbe10658213e165c07faac01a8c79771b4917c8 Mon Sep 17 00:00:00 2001 From: Dag Date: Thu, 28 Dec 2023 23:26:14 +0100 Subject: [PATCH 18/88] docs(nginx, phpfpm): improve install and config instructions (#3866) --- README.md | 162 ++++++++++++++++++++++++++++++++++++------- caches/FileCache.php | 1 + index.php | 3 +- 3 files changed, 140 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 2a762d45763..34efc8de3e7 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,15 @@ ![RSS-Bridge](static/logo_600px.png) -RSS-Bridge is a web application. +RSS-Bridge is a PHP web application. It generates web feeds for websites that don't have one. Officially hosted instance: https://rss-bridge.org/bridge01/ +IRC channel #rssbridge at https://libera.chat/ + + [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) @@ -48,54 +51,147 @@ Check out RSS-Bridge right now on https://rss-bridge.org/bridge01/ Alternatively find another [public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html). +Requires minimum PHP 7.4. + ## Tutorial -### Install with composer or git +### How to install on traditional shared web hosting -Requires minimum PHP 7.4. +RSS-Bridge can basically be unzipped in a web folder. Should be working instantly. -```shell -apt install nginx php-fpm php-mbstring php-simplexml php-curl -``` +Latest zip as of Sep 2023: https://github.com/RSS-Bridge/rss-bridge/archive/refs/tags/2023-09-24.zip -```shell -cd /var/www -composer create-project -v --no-dev rss-bridge/rss-bridge -``` +### How to install on Debian 12 (nginx + php-fpm) + +These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month). ```shell +timedatectl set-timezone Europe/Oslo + +apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl + +# Create a new user account +useradd --shell /bin/bash --create-home rss-bridge + cd /var/www -git clone https://github.com/RSS-Bridge/rss-bridge.git -``` -Config: +# Create folder and change ownership +mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/ -```shell -# Give the http user write permission to the cache folder -chown www-data:www-data /var/www/rss-bridge/cache +# Become user +su rss-bridge + +# Fetch latest master +git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/ +cd rss-bridge + +# Copy over the default config +cp -v config.default.ini.php config.ini.php -# Optionally copy over the default config file -cp config.default.ini.php config.ini.php +# Give full permissions only to owner (rss-bridge) +chmod 700 -R ./ + +# Give read and execute to others (nginx and php-fpm) +chmod o+rx ./ ./static + +# Give read to others (nginx) +chmod o+r -R ./static ``` -Example config for nginx: +Nginx config: ```nginx -# /etc/nginx/sites-enabled/rssbridge +# /etc/nginx/sites-enabled/rss-bridge.conf + server { listen 80; server_name example.com; - root /var/www/rss-bridge; - index index.php; + access_log /var/log/nginx/rss-bridge.access.log; + error_log /var/log/nginx/rss-bridge.error.log; + + # Intentionally not setting a root folder here + + # autoindex is off by default but feels good to explicitly turn off + autoindex off; - location ~ \.php$ { + # Static content only served here + location /static/ { + alias /var/www/rss-bridge/static/; + } + + # Pass off to php-fpm only when location is exactly / + location = / { + root /var/www/rss-bridge/; include snippets/fastcgi-php.conf; - fastcgi_read_timeout 60s; - fastcgi_pass unix:/run/php/php-fpm.sock; + fastcgi_pass unix:/run/php/rss-bridge.sock; + } + + # Reduce spam + location = /favicon.ico { + access_log off; + log_not_found off; + } + + # Reduce spam + location = /robots.txt { + access_log off; + log_not_found off; } } ``` +PHP FPM pool config: +```ini +; /etc/php/8.2/fpm/pool.d/rss-bridge.conf + +[rss-bridge] + +user = rss-bridge +group = rss-bridge + +listen = /run/php/rss-bridge.sock + +listen.owner = www-data +listen.group = www-data + +pm = static +pm.max_children = 10 +pm.max_requests = 500 +``` + +PHP ini config: +```ini +; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini + +max_execution_time = 20 +memory_limit = 64M +``` + +Restart fpm and nginx: + +```shell +# Lint and restart php-fpm +php-fpm8.2 -t +systemctl restart php8.2-fpm + +# Lint and restart nginx +nginx -t +systemctl restart nginx +``` + +### How to install from Composer + +Install the latest release. + +```shell +cd /var/www +composer create-project -v --no-dev rss-bridge/rss-bridge +``` + +### How to install with Caddy + +TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785 + ### Install from Docker Hub: Install by downloading the docker image from Docker Hub: @@ -163,6 +259,22 @@ Learn more in ## How-to +### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" + +```shell +# Give rssbridge ownership +chown rssbridge:rssbridge -R /var/www/rss-bridge/cache + +# Or, give www-data ownership +chown www-data:www-data -R /var/www/rss-bridge/cache + +# Or, give everyone write permission +chmod 777 -R /var/www/rss-bridge/cache + +# Or last ditch effort (CAREFUL) +rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/ +``` + ### How to create a new bridge from scratch Create the new bridge in e.g. `bridges/BearBlogBridge.php`: diff --git a/caches/FileCache.php b/caches/FileCache.php index 09d127910ac..7a0eb81d95e 100644 --- a/caches/FileCache.php +++ b/caches/FileCache.php @@ -54,6 +54,7 @@ public function set($key, $value, int $ttl = null): void ]; $cacheFile = $this->createCacheFile($key); $bytes = file_put_contents($cacheFile, serialize($item), LOCK_EX); + // todo: Consider tightening the permissions of the created file. It usually allow others to read, depending on umask if ($bytes === false) { // Consider just logging the error here throw new \Exception(sprintf('Failed to write to: %s', $cacheFile)); diff --git a/index.php b/index.php index 14713e06f75..c2c546a184e 100644 --- a/index.php +++ b/index.php @@ -8,7 +8,8 @@ $errors = Configuration::checkInstallation(); if ($errors) { - die('
' . implode("\n", $errors) . '
'); + print '
' . implode("\n", $errors) . '
'; + exit(1); } $customConfig = []; From fac1f5cd88f04855a891aeb7341f783e57ce5b3c Mon Sep 17 00:00:00 2001 From: Dag Date: Sat, 30 Dec 2023 01:33:31 +0100 Subject: [PATCH 19/88] refactor(reddit) (#3869) * refactor * yup * fix also reporterre --- bridges/RedditBridge.php | 66 +++++++++++------------------------- bridges/ReporterreBridge.php | 44 +++++++++++++----------- 2 files changed, 44 insertions(+), 66 deletions(-) diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index bb3e7afcf38..618463a642d 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -173,7 +173,7 @@ private function collectDataInternal(): void $item['author'] = $data->author; $item['uid'] = $data->id; $item['timestamp'] = $data->created_utc; - $item['uri'] = $this->encodePermalink($data->permalink); + $item['uri'] = $this->urlEncodePathParts($data->permalink); $item['categories'] = []; @@ -193,13 +193,11 @@ private function collectDataInternal(): void if ($post->kind == 't1') { // Comment - $item['content'] - = htmlspecialchars_decode($data->body_html); + $item['content'] = htmlspecialchars_decode($data->body_html); } elseif ($data->is_self) { // Text post - $item['content'] - = htmlspecialchars_decode($data->selftext_html); + $item['content'] = htmlspecialchars_decode($data->selftext_html); } elseif (isset($data->post_hint) && $data->post_hint == 'link') { // Link with preview @@ -215,18 +213,11 @@ private function collectDataInternal(): void $embed = ''; } - $item['content'] = $this->template( - $data->url, - $data->thumbnail, - $data->domain - ) . $embed; - } elseif (isset($data->post_hint) ? $data->post_hint == 'image' : false) { + $item['content'] = $this->createFigureLink($data->url, $data->thumbnail, $data->domain) . $embed; + } elseif (isset($data->post_hint) && $data->post_hint == 'image') { // Single image - $item['content'] = $this->link( - $this->encodePermalink($data->permalink), - '' - ); + $item['content'] = $this->createLink($this->urlEncodePathParts($data->permalink), ''); } elseif ($data->is_gallery ?? false) { // Multiple images @@ -246,32 +237,18 @@ private function collectDataInternal(): void end($data->preview->images[0]->resolutions); $index = key($data->preview->images[0]->resolutions); - $item['content'] = $this->template( - $data->url, - $data->preview->images[0]->resolutions[$index]->url, - 'Video' - ); - } elseif (isset($data->media) ? $data->media->type == 'youtube.com' : false) { + $item['content'] = $this->createFigureLink($data->url, $data->preview->images[0]->resolutions[$index]->url, 'Video'); + } elseif (isset($data->media) && $data->media->type == 'youtube.com') { // Youtube link - - $item['content'] = $this->template( - $data->url, - $data->media->oembed->thumbnail_url, - 'YouTube' - ); + $item['content'] = $this->createFigureLink($data->url, $data->media->oembed->thumbnail_url, 'YouTube'); + //$item['content'] = htmlspecialchars_decode($data->media->oembed->html); } elseif (explode('.', $data->domain)[0] == 'self') { // Crossposted text post // TODO (optionally?) Fetch content of the original post. - - $item['content'] = $this->link( - $this->encodePermalink($data->permalink), - 'Crossposted from r/' - . explode('.', $data->domain)[1] - ); + $item['content'] = $this->createLink($this->urlEncodePathParts($data->permalink), 'Crossposted from r/' . explode('.', $data->domain)[1]); } else { // Link WITHOUT preview - - $item['content'] = $this->link($data->url, $data->domain); + $item['content'] = $this->createLink($data->url, $data->domain); } $this->items[] = $item; @@ -279,7 +256,7 @@ private function collectDataInternal(): void } // Sort the order to put the latest posts first, even for mixed subreddits usort($this->items, function ($a, $b) { - return $a['timestamp'] < $b['timestamp']; + return $b['timestamp'] <=> $a['timestamp']; }); } @@ -299,24 +276,19 @@ public function getName() } } - private function encodePermalink($link) + private function urlEncodePathParts($link) { - return self::URI . implode( - '/', - array_map('urlencode', explode('/', $link)) - ); + return self::URI . implode('/', array_map('urlencode', explode('/', $link))); } - private function template($href, $src, $caption) + private function createFigureLink($href, $src, $caption) { - return '
' - . $caption . '
'; + return sprintf('
%s
', $href, $caption, $src); } - private function link($href, $text) + private function createLink($href, $text) { - return '' . $text . ''; + return sprintf('%s', $href, $text); } public function detectParameters($url) diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php index 18378d2480d..78c60d5f599 100644 --- a/bridges/ReporterreBridge.php +++ b/bridges/ReporterreBridge.php @@ -1,31 +1,20 @@ find('div[style=text-align:justify]') as $e) { - $text = $e->outertext; - } - - $html2->clear(); - unset($html2); - - $text = strip_tags($text, '


'); - return $text; - } + const DESCRIPTION = 'Returns the newest articles. See also their official feed https://reporterre.net/spip.php?page=backend-simple'; public function collectData() { - $html = getSimpleHTMLDOM(self::URI . 'spip.php?page=backend'); + //$url = self::URI . 'spip.php?page=backend'; + $url = self::URI . 'spip.php?page=backend-simple'; + $html = getSimpleHTMLDOM($url); $limit = 0; foreach ($html->find('item') as $element) { @@ -34,10 +23,27 @@ public function collectData() $item['title'] = html_entity_decode($element->find('title', 0)->plaintext); $item['timestamp'] = strtotime($element->find('dc:date', 0)->plaintext); $item['uri'] = $element->find('guid', 0)->innertext; - $item['content'] = html_entity_decode($this->extractContent($item['uri'])); + //$item['content'] = html_entity_decode($this->extractContent($item['uri'])); + $item['content'] = htmlspecialchars_decode($element->find('description', 0)->plaintext); $this->items[] = $item; $limit++; } } } + + private function extractContent($url) + { + $html2 = getSimpleHTMLDOM($url); + $html2 = defaultLinkTo($html2, self::URI); + + foreach ($html2->find('div[style=text-align:justify]') as $e) { + $text = $e->outertext; + } + + $html2->clear(); + unset($html2); + + $text = strip_tags($text, '


'); + return $text; + } } From ef378663aaa98ef54c7145781e8ab1e35fe50e7d Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 2 Jan 2024 16:21:52 +0100 Subject: [PATCH 20/88] test: happy new year (#3873) * test: happy new year * yup --- tests/FeedItemTest.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/FeedItemTest.php b/tests/FeedItemTest.php index 0e7af222e06..3390e7b3534 100644 --- a/tests/FeedItemTest.php +++ b/tests/FeedItemTest.php @@ -41,7 +41,8 @@ public function testTimestamp() $this->assertSame(64800, $item->getTimestamp()); $item->setTimestamp('1st jan last year'); - // This will fail at 2024-01-01 hehe - $this->assertSame(1640995200, $item->getTimestamp()); + + // This will fail at 2025-01-01 hehe + $this->assertSame(1672531200, $item->getTimestamp()); } } From e904de2dc987d6578f9fd5f527aa736801c2185c Mon Sep 17 00:00:00 2001 From: Damien Calesse <2787828+kranack@users.noreply.github.com> Date: Tue, 2 Jan 2024 16:22:39 +0100 Subject: [PATCH 21/88] [YGGTorrent] Update URI (#3871) --- bridges/YGGTorrentBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YGGTorrentBridge.php b/bridges/YGGTorrentBridge.php index f0c31f11dd5..018bcfc4f02 100644 --- a/bridges/YGGTorrentBridge.php +++ b/bridges/YGGTorrentBridge.php @@ -7,7 +7,7 @@ class YGGTorrentBridge extends BridgeAbstract { const MAINTAINER = 'teromene'; const NAME = 'Yggtorrent Bridge'; - const URI = 'https://www5.yggtorrent.fi'; + const URI = 'https://www3.yggtorrent.qa'; const DESCRIPTION = 'Returns torrent search from Yggtorrent'; const PARAMETERS = [ From 0f6fa8034b04e1e007158ef0c5cc784bf8d7ef45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Kol=C3=A1=C5=99?= Date: Tue, 2 Jan 2024 16:23:13 +0100 Subject: [PATCH 22/88] Fixed selector in CeskaTelevizeBridge (#3872) * Fixed selector in CeskaTelevizeBridge * Fixed also description selector --- bridges/CeskaTelevizeBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/CeskaTelevizeBridge.php b/bridges/CeskaTelevizeBridge.php index 003cd4c76f0..be00d6640e7 100644 --- a/bridges/CeskaTelevizeBridge.php +++ b/bridges/CeskaTelevizeBridge.php @@ -57,9 +57,9 @@ public function collectData() $this->feedName .= " ({$category})"; } - foreach ($html->find('#episodeListSection a[data-testid=next-link]') as $element) { + foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) { $itemTitle = $element->find('h3', 0); - $itemContent = $element->find('div[class^=content-]', 0); + $itemContent = $element->find('p[class^=content-]', 0); $itemDate = $element->find('div[class^=playTime-] span', 0); $itemThumbnail = $element->find('img', 0); $itemUri = self::URI . $element->getAttribute('href'); From 12395fcf2d87939a8a95d8bbc95e188e171bfbca Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Fri, 5 Jan 2024 07:22:16 +0100 Subject: [PATCH 23/88] Docker fix default fastcgi.logging (#3875) Mistake from https://github.com/RSS-Bridge/rss-bridge/pull/3500 Wrong file extension: should have been `.ini` and not `.conf` otherwise it has no effect. See https://github.com/docker-library/php/pull/1360 and https://github.com/docker-library/php/issues/878#issuecomment-938595965 --- Dockerfile | 2 +- config/php.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index f504b51f138..2f1f4f3d93a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,7 @@ ENV CURL_IMPERSONATE ff91esr COPY ./config/nginx.conf /etc/nginx/sites-available/default COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf -COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.conf +COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini COPY --chown=www-data:www-data ./ /app/ diff --git a/config/php.ini b/config/php.ini index 115f1c89f37..383afffb0b6 100644 --- a/config/php.ini +++ b/config/php.ini @@ -1,4 +1,4 @@ ; Inspired by https://github.com/docker-library/php/blob/master/8.2/bookworm/fpm/Dockerfile -; https://github.com/docker-library/php/issues/878#issuecomment-938595965' +; https://github.com/docker-library/php/issues/878#issuecomment-938595965 fastcgi.logging = Off From 55ffac5bae8d84ff1b42339d1114117cf32a6854 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Fri, 5 Jan 2024 07:23:40 +0100 Subject: [PATCH 24/88] [PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] (#3876) Fix the Deal source link The HTML does not contain the link to the "Deal source anymore", now only an attribute does contain the information about the Deal Source. The JSON data is now extraced for each Deal, and used to get the Temperature and Deal Source. --- bridges/DealabsBridge.php | 1 + bridges/HotUKDealsBridge.php | 1 + bridges/MydealsBridge.php | 1 + bridges/PepperBridgeAbstract.php | 29 +++++++++++++++++++++-------- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/bridges/DealabsBridge.php b/bridges/DealabsBridge.php index a904c3ff495..4d39502ca9a 100644 --- a/bridges/DealabsBridge.php +++ b/bridges/DealabsBridge.php @@ -1910,6 +1910,7 @@ class DealabsBridge extends PepperBridgeAbstract 'context-talk' => 'Surveillance Discussion', 'uri-group' => 'groupe/', 'uri-deal' => 'bons-plans/', + 'uri-merchant' => 'search/bons-plans?merchant-id=', 'request-error' => 'Impossible de joindre Dealabs', 'thread-error' => 'Impossible de déterminer l\'ID de la discussion. Vérifiez l\'URL que vous avez entré', 'no-results' => 'Il n'y a rien à afficher pour le moment :(', diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index 69301c42ae2..a7e622500e7 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -3274,6 +3274,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'context-talk' => 'Discussion Monitoring', 'uri-group' => 'tag/', 'uri-deal' => 'deals/', + 'uri-merchant' => 'search/deals?merchant-id=', 'request-error' => 'Could not request HotUKDeals', 'thread-error' => 'Unable to determine the thread ID. Check the URL you entered', 'no-results' => 'Ooops, looks like we could', diff --git a/bridges/MydealsBridge.php b/bridges/MydealsBridge.php index 22b4641305d..d7e074a9aac 100644 --- a/bridges/MydealsBridge.php +++ b/bridges/MydealsBridge.php @@ -2021,6 +2021,7 @@ class MydealsBridge extends PepperBridgeAbstract 'context-talk' => 'Überwachung Diskussion', 'uri-group' => 'gruppe/', 'uri-deal' => 'deals/', + 'uri-merchant' => 'search/gutscheine?merchant-id=', 'request-error' => 'Could not request mydeals', 'thread-error' => 'Die ID der Diskussion kann nicht ermittelt werden. Überprüfen Sie die eingegebene URL', 'no-results' => 'Ups, wir konnten nichts', diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 6cb0f3024a1..73bd194da8e 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -104,6 +104,9 @@ protected function collectDeals($url) $item['title'] = $this->getTitle($deal); $item['author'] = $deal->find('span.thread-username', 0)->plaintext; + // Get the JSON Data stored as vue + $jsonDealData = $this->getDealJsonData($deal); + $item['content'] = '
find('div[class=js-vue2]', 0)->getAttribute('data-vue2')); + return $data; + } + /** * Get the source of a Deal if it exists * @return string String of the deal source */ - private function getSource($deal) + private function getSource($jsonData) { - if (($origin = $deal->find('button[class*=text--color-greyShade]', 0)) != null) { - $path = str_replace(' ', '/', trim(Json::decode($origin->{'data-cloak-link'})['path'])); - $text = $origin->find('span[class*=link]', 0); + if ($jsonData['props']['thread']['merchant'] != null) { + $path = $this->i8n('uri-merchant') . $jsonData['props']['thread']['merchant']['merchantId']; + $text = $jsonData['props']['thread']['merchant']['merchantName']; return ''; } else { return ''; From ea58c8d2bcd17b09e7d9dea64297ea44885a3933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B5=D0=B7=D0=B4=D0=B0=D0=BB=D0=B8=D1=81=D1=8C?= =?UTF-8?q?=D0=BA=D0=BE?= <105280814+uandreew@users.noreply.github.com> Date: Sat, 6 Jan 2024 19:13:50 +0200 Subject: [PATCH 25/88] Update 06_Public_Hosts.md (#3877) --- docs/01_General/06_Public_Hosts.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md index c9572824844..4aa905dad49 100644 --- a/docs/01_General/06_Public_Hosts.md +++ b/docs/01_General/06_Public_Hosts.md @@ -22,6 +22,7 @@ | ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.foxhaven.cyou| ![](https://img.shields.io/badge/website-up-brightgreen) | [@Aysilu](https://foxhaven.cyou) | Hosted with Timeweb (Maintained in Poland) | | ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.m3wz.su| ![](https://img.shields.io/badge/website-up-brightgreen) | [@m3oweezed](https://m3wz.su/en/about) | Poland, Hosted with Timeweb Cloud | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | ![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | Hosted with Hostaris, Germany +| ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine ## Inactive instances From 3ce94409ab650e042993480d638482a89901776d Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 20:18:33 +0100 Subject: [PATCH 26/88] feat: support itunes namespace in top channel feed (#3776) Also preserves other properties. --- actions/DisplayAction.php | 11 +- bridges/ItakuBridge.php | 6 +- formats/AtomFormat.php | 81 ++++++------ formats/HtmlFormat.php | 12 +- formats/JsonFormat.php | 18 +-- formats/MrssFormat.php | 124 ++++++++++-------- formats/PlaintextFormat.php | 6 +- lib/BridgeAbstract.php | 53 +++++--- lib/FormatAbstract.php | 59 ++++----- lib/bootstrap.php | 3 - tests/FormatTest.php | 72 ++++++++++ tests/Formats/BaseFormatTest.php | 2 +- .../expectedAtomFormat/feed.common.xml | 6 +- .../samples/expectedAtomFormat/feed.empty.xml | 6 +- .../expectedAtomFormat/feed.emptyItems.xml | 6 +- .../expectedAtomFormat/feed.microblog.xml | 6 +- .../expectedMrssFormat/feed.common.xml | 6 +- .../samples/expectedMrssFormat/feed.empty.xml | 2 +- .../expectedMrssFormat/feed.emptyItems.xml | 2 +- .../expectedMrssFormat/feed.microblog.xml | 6 +- tests/Formats/samples/feed.empty.json | 2 +- tests/Formats/samples/feed.emptyItems.json | 2 +- 22 files changed, 293 insertions(+), 198 deletions(-) create mode 100644 tests/FormatTest.php diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 435639966fd..080da52ea59 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -100,7 +100,7 @@ public function execute(array $request) private function createResponse(array $request, BridgeAbstract $bridge, FormatAbstract $format) { $items = []; - $infos = []; + $feed = []; try { $bridge->loadConfiguration(); @@ -116,12 +116,7 @@ private function createResponse(array $request, BridgeAbstract $bridge, FormatAb } $items = $feedItems; } - $infos = [ - 'name' => $bridge->getName(), - 'uri' => $bridge->getURI(), - 'donationUri' => $bridge->getDonationURI(), - 'icon' => $bridge->getIcon() - ]; + $feed = $bridge->getFeed(); } catch (\Exception $e) { if ($e instanceof HttpException) { // Reproduce (and log) these responses regardless of error output and report limit @@ -155,7 +150,7 @@ private function createResponse(array $request, BridgeAbstract $bridge, FormatAb } $format->setItems($items); - $format->setExtraInfos($infos); + $format->setFeed($feed); $now = time(); $format->setLastModified($now); $headers = [ diff --git a/bridges/ItakuBridge.php b/bridges/ItakuBridge.php index 149757f5c4e..0577752cc55 100644 --- a/bridges/ItakuBridge.php +++ b/bridges/ItakuBridge.php @@ -280,7 +280,7 @@ public function collectData() $opt['range'] = ''; $user_id = $this->getInput('user_id') ?? $this->getOwnerID($this->getInput('user')); - $data = $this->getFeed( + $data = $this->getFeedData( $opt, $user_id ); @@ -289,7 +289,7 @@ public function collectData() if ($this->queriedContext === 'Home feed') { $opt['order'] = $this->getInput('order'); $opt['range'] = $this->getInput('range'); - $data = $this->getFeed($opt); + $data = $this->getFeedData($opt); } foreach ($data['results'] as $record) { @@ -409,7 +409,7 @@ private function getPostsSearch(array $opt) return $this->getData($url, false, true); } - private function getFeed(array $opt, $ownerID = null) + private function getFeedData(array $opt, $ownerID = null) { $url = self::URI . "/api/feed/?date_range={$opt['range']}&ordering={$opt['order']}&page=1&page_size=30&format=json"; diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 07ca7272f5d..1fabef2e7f5 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -17,44 +17,61 @@ class AtomFormat extends FormatAbstract public function stringify() { $document = new \DomDocument('1.0', $this->getCharset()); + $document->formatOutput = true; $feedUrl = get_current_url(); - $extraInfos = $this->getExtraInfos(); - if (empty($extraInfos['uri'])) { - $uri = REPOSITORY; - } else { - $uri = $extraInfos['uri']; - } - - $document->formatOutput = true; $feed = $document->createElementNS(self::ATOM_NS, 'feed'); $document->appendChild($feed); $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS); - $title = $document->createElement('title'); - $feed->appendChild($title); - $title->setAttribute('type', 'text'); - $title->appendChild($document->createTextNode($extraInfos['name'])); + $feedArray = $this->getFeed(); + foreach ($feedArray as $feedKey => $feedValue) { + if (in_array($feedKey, ['donationUri'])) { + continue; + } + if ($feedKey === 'name') { + $title = $document->createElement('title'); + $feed->appendChild($title); + $title->setAttribute('type', 'text'); + $title->appendChild($document->createTextNode($feedValue)); + } elseif ($feedKey === 'icon') { + if ($feedValue) { + $icon = $document->createElement('icon'); + $feed->appendChild($icon); + $icon->appendChild($document->createTextNode($feedValue)); + + $logo = $document->createElement('logo'); + $feed->appendChild($logo); + $logo->appendChild($document->createTextNode($feedValue)); + } + } elseif ($feedKey === 'uri') { + if ($feedValue) { + $linkAlternate = $document->createElement('link'); + $feed->appendChild($linkAlternate); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $feedValue); + + $linkSelf = $document->createElement('link'); + $feed->appendChild($linkSelf); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + $linkSelf->setAttribute('href', $feedUrl); + } + } elseif ($feedKey === 'itunes') { + // todo: skip? + } else { + $element = $document->createElement($feedKey); + $feed->appendChild($element); + $element->appendChild($document->createTextNode($feedValue)); + } + } $id = $document->createElement('id'); $feed->appendChild($id); $id->appendChild($document->createTextNode($feedUrl)); - $uriparts = parse_url($uri); - if (empty($extraInfos['icon'])) { - $iconUrl = $uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico'; - } else { - $iconUrl = $extraInfos['icon']; - } - $icon = $document->createElement('icon'); - $feed->appendChild($icon); - $icon->appendChild($document->createTextNode($iconUrl)); - - $logo = $document->createElement('logo'); - $feed->appendChild($logo); - $logo->appendChild($document->createTextNode($iconUrl)); - $feedTimestamp = gmdate(DATE_ATOM, $this->lastModified); $updated = $document->createElement('updated'); $feed->appendChild($updated); @@ -69,17 +86,7 @@ public function stringify() $author->appendChild($authorName); $authorName->appendChild($document->createTextNode($feedAuthor)); - $linkAlternate = $document->createElement('link'); - $feed->appendChild($linkAlternate); - $linkAlternate->setAttribute('rel', 'alternate'); - $linkAlternate->setAttribute('type', 'text/html'); - $linkAlternate->setAttribute('href', $uri); - - $linkSelf = $document->createElement('link'); - $feed->appendChild($linkSelf); - $linkSelf->setAttribute('rel', 'self'); - $linkSelf->setAttribute('type', 'application/atom+xml'); - $linkSelf->setAttribute('href', $feedUrl); + foreach ($this->getItems() as $item) { $itemArray = $item->toArray(); diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 4933af8d720..ef66f493375 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -8,7 +8,7 @@ public function stringify() { $queryString = $_SERVER['QUERY_STRING']; - $extraInfos = $this->getExtraInfos(); + $feedArray = $this->getFeed(); $formatFactory = new FormatFactory(); $buttons = []; $linkTags = []; @@ -29,9 +29,9 @@ public function stringify() ]; } - if (Configuration::getConfig('admin', 'donations') && $extraInfos['donationUri'] !== '') { + if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) { $buttons[] = [ - 'href' => e($extraInfos['donationUri']), + 'href' => e($feedArray['donationUri']), 'value' => 'Donate to maintainer', ]; } @@ -39,7 +39,7 @@ public function stringify() $items = []; foreach ($this->getItems() as $item) { $items[] = [ - 'url' => $item->getURI() ?: $extraInfos['uri'], + 'url' => $item->getURI() ?: $feedArray['uri'], 'title' => $item->getTitle() ?? '(no title)', 'timestamp' => $item->getTimestamp(), 'author' => $item->getAuthor(), @@ -51,9 +51,9 @@ public function stringify() $html = render_template(__DIR__ . '/../templates/html-format.html.php', [ 'charset' => $this->getCharset(), - 'title' => $extraInfos['name'], + 'title' => $feedArray['name'], 'linkTags' => $linkTags, - 'uri' => $extraInfos['uri'], + 'uri' => $feedArray['uri'], 'buttons' => $buttons, 'items' => $items, ]); diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php index dd61da41d8c..016e75e1177 100644 --- a/formats/JsonFormat.php +++ b/formats/JsonFormat.php @@ -25,18 +25,18 @@ class JsonFormat extends FormatAbstract public function stringify() { - $host = $_SERVER['HTTP_HOST'] ?? ''; - $extraInfos = $this->getExtraInfos(); + $feedArray = $this->getFeed(); + $data = [ - 'version' => 'https://jsonfeed.org/version/1', - 'title' => empty($extraInfos['name']) ? $host : $extraInfos['name'], - 'home_page_url' => empty($extraInfos['uri']) ? REPOSITORY : $extraInfos['uri'], - 'feed_url' => get_current_url(), + 'version' => 'https://jsonfeed.org/version/1', + 'title' => $feedArray['name'], + 'home_page_url' => $feedArray['uri'], + 'feed_url' => get_current_url(), ]; - if (!empty($extraInfos['icon'])) { - $data['icon'] = $extraInfos['icon']; - $data['favicon'] = $extraInfos['icon']; + if ($feedArray['icon']) { + $data['icon'] = $feedArray['icon']; + $data['favicon'] = $feedArray['icon']; } $items = []; diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index 5b96a6a75a6..e93a8289fd9 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -35,16 +35,8 @@ class MrssFormat extends FormatAbstract public function stringify() { $document = new \DomDocument('1.0', $this->getCharset()); - - $feedUrl = get_current_url(); - $extraInfos = $this->getExtraInfos(); - if (empty($extraInfos['uri'])) { - $uri = REPOSITORY; - } else { - $uri = $extraInfos['uri']; - } - $document->formatOutput = true; + $feed = $document->createElement('rss'); $document->appendChild($feed); $feed->setAttribute('version', '2.0'); @@ -54,50 +46,73 @@ public function stringify() $channel = $document->createElement('channel'); $feed->appendChild($channel); - $title = $extraInfos['name']; - $channelTitle = $document->createElement('title'); - $channel->appendChild($channelTitle); - $channelTitle->appendChild($document->createTextNode($title)); - - $link = $document->createElement('link'); - $channel->appendChild($link); - $link->appendChild($document->createTextNode($uri)); - - $description = $document->createElement('description'); - $channel->appendChild($description); - $description->appendChild($document->createTextNode($extraInfos['name'])); - - $allowedIconExtensions = [ - '.gif', - '.jpg', - '.png', - ]; - $icon = $extraInfos['icon']; - if (!empty($icon) && in_array(substr($icon, -4), $allowedIconExtensions)) { - $feedImage = $document->createElement('image'); - $channel->appendChild($feedImage); - $iconUrl = $document->createElement('url'); - $iconUrl->appendChild($document->createTextNode($icon)); - $feedImage->appendChild($iconUrl); - $iconTitle = $document->createElement('title'); - $iconTitle->appendChild($document->createTextNode($title)); - $feedImage->appendChild($iconTitle); - $iconLink = $document->createElement('link'); - $iconLink->appendChild($document->createTextNode($uri)); - $feedImage->appendChild($iconLink); - } + $feedArray = $this->getFeed(); + $uri = $feedArray['uri']; + $title = $feedArray['name']; - $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link'); - $channel->appendChild($linkAlternate); - $linkAlternate->setAttribute('rel', 'alternate'); - $linkAlternate->setAttribute('type', 'text/html'); - $linkAlternate->setAttribute('href', $uri); - - $linkSelf = $document->createElementNS(self::ATOM_NS, 'link'); - $channel->appendChild($linkSelf); - $linkSelf->setAttribute('rel', 'self'); - $linkSelf->setAttribute('type', 'application/atom+xml'); - $linkSelf->setAttribute('href', $feedUrl); + foreach ($feedArray as $feedKey => $feedValue) { + if (in_array($feedKey, ['atom', 'donationUri'])) { + continue; + } + if ($feedKey === 'name') { + $channelTitle = $document->createElement('title'); + $channel->appendChild($channelTitle); + $channelTitle->appendChild($document->createTextNode($title)); + + $description = $document->createElement('description'); + $channel->appendChild($description); + $description->appendChild($document->createTextNode($title)); + } elseif ($feedKey === 'uri') { + $link = $document->createElement('link'); + $channel->appendChild($link); + $link->appendChild($document->createTextNode($uri)); + + $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link'); + $channel->appendChild($linkAlternate); + $linkAlternate->setAttribute('rel', 'alternate'); + $linkAlternate->setAttribute('type', 'text/html'); + $linkAlternate->setAttribute('href', $uri); + + $linkSelf = $document->createElementNS(self::ATOM_NS, 'link'); + $channel->appendChild($linkSelf); + $linkSelf->setAttribute('rel', 'self'); + $linkSelf->setAttribute('type', 'application/atom+xml'); + $feedUrl = get_current_url(); + $linkSelf->setAttribute('href', $feedUrl); + } elseif ($feedKey === 'icon') { + $allowedIconExtensions = [ + '.gif', + '.jpg', + '.png', + '.ico', + ]; + $icon = $feedValue; + if ($icon && in_array(substr($icon, -4), $allowedIconExtensions)) { + $feedImage = $document->createElement('image'); + $channel->appendChild($feedImage); + $iconUrl = $document->createElement('url'); + $iconUrl->appendChild($document->createTextNode($icon)); + $feedImage->appendChild($iconUrl); + $iconTitle = $document->createElement('title'); + $iconTitle->appendChild($document->createTextNode($title)); + $feedImage->appendChild($iconTitle); + $iconLink = $document->createElement('link'); + $iconLink->appendChild($document->createTextNode($uri)); + $feedImage->appendChild($iconLink); + } + } elseif ($feedKey === 'itunes') { + $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS); + foreach ($feedValue as $itunesKey => $itunesValue) { + $itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey); + $channel->appendChild($itunesProperty); + $itunesProperty->appendChild($document->createTextNode($itunesValue)); + } + } else { + $element = $document->createElement($feedKey); + $channel->appendChild($element); + $element->appendChild($document->createTextNode($feedValue)); + } + } foreach ($this->getItems() as $item) { $itemArray = $item->toArray(); @@ -135,6 +150,7 @@ public function stringify() $entry->appendChild($itunesProperty); $itunesProperty->appendChild($document->createTextNode($itunesValue)); } + if (isset($itemArray['enclosure'])) { $itunesEnclosure = $document->createElement('enclosure'); $entry->appendChild($itunesEnclosure); @@ -142,7 +158,9 @@ public function stringify() $itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']); $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); } - } if (!empty($itemUri)) { + } + + if (!empty($itemUri)) { $entryLink = $document->createElement('link'); $entry->appendChild($entryLink); $entryLink->appendChild($document->createTextNode($itemUri)); diff --git a/formats/PlaintextFormat.php b/formats/PlaintextFormat.php index 0a9237d04a9..4e18caa6058 100644 --- a/formats/PlaintextFormat.php +++ b/formats/PlaintextFormat.php @@ -6,11 +6,11 @@ class PlaintextFormat extends FormatAbstract public function stringify() { - $data = []; + $feed = $this->getFeed(); foreach ($this->getItems() as $item) { - $data[] = $item->toArray(); + $feed['items'][] = $item->toArray(); } - $text = print_r($data, true); + $text = print_r($feed, true); // Remove invalid non-UTF8 characters ini_set('mbstring.substitute_character', 'none'); $text = mb_convert_encoding($text, $this->getCharset(), 'UTF-8'); diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index 0f86f454c0d..8001ba4fba4 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -40,49 +40,66 @@ public function __construct( abstract public function collectData(); - public function getItems() + public function getFeed(): array { - return $this->items; + return [ + 'name' => $this->getName(), + 'uri' => $this->getURI(), + 'donationUri' => $this->getDonationURI(), + 'icon' => $this->getIcon(), + ]; } - public function getOption(string $name) + public function getName() { - return $this->configuration[$name] ?? null; + return static::NAME; } - public function getDescription() + public function getURI() { - return static::DESCRIPTION; + return static::URI ?? 'https://github.com/RSS-Bridge/rss-bridge/'; } - public function getMaintainer(): string + public function getDonationURI(): string { - return static::MAINTAINER; + return static::DONATION_URI; } - public function getName() + public function getIcon() { - return static::NAME; + if (static::URI) { + // This favicon may or may not exist + return rtrim(static::URI, '/') . '/favicon.ico'; + } + return ''; } - public function getIcon() + public function getOption(string $name) { - return static::URI . '/favicon.ico'; + return $this->configuration[$name] ?? null; } - public function getParameters(): array + /** + * The description is currently not used in feed production + */ + public function getDescription() { - return static::PARAMETERS; + return static::DESCRIPTION; } - public function getURI() + public function getMaintainer(): string { - return static::URI; + return static::MAINTAINER; } - public function getDonationURI(): string + public function getParameters(): array { - return static::DONATION_URI; + return static::PARAMETERS; + } + + public function getItems() + { + return $this->items; } public function getCacheTimeout() diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index c76d1e42166..28eb4bbfa7a 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -9,28 +9,25 @@ abstract class FormatAbstract protected string $charset = 'UTF-8'; protected array $items = []; protected int $lastModified; - protected array $extraInfos = []; - abstract public function stringify(); - - public function getMimeType(): string - { - return static::MIME_TYPE; - } + protected array $feed = []; - public function setCharset(string $charset) - { - $this->charset = $charset; - } + abstract public function stringify(); - public function getCharset(): string + public function setFeed(array $feed) { - return $this->charset; + $default = [ + 'name' => '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + ]; + $this->feed = array_merge($default, $feed); } - public function setLastModified(int $lastModified) + public function getFeed(): array { - $this->lastModified = $lastModified; + return $this->feed; } /** @@ -49,27 +46,23 @@ public function getItems(): array return $this->items; } - public function setExtraInfos(array $infos = []) + public function getMimeType(): string { - $extras = [ - 'name', - 'uri', - 'icon', - 'donationUri', - ]; - foreach ($extras as $extra) { - if (!isset($infos[$extra])) { - $infos[$extra] = ''; - } - } - $this->extraInfos = $infos; + return static::MIME_TYPE; } - public function getExtraInfos(): array + public function setCharset(string $charset) { - if (!$this->extraInfos) { - $this->setExtraInfos(); - } - return $this->extraInfos; + $this->charset = $charset; + } + + public function getCharset(): string + { + return $this->charset; + } + + public function setLastModified(int $lastModified) + { + $this->lastModified = $lastModified; } } diff --git a/lib/bootstrap.php b/lib/bootstrap.php index a95de9dd0ef..85d823e92c1 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -9,9 +9,6 @@ /** Path to the cache folder */ const PATH_CACHE = __DIR__ . '/../cache/'; -/** URL to the RSS-Bridge repository */ -const REPOSITORY = 'https://github.com/RSS-Bridge/rss-bridge/'; - // Allow larger files for simple_html_dom // todo: extract to config (if possible) const MAX_FILE_SIZE = 10000000; diff --git a/tests/FormatTest.php b/tests/FormatTest.php new file mode 100644 index 00000000000..b5df395cccd --- /dev/null +++ b/tests/FormatTest.php @@ -0,0 +1,72 @@ + '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + ]; + $this->assertEquals([], $sut->getFeed()); + + $sut->setFeed([ + 'name' => '0', + 'uri' => '1', + 'icon' => '2', + 'donationUri' => '3', + ]); + $expected = [ + 'name' => '0', + 'uri' => '1', + 'icon' => '2', + 'donationUri' => '3', + ]; + $this->assertEquals($expected, $sut->getFeed()); + + $sut->setFeed([]); + $expected = [ + 'name' => '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + ]; + $this->assertEquals($expected, $sut->getFeed()); + + $sut->setFeed(['foo' => 'bar', 'foo2' => 'bar2']); + $expected = [ + 'name' => '', + 'uri' => '', + 'icon' => '', + 'donationUri' => '', + 'foo' => 'bar', + 'foo2' => 'bar2', + ]; + $this->assertEquals($expected, $sut->getFeed()); + } +} + +class TestFormat extends \FormatAbstract +{ + public function stringify() + { + } +} + +class TestBridge extends \BridgeAbstract +{ + public function collectData() + { + $this->items[] = ['title' => 'kek']; + } +} diff --git a/tests/Formats/BaseFormatTest.php b/tests/Formats/BaseFormatTest.php index 71e196f0260..8999e7722af 100644 --- a/tests/Formats/BaseFormatTest.php +++ b/tests/Formats/BaseFormatTest.php @@ -61,7 +61,7 @@ protected function formatData(string $formatName, \stdClass $sample): string $formatFactory = new FormatFactory(); $format = $formatFactory->create($formatName); $format->setItems($sample->items); - $format->setExtraInfos($sample->meta); + $format->setFeed($sample->meta); $format->setLastModified(strtotime('2000-01-01 12:00:00 UTC')); return $format->stringify(); diff --git a/tests/Formats/samples/expectedAtomFormat/feed.common.xml b/tests/Formats/samples/expectedAtomFormat/feed.common.xml index aa6d0687da2..455e5440529 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.common.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.common.xml @@ -2,15 +2,15 @@ Sample feed with common data - https://example.com/feed?type=common&items=4 + + https://example.com/logo.png https://example.com/logo.png + https://example.com/feed?type=common&items=4 2000-01-01T12:00:00+00:00 RSS-Bridge - - Test Entry diff --git a/tests/Formats/samples/expectedAtomFormat/feed.empty.xml b/tests/Formats/samples/expectedAtomFormat/feed.empty.xml index fc04304da51..083f230f9bf 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.empty.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.empty.xml @@ -2,14 +2,12 @@ Sample feed with minimum data + + https://example.com/feed - https://github.com/favicon.ico - https://github.com/favicon.ico 2000-01-01T12:00:00+00:00 RSS-Bridge - - diff --git a/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml b/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml index 18572fac4f8..d7cb461a1e6 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml @@ -2,15 +2,13 @@ Sample feed with minimum data + + https://example.com/feed - https://github.com/favicon.ico - https://github.com/favicon.ico 2000-01-01T12:00:00+00:00 RSS-Bridge - - Sample Item #1 diff --git a/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml b/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml index 32bc02731e7..8eb0133c83a 100644 --- a/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml +++ b/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml @@ -2,15 +2,15 @@ Sample microblog feed - https://example.com/feed + + https://example.com/logo.png https://example.com/logo.png + https://example.com/feed 2000-01-01T12:00:00+00:00 RSS-Bridge - - Oh 😲 I found three monkeys 🙈🙉🙊 diff --git a/tests/Formats/samples/expectedMrssFormat/feed.common.xml b/tests/Formats/samples/expectedMrssFormat/feed.common.xml index 38a16f88afc..92838ae883e 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.common.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.common.xml @@ -2,15 +2,15 @@ Sample feed with common data - https://example.com/blog/ Sample feed with common data + https://example.com/blog/ + + https://example.com/logo.png Sample feed with common data https://example.com/blog/ - - Test Entry diff --git a/tests/Formats/samples/expectedMrssFormat/feed.empty.xml b/tests/Formats/samples/expectedMrssFormat/feed.empty.xml index 888c42b6cf0..40eecfc6ff9 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.empty.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.empty.xml @@ -2,8 +2,8 @@ Sample feed with minimum data - https://github.com/RSS-Bridge/rss-bridge/ Sample feed with minimum data + https://github.com/RSS-Bridge/rss-bridge/ diff --git a/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml b/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml index 9e712ddd998..8839f5a5918 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml @@ -2,8 +2,8 @@ Sample feed with minimum data - https://github.com/RSS-Bridge/rss-bridge/ Sample feed with minimum data + https://github.com/RSS-Bridge/rss-bridge/ diff --git a/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml b/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml index 81dac87a793..63c04c0f420 100644 --- a/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml +++ b/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml @@ -2,15 +2,15 @@ Sample microblog feed - https://example.com/blog/ Sample microblog feed + https://example.com/blog/ + + https://example.com/logo.png Sample microblog feed https://example.com/blog/ - - 1918f084648b82057c1dd3faa3d091da82a6fac2 diff --git a/tests/Formats/samples/feed.empty.json b/tests/Formats/samples/feed.empty.json index aac09f64994..7b1a2eae54d 100644 --- a/tests/Formats/samples/feed.empty.json +++ b/tests/Formats/samples/feed.empty.json @@ -6,7 +6,7 @@ }, "meta": { "name": "Sample feed with minimum data", - "uri": "", + "uri": "https://github.com/RSS-Bridge/rss-bridge/", "icon": "" }, "items": [] diff --git a/tests/Formats/samples/feed.emptyItems.json b/tests/Formats/samples/feed.emptyItems.json index 0287d428917..4d0774875e2 100644 --- a/tests/Formats/samples/feed.emptyItems.json +++ b/tests/Formats/samples/feed.emptyItems.json @@ -6,7 +6,7 @@ }, "meta": { "name": "Sample feed with minimum data", - "uri": "", + "uri": "https://github.com/RSS-Bridge/rss-bridge/", "icon": "" }, "items": [ From 0bf5dbbc0ba46cc27fe40b554b0c3c0ba705ef8b Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 20:33:35 +0100 Subject: [PATCH 27/88] chore: add tools for manually administrating the configured cache (#3867) --- README.md | 36 +++++++++++++++++++++++--- bridges/PixivBridge.php | 29 ++++++++++----------- docs/10_Bridge_Specific/PixivBridge.md | 15 ++++++++--- index.php | 25 +++--------------- lib/CacheFactory.php | 1 + lib/Configuration.php | 2 +- lib/bootstrap.php | 15 +++++++++++ lib/logger.php | 1 + phpcs.xml | 8 +++++- templates/exception.html.php | 8 ++++++ 10 files changed, 95 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 34efc8de3e7..46bb5a693fc 100644 --- a/README.md +++ b/README.md @@ -251,7 +251,7 @@ Browse http://localhost:3000/ [![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html) [![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge) -The Heroku quick deploy currently does not work. It might possibly work if you fork this repo and +The Heroku quick deploy currently does not work. It might work if you fork this repo and modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688 Learn more in @@ -259,11 +259,29 @@ Learn more in ## How-to +### How to remove all cache items + +As current user: + + bin/cache-clear + +As user rss-bridge: + + sudo -u rss-bridge bin/cache-clear + +As root: + + sudo bin/cache-clear + +### How to remove all expired cache items + + bin/cache-clear + ### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" ```shell -# Give rssbridge ownership -chown rssbridge:rssbridge -R /var/www/rss-bridge/cache +# Give rss-bridge ownership +chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache # Or, give www-data ownership chown www-data:www-data -R /var/www/rss-bridge/cache @@ -275,6 +293,16 @@ chmod 777 -R /var/www/rss-bridge/cache rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/ ``` +### How to fix "attempt to write a readonly database" + +The sqlite files (db, wal and shm) are not writeable. + + chown -v rss-bridge:rss-bridge cache/* + +### How to fix "Unable to prepare statement: 1, no such table: storage" + + rm cache/* + ### How to create a new bridge from scratch Create the new bridge in e.g. `bridges/BearBlogBridge.php`: @@ -389,6 +417,8 @@ These commands require that you have installed the dev dependencies in `composer ./vendor/bin/phpunit ./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./ +https://github.com/squizlabs/PHP_CodeSniffer/wiki + ### How to spawn a minimal development environment php -S 127.0.0.1:9001 diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php index c4f5277f553..fc4443ed2d1 100644 --- a/bridges/PixivBridge.php +++ b/bridges/PixivBridge.php @@ -1,9 +1,11 @@ [ 'posts' => [ @@ -251,14 +252,13 @@ public function collectData() $img_url = preg_replace('/https:\/\/i\.pximg\.net/', $proxy_url, $result['url']); } } else { - //else cache and use image. - $img_url = $this->cacheImage( - $result['url'], - $result['id'], - array_key_exists('illustType', $result) - ); + $img_url = $result['url']; + // Temporarily disabling caching of the image + //$img_url = $this->cacheImage($result['url'], $result['id'], array_key_exists('illustType', $result)); } - $item['content'] = ""; + + // Currently, this might result in broken image due to their strict referrer check + $item['content'] = sprintf('', $img_url, $img_url); // Additional content items if (array_key_exists('pageCount', $result)) { @@ -318,7 +318,7 @@ private function checkOptions() if ( !(strlen($proxy) > 0 && preg_match('/https?:\/\/.*/', $proxy)) ) { - return returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.'); + returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.'); } } @@ -326,8 +326,7 @@ private function checkOptions() if ($cookie) { $isAuth = $this->loadCacheValue('is_authenticated'); if (!$isAuth) { - $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true) - or returnServerError('Invalid PHPSESSID cookie provided. Please check the 🍪 and try again.'); + $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true); if ($res['error'] === false) { $this->saveCacheValue('is_authenticated', true); } @@ -374,11 +373,11 @@ private function getData(string $url, bool $cache = true, bool $getJSON = false, if ($cache) { $data = $this->loadCacheValue($url); if (!$data) { - $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url"); + $data = getContents($url, $httpHeaders, $curlOptions, true); $this->saveCacheValue($url, $data); } } else { - $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url"); + $data = getContents($url, $httpHeaders, $curlOptions, true); } $this->checkCookie($data['headers']); diff --git a/docs/10_Bridge_Specific/PixivBridge.md b/docs/10_Bridge_Specific/PixivBridge.md index b782a4450c5..ba8da2d8e69 100644 --- a/docs/10_Bridge_Specific/PixivBridge.md +++ b/docs/10_Bridge_Specific/PixivBridge.md @@ -2,9 +2,14 @@ PixivBridge =============== # Image proxy -As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set, caching or image proxy is required to use this bridge. -To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php` to the url of the proxy. The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy): +As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set, +caching or image proxy is required to use this bridge. + +To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php` +to the url of the proxy. + +The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy): Before: `https://i.pximg.net/img-original/img/0000/00/00/00/00/00/12345678_p0.png` @@ -15,9 +20,11 @@ proxy_url = "https://proxy.example.com" ``` # Authentication -Authentication is required to view and search R-18+ and non-public images. To enable this, set the following in this bridge's configuration in `config.ini.php`. -``` +Authentication is required to view and search R-18+ and non-public images. +To enable this, set the following in this bridge's configuration in `config.ini.php`. + +```ini ; from cookie "PHPSESSID". Recommend to get in incognito browser. cookie = "00000000_hashedsessionidhere" ``` \ No newline at end of file diff --git a/index.php b/index.php index c2c546a184e..126200daae0 100644 --- a/index.php +++ b/index.php @@ -1,33 +1,14 @@ ' . implode("\n", $errors) . ''; - exit(1); -} - -$customConfig = []; -if (file_exists(__DIR__ . '/config.ini.php')) { - $customConfig = parse_ini_file(__DIR__ . '/config.ini.php', true, INI_SCANNER_TYPED); -} -Configuration::loadConfiguration($customConfig, getenv()); - // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); date_default_timezone_set(Configuration::getConfig('system', 'timezone')); -$rssBridge = new RssBridge(); - set_exception_handler(function (\Throwable $e) { - http_response_code(500); - print render(__DIR__ . '/templates/exception.html.php', ['e' => $e]); RssBridge::getLogger()->error('Uncaught Exception', ['e' => $e]); - exit(1); + http_response_code(500); + exit(render(__DIR__ . '/templates/exception.html.php', ['e' => $e])); }); set_error_handler(function ($code, $message, $file, $line) { @@ -63,4 +44,6 @@ } }); +$rssBridge = new RssBridge(); + $rssBridge->main($argv ?? []); diff --git a/lib/CacheFactory.php b/lib/CacheFactory.php index df78d9cbd56..90aa21ba7be 100644 --- a/lib/CacheFactory.php +++ b/lib/CacheFactory.php @@ -37,6 +37,7 @@ public function create(string $name = null): CacheInterface if ($index === false) { throw new \InvalidArgumentException(sprintf('Invalid cache name: "%s"', $name)); } + $className = $cacheNames[$index] . 'Cache'; if (!preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $className)) { throw new \InvalidArgumentException(sprintf('Invalid cache classname: "%s"', $className)); diff --git a/lib/Configuration.php b/lib/Configuration.php index ac7d29bfbdc..ab1c9cdf4c7 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -59,7 +59,7 @@ public static function loadConfiguration(array $customConfig = [], array $env = } $config = parse_ini_file(__DIR__ . '/../config.default.ini.php', true, INI_SCANNER_TYPED); if (!$config) { - throw new \Exception('Error parsing config'); + throw new \Exception('Error parsing ini config'); } foreach ($config as $header => $section) { foreach ($section as $key => $value) { diff --git a/lib/bootstrap.php b/lib/bootstrap.php index 85d823e92c1..fe2069d366b 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -1,5 +1,9 @@ ' . implode("\n", $errors) . ''); +} + +$customConfig = []; +if (file_exists(__DIR__ . '/../config.ini.php')) { + $customConfig = parse_ini_file(__DIR__ . '/../config.ini.php', true, INI_SCANNER_TYPED); +} +Configuration::loadConfiguration($customConfig, getenv()); diff --git a/lib/logger.php b/lib/logger.php index 7a902b5b75d..e579915dc31 100644 --- a/lib/logger.php +++ b/lib/logger.php @@ -149,6 +149,7 @@ public function __invoke(array $record) ); error_log($text); if ($record['level'] < Logger::ERROR && Debug::isEnabled()) { + // The record level is INFO or WARNING here // Not a good idea to print here because http headers might not have been sent print sprintf("
%s
\n", e($text)); } diff --git a/phpcs.xml b/phpcs.xml index 5e50470a662..21e1f50a579 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -1,6 +1,11 @@ - Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/ + + Originally created with the PHP Coding Standard Generator. + But later manually tweaked. + http://edorian.github.com/php-coding-standard-generator/ + + ./static ./vendor ./templates @@ -11,6 +16,7 @@ + diff --git a/templates/exception.html.php b/templates/exception.html.php index e1dd97c112e..62ac90b4217 100644 --- a/templates/exception.html.php +++ b/templates/exception.html.php @@ -23,6 +23,14 @@

+ getCode() === 403): ?> +

403 Forbidden

+

+ The HTTP 403 Forbidden response status code indicates that the + server understands the request but refuses to authorize it. +

+ + getCode() === 404): ?>

404 Page Not Found

From 0c08f791efbfc6dd92f89d922984a6a41583de44 Mon Sep 17 00:00:00 2001 From: ORelio Date: Tue, 9 Jan 2024 20:34:56 +0100 Subject: [PATCH 28/88] CssSelectorComplexBridge: Use cookies everywhere (#3827) (#3870) --- bridges/CssSelectorComplexBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index e661fe18418..67ad4c92293 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -245,7 +245,7 @@ protected function filterUrlList($links, $url_pattern, $limit = 0) protected function getTitle($page, $title_cleanup) { if (is_string($page)) { - $page = getSimpleHTMLDOMCached($page); + $page = getSimpleHTMLDOMCached($page, $this->getHeaders()); } $title = html_entity_decode($page->find('title', 0)->plaintext); if (!empty($title)) { @@ -302,7 +302,7 @@ protected function cleanArticleContent($content, $cleanup_selector, $remove_styl protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0) { if (is_string($page)) { - $page = getSimpleHTMLDOM($page); + $page = getSimpleHTMLDOM($page, $this->getHeaders()); } $entryElements = $page->find($entry_selector); @@ -355,7 +355,7 @@ protected function htmlFindEntryElements($page, $entry_selector, $url_selector, */ protected function fetchArticleElementFromPage($entry_url, $content_selector) { - $entry_html = getSimpleHTMLDOMCached($entry_url); + $entry_html = getSimpleHTMLDOMCached($entry_url, $this->getHeaders()); $article_content = $entry_html->find($content_selector, 0); if (is_null($article_content)) { From 1fecc4cfc13072856d68b7a33233a4e5e54a72db Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 21:28:43 +0100 Subject: [PATCH 29/88] Revert "CssSelectorComplexBridge: Use cookies everywhere (#3827) (#3870)" (#3881) This reverts commit 0c08f791efbfc6dd92f89d922984a6a41583de44. --- bridges/CssSelectorComplexBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index 67ad4c92293..e661fe18418 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -245,7 +245,7 @@ protected function filterUrlList($links, $url_pattern, $limit = 0) protected function getTitle($page, $title_cleanup) { if (is_string($page)) { - $page = getSimpleHTMLDOMCached($page, $this->getHeaders()); + $page = getSimpleHTMLDOMCached($page); } $title = html_entity_decode($page->find('title', 0)->plaintext); if (!empty($title)) { @@ -302,7 +302,7 @@ protected function cleanArticleContent($content, $cleanup_selector, $remove_styl protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0) { if (is_string($page)) { - $page = getSimpleHTMLDOM($page, $this->getHeaders()); + $page = getSimpleHTMLDOM($page); } $entryElements = $page->find($entry_selector); @@ -355,7 +355,7 @@ protected function htmlFindEntryElements($page, $entry_selector, $url_selector, */ protected function fetchArticleElementFromPage($entry_url, $content_selector) { - $entry_html = getSimpleHTMLDOMCached($entry_url, $this->getHeaders()); + $entry_html = getSimpleHTMLDOMCached($entry_url); $article_content = $entry_html->find($content_selector, 0); if (is_null($article_content)) { From 2e5d2a88f39afccefab58b4fb40d22da7794a4b8 Mon Sep 17 00:00:00 2001 From: Dag Date: Tue, 9 Jan 2024 21:36:42 +0100 Subject: [PATCH 30/88] fix: only escape iframe,script and link for html output (#3882) --- formats/AtomFormat.php | 2 +- formats/JsonFormat.php | 2 +- formats/MrssFormat.php | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 1fabef2e7f5..5c9f2b6acfb 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -179,7 +179,7 @@ public function stringify() $content = $document->createElement('content'); $content->setAttribute('type', 'html'); - $content->appendChild($document->createTextNode(break_annoying_html_tags($entryContent))); + $content->appendChild($document->createTextNode($entryContent)); $entry->appendChild($content); foreach ($item->getEnclosures() as $enclosure) { diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php index 016e75e1177..586aae0afba 100644 --- a/formats/JsonFormat.php +++ b/formats/JsonFormat.php @@ -47,7 +47,7 @@ public function stringify() $entryTitle = $item->getTitle(); $entryUri = $item->getURI(); $entryTimestamp = $item->getTimestamp(); - $entryContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : ''; + $entryContent = $item->getContent() ?? ''; $entryEnclosures = $item->getEnclosures(); $entryCategories = $item->getCategories(); diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index e93a8289fd9..aaa1d0cd1b8 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -119,7 +119,7 @@ public function stringify() $itemTimestamp = $item->getTimestamp(); $itemTitle = $item->getTitle(); $itemUri = $item->getURI(); - $itemContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : ''; + $itemContent = $item->getContent() ?? ''; $itemUid = $item->getUid(); $isPermaLink = 'false'; From 491cb50219d8f799d85bfb4e6027adf501e9afa4 Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 10 Jan 2024 00:25:36 +0100 Subject: [PATCH 31/88] docs: typo (#3883) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 46bb5a693fc..e027d91219e 100644 --- a/README.md +++ b/README.md @@ -275,7 +275,7 @@ As root: ### How to remove all expired cache items - bin/cache-clear + bin/cache-prune ### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" From 0eb4f6b2678ab17255ee87bde2f919a7e6883799 Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 10 Jan 2024 20:39:15 +0100 Subject: [PATCH 32/88] fix(tiktok): remove duplicate leading slash in url path, fix #3884 (#3885) --- bridges/TikTokBridge.php | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php index 6590df66808..22fdfcefdcc 100644 --- a/bridges/TikTokBridge.php +++ b/bridges/TikTokBridge.php @@ -35,21 +35,23 @@ public function collectData() foreach ($videos as $video) { $item = []; - // Handle link "untracking" - $linkParts = parse_url($video->find('a', 0)->href); - $link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . $linkParts['path']; + // Omit query string (remove tracking parameters) + $a = $video->find('a', 0); + $href = $a->href; + $parsedUrl = parse_url($href); + $url = $parsedUrl['scheme'] . '://' . $parsedUrl['host'] . '/' . ltrim($parsedUrl['path'], '/'); $image = $video->find('video', 0)->poster; $views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext; $enclosures = [$image]; - $item['uri'] = $link; + $item['uri'] = $url; $item['title'] = 'Video'; $item['author'] = '@' . $author; $item['enclosures'] = $enclosures; $item['content'] = << +

{$views} views


EOD; From c7e8ddf4865516a4bddc884cf80c058cb5aad770 Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 10 Jan 2024 21:47:34 +0100 Subject: [PATCH 33/88] CssSelectorComplexBridge: Use cookies everywhere (RSS-Bridge#3827) (#3886) v2 after feedback from #3870 --- bridges/CssSelectorComplexBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index e661fe18418..632e6b6aa3c 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -245,7 +245,7 @@ protected function filterUrlList($links, $url_pattern, $limit = 0) protected function getTitle($page, $title_cleanup) { if (is_string($page)) { - $page = getSimpleHTMLDOMCached($page); + $page = getSimpleHTMLDOMCached($page, 86400, $this->getHeaders()); } $title = html_entity_decode($page->find('title', 0)->plaintext); if (!empty($title)) { @@ -302,7 +302,7 @@ protected function cleanArticleContent($content, $cleanup_selector, $remove_styl protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0) { if (is_string($page)) { - $page = getSimpleHTMLDOM($page); + $page = getSimpleHTMLDOM($page, $this->getHeaders()); } $entryElements = $page->find($entry_selector); @@ -355,7 +355,7 @@ protected function htmlFindEntryElements($page, $entry_selector, $url_selector, */ protected function fetchArticleElementFromPage($entry_url, $content_selector) { - $entry_html = getSimpleHTMLDOMCached($entry_url); + $entry_html = getSimpleHTMLDOMCached($entry_url, 86400, $this->getHeaders()); $article_content = $entry_html->find($content_selector, 0); if (is_null($article_content)) { From 080e29365a24c5ad0898f2f8bf99e7068c41856b Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 10 Jan 2024 21:48:12 +0100 Subject: [PATCH 34/88] feat(http-client): add http retry count to config (#3887) --- config.default.ini.php | 5 +++++ lib/contents.php | 3 ++- lib/http.php | 30 ++++++++++++++++-------------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/config.default.ini.php b/config.default.ini.php index 201b1414fcd..21727c5e771 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -49,6 +49,11 @@ [http] ; Operation timeout in seconds timeout = 30 + +; Operation retry count in case of curl error +retries = 2 + +; User agent useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" ; Max http response size in MB diff --git a/lib/contents.php b/lib/contents.php index 8676a2a8df8..9998a3f1d6e 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -38,6 +38,7 @@ function getContents( $config = [ 'useragent' => Configuration::getConfig('http', 'useragent'), 'timeout' => Configuration::getConfig('http', 'timeout'), + 'retries' => Configuration::getConfig('http', 'retries'), 'headers' => array_merge($defaultHttpHeaders, $httpHeadersNormalized), 'curl_options' => $curlOptions, ]; @@ -71,7 +72,7 @@ function getContents( // Ignore invalid 'Last-Modified' HTTP header value } } - // todo: to be nice nice citizen we should also check for Etag + // todo: We should also check for Etag } $response = $httpClient->request($url, $config); diff --git a/lib/http.php b/lib/http.php index bfa6b6bff7f..405b01c6833 100644 --- a/lib/http.php +++ b/lib/http.php @@ -63,7 +63,7 @@ public function request(string $url, array $config = []): Response 'proxy' => null, 'curl_options' => [], 'if_not_modified_since' => null, - 'retries' => 3, + 'retries' => 2, 'max_filesize' => null, 'max_redirections' => 5, ]; @@ -136,26 +136,28 @@ public function request(string $url, array $config = []): Response return $len; }); - $attempts = 0; + // This retry logic is a bit hard to understand, but it works + $tries = 0; while (true) { - $attempts++; + $tries++; $body = curl_exec($ch); if ($body !== false) { // The network call was successful, so break out of the loop break; } - if ($attempts > $config['retries']) { - // Finally give up - $curl_error = curl_error($ch); - $curl_errno = curl_errno($ch); - throw new HttpException(sprintf( - 'cURL error %s: %s (%s) for %s', - $curl_error, - $curl_errno, - 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', - $url - )); + if ($tries <= $config['retries']) { + continue; } + // Max retries reached, give up + $curl_error = curl_error($ch); + $curl_errno = curl_errno($ch); + throw new HttpException(sprintf( + 'cURL error %s: %s (%s) for %s', + $curl_error, + $curl_errno, + 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', + $url + )); } $statusCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE); From d9ac0195506040e68cebfc81c5753e416ab7b22f Mon Sep 17 00:00:00 2001 From: July Date: Wed, 10 Jan 2024 18:42:57 -0500 Subject: [PATCH 35/88] [AnnasArchiveBridge] Add new bridge (#3888) * [AnnasArchiveBridge] Add new bridge * [AnnasArchiveBridge] Add missing exampleValue * [AnnasArchiveBridge] Remove vestigial debug print --- bridges/AnnasArchiveBridge.php | 175 +++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 bridges/AnnasArchiveBridge.php diff --git a/bridges/AnnasArchiveBridge.php b/bridges/AnnasArchiveBridge.php new file mode 100644 index 00000000000..e8a1e8c40f6 --- /dev/null +++ b/bridges/AnnasArchiveBridge.php @@ -0,0 +1,175 @@ + [ + 'name' => 'Query', + 'exampleValue' => 'apothecary diaries', + 'required' => true, + ], + 'ext' => [ + 'name' => 'Extension', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'azw3' => 'azw3', + 'cbr' => 'cbr', + 'cbz' => 'cbz', + 'djvu' => 'djvu', + 'epub' => 'epub', + 'fb2' => 'fb2', + 'fb2.zip' => 'fb2.zip', + 'mobi' => 'mobi', + 'pdf' => 'pdf', + ] + ], + 'lang' => [ + 'name' => 'Language', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Afrikaans [af]' => 'af', + 'Arabic [ar]' => 'ar', + 'Bangla [bn]' => 'bn', + 'Belarusian [be]' => 'be', + 'Bulgarian [bg]' => 'bg', + 'Catalan [ca]' => 'ca', + 'Chinese [zh]' => 'zh', + 'Church Slavic [cu]' => 'cu', + 'Croatian [hr]' => 'hr', + 'Czech [cs]' => 'cs', + 'Danish [da]' => 'da', + 'Dongxiang [sce]' => 'sce', + 'Dutch [nl]' => 'nl', + 'English [en]' => 'en', + 'French [fr]' => 'fr', + 'German [de]' => 'de', + 'Greek [el]' => 'el', + 'Hebrew [he]' => 'he', + 'Hindi [hi]' => 'hi', + 'Hungarian [hu]' => 'hu', + 'Indonesian [id]' => 'id', + 'Irish [ga]' => 'ga', + 'Italian [it]' => 'it', + 'Japanese [ja]' => 'ja', + 'Kazakh [kk]' => 'kk', + 'Korean [ko]' => 'ko', + 'Latin [la]' => 'la', + 'Latvian [lv]' => 'lv', + 'Lithuanian [lt]' => 'lt', + 'Luxembourgish [lb]' => 'lb', + 'Ndolo [ndl]' => 'ndl', + 'Norwegian [no]' => 'no', + 'Persian [fa]' => 'fa', + 'Polish [pl]' => 'pl', + 'Portuguese [pt]' => 'pt', + 'Romanian [ro]' => 'ro', + 'Russian [ru]' => 'ru', + 'Serbian [sr]' => 'sr', + 'Spanish [es]' => 'es', + 'Swedish [sv]' => 'sv', + 'Tamil [ta]' => 'ta', + 'Traditional Chinese [zh‑Hant]' => 'zh‑Hant', + 'Turkish [tr]' => 'tr', + 'Ukrainian [uk]' => 'uk', + 'Unknown language' => '_empty', + 'Unknown language [und]' => 'und', + 'Unknown language [urdu]' => 'urdu', + 'Urdu [ur]' => 'ur', + 'Vietnamese [vi]' => 'vi', + 'Welsh [cy]' => 'cy', + ] + ], + 'content' => [ + 'name' => 'Type', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Book (fiction)' => 'book_fiction', + 'Book (non‑fiction)' => 'book_nonfiction', + 'Book (unknown)' => 'book_unknown', + 'Comic book' => 'book_comic', + 'Journal article' => 'journal_article', + 'Magazine' => 'magazine', + 'Standards document' => 'standards_document', + ] + ], + 'src' => [ + 'name' => 'Source', + 'type' => 'list', + 'values' => [ + 'Any' => null, + 'Internet Archive' => 'ia', + 'Libgen.li' => 'lgli', + 'Libgen.rs' => 'lgrs', + 'Sci‑Hub' => 'scihub', + 'Z‑Library' => 'zlib', + ] + ], + ] + ]; + + public function collectData() + { + $url = $this->getURI(); + $list = getSimpleHTMLDOMCached($url); + $list = defaultLinkTo($list, self::URI); + + // Don't attempt to do anything if not found message is given + if ($list->find('.js-not-found-additional')) { + return; + } + + foreach ($list->find('.w-full > .mb-4 > div > a') as $element) { + $item = []; + $item['title'] = $element->find('h3', 0)->plaintext; + $item['author'] = $element->find('div.italic', 0)->plaintext; + $item['uri'] = $element->href; + $item['content'] = $element->plaintext; + $item['uid'] = $item['uri']; + + if ($item_html = getSimpleHTMLDOMCached($item['uri'])) { + $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] .= $item_html->find('main img', 0); + $item['content'] .= $item_html->find('main .mt-4', 0); // Summary + if ($links = $item_html->find('main ul.mb-4', -1)) { + foreach ($links->find('li > a.js-download-link') as $file) { + $item['enclosures'][] = $file->href; + } + // Remove bulk torrents from enclosures list + $item['enclosures'] = array_diff($item['enclosures'], [self::URI . 'datasets']); + } + } + + $this->items[] = $item; + } + } + + public function getName() + { + $name = parent::getName(); + if ($this->getInput('q') != null) { + $name .= ' - ' . $this->getInput('q'); + } + return $name; + } + + public function getURI() + { + $params = array_filter([ // Filter to remove non-provided parameters + 'q' => $this->getInput('q'), + 'ext' => $this->getInput('ext'), + 'lang' => $this->getInput('lang'), + 'src' => $this->getInput('src'), + 'content' => $this->getInput('content'), + ]); + $url = parent::getURI() . 'search?sort=newest&' . http_build_query($params); + return $url; + } +} From d5175aebcc6f74430189caab1525e6511722a6ed Mon Sep 17 00:00:00 2001 From: July Date: Thu, 11 Jan 2024 14:09:45 -0500 Subject: [PATCH 36/88] [ScribbleHubBridge] Get author feed title regardless of CloudFlare (#3892) --- bridges/ScribbleHubBridge.php | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/bridges/ScribbleHubBridge.php b/bridges/ScribbleHubBridge.php index e7cdf337dfc..0f7c7a6c7ff 100644 --- a/bridges/ScribbleHubBridge.php +++ b/bridges/ScribbleHubBridge.php @@ -12,16 +12,16 @@ class ScribbleHubBridge extends FeedExpander 'uid' => [ 'name' => 'uid', 'required' => true, - // Example: Alyson Greaves's stories - 'exampleValue' => '76208', + // Example: miriamrobern's stories + 'exampleValue' => '149271', ], ], 'Series' => [ 'sid' => [ 'name' => 'sid', 'required' => true, - // Example: latest chapters from The Sisters of Dorley by Alyson Greaves - 'exampleValue' => '421879', + // Example: latest chapters from Uskweirs + 'exampleValue' => '965299', ], ] ]; @@ -52,6 +52,10 @@ protected function parseItem(array $item) return []; } + if ($this->queriedContext === 'Author') { + $this->author = $item['author']; + } + $item['comments'] = $item['uri'] . '#comments'; try { @@ -90,16 +94,7 @@ public function getName() $name = parent::getName() . " $this->queriedContext"; switch ($this->queriedContext) { case 'Author': - try { - $page = getSimpleHTMLDOMCached(self::URI . 'profile/' . $this->getInput('uid')); - } catch (HttpException $e) { - // 403 Forbidden, This means we got anti-bot response - if ($e->getCode() === 403) { - return $name; - } - throw $e; - } - $title = html_entity_decode($page->find('.p_m_username.fp_authorname', 0)->plaintext); + $title = $this->author; break; case 'Series': try { From 191e5b0493f3fc1bf2a3fc4169333c03480be23f Mon Sep 17 00:00:00 2001 From: Dag Date: Fri, 12 Jan 2024 01:31:01 +0100 Subject: [PATCH 37/88] feat: add etag support to getContents (#3893) --- README.md | 2 +- config.default.ini.php | 2 +- lib/BridgeCard.php | 5 ++--- lib/FeedExpander.php | 2 +- lib/FeedParser.php | 4 ++-- lib/XPathAbstract.php | 5 ++++- lib/contents.php | 49 +++++++++++++++++++++++------------------- lib/http.php | 4 ++++ 8 files changed, 42 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index e027d91219e..d6d1046c7ba 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ PHP ini config: ```ini ; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini -max_execution_time = 20 +max_execution_time = 15 memory_limit = 64M ``` diff --git a/config.default.ini.php b/config.default.ini.php index 21727c5e771..ee1e54c927d 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -48,7 +48,7 @@ [http] ; Operation timeout in seconds -timeout = 30 +timeout = 15 ; Operation retry count in case of curl error retries = 2 diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 4781ebc18d7..a82f8e5a35c 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -16,7 +16,7 @@ public static function displayBridgeCard($bridgeClassName, $formats, $isActive = $bridge = $bridgeFactory->create($bridgeClassName); - $isHttps = strpos($bridge->getURI(), 'https') === 0; + $isHttps = str_starts_with($bridge->getURI(), 'https'); $uri = $bridge->getURI(); $name = $bridge->getName(); @@ -113,8 +113,7 @@ private static function getFormHeader($bridgeClassName, $isHttps = false, $param } if (!$isHttps) { - $form .= '

Warning : -This bridge is not fetching its content through a secure connection
'; + $form .= '
Warning: This bridge is not fetching its content through a secure connection
'; } return $form; diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index 056578e92d2..c0d7e878cb3 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -41,7 +41,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1) } /** - * This method is overidden by bridges + * This method is overridden by bridges * * @return array */ diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 2d982de160a..510bcb32c80 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -7,9 +7,9 @@ * * Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0. * - * Produce arrays meant to be used inside rss-bridge. + * Produces array meant to be used inside rss-bridge. * - * The item structure is tweaked so that works with FeedItem + * The item structure is tweaked so that it works with FeedItem */ final class FeedParser { diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index e30bb5eba82..2206f79ac9d 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ -518,7 +518,10 @@ protected function formatItemUri($value) if (strlen($value) === 0) { return ''; } - if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { + if ( + strpos($value, 'http://') === 0 + || strpos($value, 'https://') === 0 + ) { return $value; } diff --git a/lib/contents.php b/lib/contents.php index 9998a3f1d6e..43db8c031dc 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -24,6 +24,32 @@ function getContents( $headerValue = trim(implode(':', array_slice($parts, 1))); $httpHeadersNormalized[$headerName] = $headerValue; } + + $requestBodyHash = null; + if (isset($curlOptions[CURLOPT_POSTFIELDS])) { + $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false)); + } + $cacheKey = implode('_', ['server', $url, $requestBodyHash]); + + /** @var Response $cachedResponse */ + $cachedResponse = $cache->get($cacheKey); + if ($cachedResponse) { + $lastModified = $cachedResponse->getHeader('last-modified'); + if ($lastModified) { + try { + // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime + $lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified); + $config['if_not_modified_since'] = $lastModified->getTimestamp(); + } catch (Exception $e) { + // Failed to parse last-modified + } + } + $etag = $cachedResponse->getHeader('etag'); + if ($etag) { + $httpHeadersNormalized['if-none-match'] = $etag; + } + } + // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 $defaultHttpHeaders = [ 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', @@ -35,6 +61,7 @@ function getContents( 'Sec-Fetch-User' => '?1', 'TE' => 'trailers', ]; + $config = [ 'useragent' => Configuration::getConfig('http', 'useragent'), 'timeout' => Configuration::getConfig('http', 'timeout'), @@ -53,28 +80,6 @@ function getContents( $config['proxy'] = Configuration::getConfig('proxy', 'url'); } - $requestBodyHash = null; - if (isset($curlOptions[CURLOPT_POSTFIELDS])) { - $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false)); - } - $cacheKey = implode('_', ['server', $url, $requestBodyHash]); - - /** @var Response $cachedResponse */ - $cachedResponse = $cache->get($cacheKey); - if ($cachedResponse) { - $cachedLastModified = $cachedResponse->getHeader('last-modified'); - if ($cachedLastModified) { - try { - // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime - $cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . $cachedLastModified); - $config['if_not_modified_since'] = $cachedLastModified->getTimestamp(); - } catch (Exception $dateTimeParseFailue) { - // Ignore invalid 'Last-Modified' HTTP header value - } - } - // todo: We should also check for Etag - } - $response = $httpClient->request($url, $config); switch ($response->getCode()) { diff --git a/lib/http.php b/lib/http.php index 405b01c6833..90b65a6e99e 100644 --- a/lib/http.php +++ b/lib/http.php @@ -258,6 +258,10 @@ public function getHeaders(): array } /** + * HTTP response may have multiple headers with the same name. + * + * This method by default, returns only the last header. + * * @return string[]|string|null */ public function getHeader(string $name, bool $all = false) From 6eaf0eaa565361d0a18f23cdcd8df894116ad73a Mon Sep 17 00:00:00 2001 From: Dag Date: Wed, 17 Jan 2024 20:10:32 +0100 Subject: [PATCH 38/88] fix: add cache clearing tools (#3896) Forgot to add these in #3867 --- .gitignore | 1 - bin/cache-clear | 14 ++++++++++++++ bin/cache-prune | 14 ++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100755 bin/cache-clear create mode 100755 bin/cache-prune diff --git a/.gitignore b/.gitignore index 9725342dc19..6ed95489e41 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ data/ *.pydevproject .project .metadata -bin/ tmp/ *.tmp *.bak diff --git a/bin/cache-clear b/bin/cache-clear new file mode 100755 index 00000000000..3563abadc1a --- /dev/null +++ b/bin/cache-clear @@ -0,0 +1,14 @@ +#!/usr/bin/env php +clear(); diff --git a/bin/cache-prune b/bin/cache-prune new file mode 100755 index 00000000000..7b7a603130d --- /dev/null +++ b/bin/cache-prune @@ -0,0 +1,14 @@ +#!/usr/bin/env php +prune(); From 6408123330a28041344cccf3133981196e62a9a6 Mon Sep 17 00:00:00 2001 From: SebLaus <97241865+SebLaus@users.noreply.github.com> Date: Fri, 19 Jan 2024 03:59:47 +0100 Subject: [PATCH 39/88] [IdealoBridge] added Header with user-agent and fixed typo (#3897) * Added header with useragent * copy paste error from local test environment * Fixed missing space in New before * fixed missing space after comma in argument list --- bridges/IdealoBridge.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php index 89c5f87df90..cef2b812165 100644 --- a/bridges/IdealoBridge.php +++ b/bridges/IdealoBridge.php @@ -42,8 +42,13 @@ public function getIcon() public function collectData() { + // Needs header with user-agent to function properly. + $header = [ + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15' + ]; + $link = $this->getInput('Link'); - $html = getSimpleHTMLDOM($link); + $html = getSimpleHTMLDOM($link, $header); // Get Productname $titleobj = $html->find('.oopStage-title', 0); @@ -80,7 +85,7 @@ public function collectData() // Generate Content if ($PriceNew > 1) { $content = "

Price New:
$PriceNew

"; - $content .= "

Price Newbefore:
$OldPriceNew

"; + $content .= "

Price New before:
$OldPriceNew

"; } if ($this->getInput('MaxPriceNew') != '') { From 12a90e20749471c1f2c794792f6b1fabcb74d13e Mon Sep 17 00:00:00 2001 From: ORelio Date: Fri, 19 Jan 2024 21:30:06 +0100 Subject: [PATCH 40/88] Utils: Add Webp MIME type (#3900) --- lib/utils.php | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/utils.php b/lib/utils.php index e8f00f5484e..07806e7c256 100644 --- a/lib/utils.php +++ b/lib/utils.php @@ -171,6 +171,7 @@ function parse_mime_type($url) 'jpg' => 'image/jpeg', 'gif' => 'image/gif', 'png' => 'image/png', + 'webp' => 'image/webp', 'image' => 'image/*', 'mp3' => 'audio/mpeg', ]; From bb36eb9eb831eb6bce8641323b7e5ce90798575b Mon Sep 17 00:00:00 2001 From: ORelio Date: Fri, 19 Jan 2024 21:30:53 +0100 Subject: [PATCH 41/88] [CssSelectorBridge] Time/Thumbnail improvements (#3879) (#3901) * Implement