find('div[class=js-vue2]', 0)->getAttribute('data-vue2'));
+ return $data;
+ }
+
/**
* Get the source of a Deal if it exists
* @return string String of the deal source
*/
- private function getSource($deal)
+ private function getSource($jsonData)
{
- if (($origin = $deal->find('button[class*=text--color-greyShade]', 0)) != null) {
- $path = str_replace(' ', '/', trim(Json::decode($origin->{'data-cloak-link'})['path']));
- $text = $origin->find('span[class*=link]', 0);
+ if ($jsonData['props']['thread']['merchant'] != null) {
+ $path = $this->i8n('uri-merchant') . $jsonData['props']['thread']['merchant']['merchantId'];
+ $text = $jsonData['props']['thread']['merchant']['merchantName'];
return '';
} else {
return '';
From ea58c8d2bcd17b09e7d9dea64297ea44885a3933 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=BD=D0=B5=D0=B7=D0=B4=D0=B0=D0=BB=D0=B8=D1=81=D1=8C?=
=?UTF-8?q?=D0=BA=D0=BE?= <105280814+uandreew@users.noreply.github.com>
Date: Sat, 6 Jan 2024 19:13:50 +0200
Subject: [PATCH 25/88] Update 06_Public_Hosts.md (#3877)
---
docs/01_General/06_Public_Hosts.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md
index c9572824844..4aa905dad49 100644
--- a/docs/01_General/06_Public_Hosts.md
+++ b/docs/01_General/06_Public_Hosts.md
@@ -22,6 +22,7 @@
| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.foxhaven.cyou| ![](https://img.shields.io/badge/website-up-brightgreen) | [@Aysilu](https://foxhaven.cyou) | Hosted with Timeweb (Maintained in Poland) |
| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.m3wz.su| ![](https://img.shields.io/badge/website-up-brightgreen) | [@m3oweezed](https://m3wz.su/en/about) | Poland, Hosted with Timeweb Cloud |
| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | ![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | Hosted with Hostaris, Germany
+| ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine
## Inactive instances
From 3ce94409ab650e042993480d638482a89901776d Mon Sep 17 00:00:00 2001
From: Dag
Date: Tue, 9 Jan 2024 20:18:33 +0100
Subject: [PATCH 26/88] feat: support itunes namespace in top channel feed
(#3776)
Also preserves other properties.
---
actions/DisplayAction.php | 11 +-
bridges/ItakuBridge.php | 6 +-
formats/AtomFormat.php | 81 ++++++------
formats/HtmlFormat.php | 12 +-
formats/JsonFormat.php | 18 +--
formats/MrssFormat.php | 124 ++++++++++--------
formats/PlaintextFormat.php | 6 +-
lib/BridgeAbstract.php | 53 +++++---
lib/FormatAbstract.php | 59 ++++-----
lib/bootstrap.php | 3 -
tests/FormatTest.php | 72 ++++++++++
tests/Formats/BaseFormatTest.php | 2 +-
.../expectedAtomFormat/feed.common.xml | 6 +-
.../samples/expectedAtomFormat/feed.empty.xml | 6 +-
.../expectedAtomFormat/feed.emptyItems.xml | 6 +-
.../expectedAtomFormat/feed.microblog.xml | 6 +-
.../expectedMrssFormat/feed.common.xml | 6 +-
.../samples/expectedMrssFormat/feed.empty.xml | 2 +-
.../expectedMrssFormat/feed.emptyItems.xml | 2 +-
.../expectedMrssFormat/feed.microblog.xml | 6 +-
tests/Formats/samples/feed.empty.json | 2 +-
tests/Formats/samples/feed.emptyItems.json | 2 +-
22 files changed, 293 insertions(+), 198 deletions(-)
create mode 100644 tests/FormatTest.php
diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php
index 435639966fd..080da52ea59 100644
--- a/actions/DisplayAction.php
+++ b/actions/DisplayAction.php
@@ -100,7 +100,7 @@ public function execute(array $request)
private function createResponse(array $request, BridgeAbstract $bridge, FormatAbstract $format)
{
$items = [];
- $infos = [];
+ $feed = [];
try {
$bridge->loadConfiguration();
@@ -116,12 +116,7 @@ private function createResponse(array $request, BridgeAbstract $bridge, FormatAb
}
$items = $feedItems;
}
- $infos = [
- 'name' => $bridge->getName(),
- 'uri' => $bridge->getURI(),
- 'donationUri' => $bridge->getDonationURI(),
- 'icon' => $bridge->getIcon()
- ];
+ $feed = $bridge->getFeed();
} catch (\Exception $e) {
if ($e instanceof HttpException) {
// Reproduce (and log) these responses regardless of error output and report limit
@@ -155,7 +150,7 @@ private function createResponse(array $request, BridgeAbstract $bridge, FormatAb
}
$format->setItems($items);
- $format->setExtraInfos($infos);
+ $format->setFeed($feed);
$now = time();
$format->setLastModified($now);
$headers = [
diff --git a/bridges/ItakuBridge.php b/bridges/ItakuBridge.php
index 149757f5c4e..0577752cc55 100644
--- a/bridges/ItakuBridge.php
+++ b/bridges/ItakuBridge.php
@@ -280,7 +280,7 @@ public function collectData()
$opt['range'] = '';
$user_id = $this->getInput('user_id') ?? $this->getOwnerID($this->getInput('user'));
- $data = $this->getFeed(
+ $data = $this->getFeedData(
$opt,
$user_id
);
@@ -289,7 +289,7 @@ public function collectData()
if ($this->queriedContext === 'Home feed') {
$opt['order'] = $this->getInput('order');
$opt['range'] = $this->getInput('range');
- $data = $this->getFeed($opt);
+ $data = $this->getFeedData($opt);
}
foreach ($data['results'] as $record) {
@@ -409,7 +409,7 @@ private function getPostsSearch(array $opt)
return $this->getData($url, false, true);
}
- private function getFeed(array $opt, $ownerID = null)
+ private function getFeedData(array $opt, $ownerID = null)
{
$url = self::URI . "/api/feed/?date_range={$opt['range']}&ordering={$opt['order']}&page=1&page_size=30&format=json";
diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php
index 07ca7272f5d..1fabef2e7f5 100644
--- a/formats/AtomFormat.php
+++ b/formats/AtomFormat.php
@@ -17,44 +17,61 @@ class AtomFormat extends FormatAbstract
public function stringify()
{
$document = new \DomDocument('1.0', $this->getCharset());
+ $document->formatOutput = true;
$feedUrl = get_current_url();
- $extraInfos = $this->getExtraInfos();
- if (empty($extraInfos['uri'])) {
- $uri = REPOSITORY;
- } else {
- $uri = $extraInfos['uri'];
- }
-
- $document->formatOutput = true;
$feed = $document->createElementNS(self::ATOM_NS, 'feed');
$document->appendChild($feed);
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:media', self::MRSS_NS);
- $title = $document->createElement('title');
- $feed->appendChild($title);
- $title->setAttribute('type', 'text');
- $title->appendChild($document->createTextNode($extraInfos['name']));
+ $feedArray = $this->getFeed();
+ foreach ($feedArray as $feedKey => $feedValue) {
+ if (in_array($feedKey, ['donationUri'])) {
+ continue;
+ }
+ if ($feedKey === 'name') {
+ $title = $document->createElement('title');
+ $feed->appendChild($title);
+ $title->setAttribute('type', 'text');
+ $title->appendChild($document->createTextNode($feedValue));
+ } elseif ($feedKey === 'icon') {
+ if ($feedValue) {
+ $icon = $document->createElement('icon');
+ $feed->appendChild($icon);
+ $icon->appendChild($document->createTextNode($feedValue));
+
+ $logo = $document->createElement('logo');
+ $feed->appendChild($logo);
+ $logo->appendChild($document->createTextNode($feedValue));
+ }
+ } elseif ($feedKey === 'uri') {
+ if ($feedValue) {
+ $linkAlternate = $document->createElement('link');
+ $feed->appendChild($linkAlternate);
+ $linkAlternate->setAttribute('rel', 'alternate');
+ $linkAlternate->setAttribute('type', 'text/html');
+ $linkAlternate->setAttribute('href', $feedValue);
+
+ $linkSelf = $document->createElement('link');
+ $feed->appendChild($linkSelf);
+ $linkSelf->setAttribute('rel', 'self');
+ $linkSelf->setAttribute('type', 'application/atom+xml');
+ $linkSelf->setAttribute('href', $feedUrl);
+ }
+ } elseif ($feedKey === 'itunes') {
+ // todo: skip?
+ } else {
+ $element = $document->createElement($feedKey);
+ $feed->appendChild($element);
+ $element->appendChild($document->createTextNode($feedValue));
+ }
+ }
$id = $document->createElement('id');
$feed->appendChild($id);
$id->appendChild($document->createTextNode($feedUrl));
- $uriparts = parse_url($uri);
- if (empty($extraInfos['icon'])) {
- $iconUrl = $uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico';
- } else {
- $iconUrl = $extraInfos['icon'];
- }
- $icon = $document->createElement('icon');
- $feed->appendChild($icon);
- $icon->appendChild($document->createTextNode($iconUrl));
-
- $logo = $document->createElement('logo');
- $feed->appendChild($logo);
- $logo->appendChild($document->createTextNode($iconUrl));
-
$feedTimestamp = gmdate(DATE_ATOM, $this->lastModified);
$updated = $document->createElement('updated');
$feed->appendChild($updated);
@@ -69,17 +86,7 @@ public function stringify()
$author->appendChild($authorName);
$authorName->appendChild($document->createTextNode($feedAuthor));
- $linkAlternate = $document->createElement('link');
- $feed->appendChild($linkAlternate);
- $linkAlternate->setAttribute('rel', 'alternate');
- $linkAlternate->setAttribute('type', 'text/html');
- $linkAlternate->setAttribute('href', $uri);
-
- $linkSelf = $document->createElement('link');
- $feed->appendChild($linkSelf);
- $linkSelf->setAttribute('rel', 'self');
- $linkSelf->setAttribute('type', 'application/atom+xml');
- $linkSelf->setAttribute('href', $feedUrl);
+
foreach ($this->getItems() as $item) {
$itemArray = $item->toArray();
diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php
index 4933af8d720..ef66f493375 100644
--- a/formats/HtmlFormat.php
+++ b/formats/HtmlFormat.php
@@ -8,7 +8,7 @@ public function stringify()
{
$queryString = $_SERVER['QUERY_STRING'];
- $extraInfos = $this->getExtraInfos();
+ $feedArray = $this->getFeed();
$formatFactory = new FormatFactory();
$buttons = [];
$linkTags = [];
@@ -29,9 +29,9 @@ public function stringify()
];
}
- if (Configuration::getConfig('admin', 'donations') && $extraInfos['donationUri'] !== '') {
+ if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) {
$buttons[] = [
- 'href' => e($extraInfos['donationUri']),
+ 'href' => e($feedArray['donationUri']),
'value' => 'Donate to maintainer',
];
}
@@ -39,7 +39,7 @@ public function stringify()
$items = [];
foreach ($this->getItems() as $item) {
$items[] = [
- 'url' => $item->getURI() ?: $extraInfos['uri'],
+ 'url' => $item->getURI() ?: $feedArray['uri'],
'title' => $item->getTitle() ?? '(no title)',
'timestamp' => $item->getTimestamp(),
'author' => $item->getAuthor(),
@@ -51,9 +51,9 @@ public function stringify()
$html = render_template(__DIR__ . '/../templates/html-format.html.php', [
'charset' => $this->getCharset(),
- 'title' => $extraInfos['name'],
+ 'title' => $feedArray['name'],
'linkTags' => $linkTags,
- 'uri' => $extraInfos['uri'],
+ 'uri' => $feedArray['uri'],
'buttons' => $buttons,
'items' => $items,
]);
diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php
index dd61da41d8c..016e75e1177 100644
--- a/formats/JsonFormat.php
+++ b/formats/JsonFormat.php
@@ -25,18 +25,18 @@ class JsonFormat extends FormatAbstract
public function stringify()
{
- $host = $_SERVER['HTTP_HOST'] ?? '';
- $extraInfos = $this->getExtraInfos();
+ $feedArray = $this->getFeed();
+
$data = [
- 'version' => 'https://jsonfeed.org/version/1',
- 'title' => empty($extraInfos['name']) ? $host : $extraInfos['name'],
- 'home_page_url' => empty($extraInfos['uri']) ? REPOSITORY : $extraInfos['uri'],
- 'feed_url' => get_current_url(),
+ 'version' => 'https://jsonfeed.org/version/1',
+ 'title' => $feedArray['name'],
+ 'home_page_url' => $feedArray['uri'],
+ 'feed_url' => get_current_url(),
];
- if (!empty($extraInfos['icon'])) {
- $data['icon'] = $extraInfos['icon'];
- $data['favicon'] = $extraInfos['icon'];
+ if ($feedArray['icon']) {
+ $data['icon'] = $feedArray['icon'];
+ $data['favicon'] = $feedArray['icon'];
}
$items = [];
diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php
index 5b96a6a75a6..e93a8289fd9 100644
--- a/formats/MrssFormat.php
+++ b/formats/MrssFormat.php
@@ -35,16 +35,8 @@ class MrssFormat extends FormatAbstract
public function stringify()
{
$document = new \DomDocument('1.0', $this->getCharset());
-
- $feedUrl = get_current_url();
- $extraInfos = $this->getExtraInfos();
- if (empty($extraInfos['uri'])) {
- $uri = REPOSITORY;
- } else {
- $uri = $extraInfos['uri'];
- }
-
$document->formatOutput = true;
+
$feed = $document->createElement('rss');
$document->appendChild($feed);
$feed->setAttribute('version', '2.0');
@@ -54,50 +46,73 @@ public function stringify()
$channel = $document->createElement('channel');
$feed->appendChild($channel);
- $title = $extraInfos['name'];
- $channelTitle = $document->createElement('title');
- $channel->appendChild($channelTitle);
- $channelTitle->appendChild($document->createTextNode($title));
-
- $link = $document->createElement('link');
- $channel->appendChild($link);
- $link->appendChild($document->createTextNode($uri));
-
- $description = $document->createElement('description');
- $channel->appendChild($description);
- $description->appendChild($document->createTextNode($extraInfos['name']));
-
- $allowedIconExtensions = [
- '.gif',
- '.jpg',
- '.png',
- ];
- $icon = $extraInfos['icon'];
- if (!empty($icon) && in_array(substr($icon, -4), $allowedIconExtensions)) {
- $feedImage = $document->createElement('image');
- $channel->appendChild($feedImage);
- $iconUrl = $document->createElement('url');
- $iconUrl->appendChild($document->createTextNode($icon));
- $feedImage->appendChild($iconUrl);
- $iconTitle = $document->createElement('title');
- $iconTitle->appendChild($document->createTextNode($title));
- $feedImage->appendChild($iconTitle);
- $iconLink = $document->createElement('link');
- $iconLink->appendChild($document->createTextNode($uri));
- $feedImage->appendChild($iconLink);
- }
+ $feedArray = $this->getFeed();
+ $uri = $feedArray['uri'];
+ $title = $feedArray['name'];
- $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link');
- $channel->appendChild($linkAlternate);
- $linkAlternate->setAttribute('rel', 'alternate');
- $linkAlternate->setAttribute('type', 'text/html');
- $linkAlternate->setAttribute('href', $uri);
-
- $linkSelf = $document->createElementNS(self::ATOM_NS, 'link');
- $channel->appendChild($linkSelf);
- $linkSelf->setAttribute('rel', 'self');
- $linkSelf->setAttribute('type', 'application/atom+xml');
- $linkSelf->setAttribute('href', $feedUrl);
+ foreach ($feedArray as $feedKey => $feedValue) {
+ if (in_array($feedKey, ['atom', 'donationUri'])) {
+ continue;
+ }
+ if ($feedKey === 'name') {
+ $channelTitle = $document->createElement('title');
+ $channel->appendChild($channelTitle);
+ $channelTitle->appendChild($document->createTextNode($title));
+
+ $description = $document->createElement('description');
+ $channel->appendChild($description);
+ $description->appendChild($document->createTextNode($title));
+ } elseif ($feedKey === 'uri') {
+ $link = $document->createElement('link');
+ $channel->appendChild($link);
+ $link->appendChild($document->createTextNode($uri));
+
+ $linkAlternate = $document->createElementNS(self::ATOM_NS, 'link');
+ $channel->appendChild($linkAlternate);
+ $linkAlternate->setAttribute('rel', 'alternate');
+ $linkAlternate->setAttribute('type', 'text/html');
+ $linkAlternate->setAttribute('href', $uri);
+
+ $linkSelf = $document->createElementNS(self::ATOM_NS, 'link');
+ $channel->appendChild($linkSelf);
+ $linkSelf->setAttribute('rel', 'self');
+ $linkSelf->setAttribute('type', 'application/atom+xml');
+ $feedUrl = get_current_url();
+ $linkSelf->setAttribute('href', $feedUrl);
+ } elseif ($feedKey === 'icon') {
+ $allowedIconExtensions = [
+ '.gif',
+ '.jpg',
+ '.png',
+ '.ico',
+ ];
+ $icon = $feedValue;
+ if ($icon && in_array(substr($icon, -4), $allowedIconExtensions)) {
+ $feedImage = $document->createElement('image');
+ $channel->appendChild($feedImage);
+ $iconUrl = $document->createElement('url');
+ $iconUrl->appendChild($document->createTextNode($icon));
+ $feedImage->appendChild($iconUrl);
+ $iconTitle = $document->createElement('title');
+ $iconTitle->appendChild($document->createTextNode($title));
+ $feedImage->appendChild($iconTitle);
+ $iconLink = $document->createElement('link');
+ $iconLink->appendChild($document->createTextNode($uri));
+ $feedImage->appendChild($iconLink);
+ }
+ } elseif ($feedKey === 'itunes') {
+ $feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS);
+ foreach ($feedValue as $itunesKey => $itunesValue) {
+ $itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey);
+ $channel->appendChild($itunesProperty);
+ $itunesProperty->appendChild($document->createTextNode($itunesValue));
+ }
+ } else {
+ $element = $document->createElement($feedKey);
+ $channel->appendChild($element);
+ $element->appendChild($document->createTextNode($feedValue));
+ }
+ }
foreach ($this->getItems() as $item) {
$itemArray = $item->toArray();
@@ -135,6 +150,7 @@ public function stringify()
$entry->appendChild($itunesProperty);
$itunesProperty->appendChild($document->createTextNode($itunesValue));
}
+
if (isset($itemArray['enclosure'])) {
$itunesEnclosure = $document->createElement('enclosure');
$entry->appendChild($itunesEnclosure);
@@ -142,7 +158,9 @@ public function stringify()
$itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']);
$itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']);
}
- } if (!empty($itemUri)) {
+ }
+
+ if (!empty($itemUri)) {
$entryLink = $document->createElement('link');
$entry->appendChild($entryLink);
$entryLink->appendChild($document->createTextNode($itemUri));
diff --git a/formats/PlaintextFormat.php b/formats/PlaintextFormat.php
index 0a9237d04a9..4e18caa6058 100644
--- a/formats/PlaintextFormat.php
+++ b/formats/PlaintextFormat.php
@@ -6,11 +6,11 @@ class PlaintextFormat extends FormatAbstract
public function stringify()
{
- $data = [];
+ $feed = $this->getFeed();
foreach ($this->getItems() as $item) {
- $data[] = $item->toArray();
+ $feed['items'][] = $item->toArray();
}
- $text = print_r($data, true);
+ $text = print_r($feed, true);
// Remove invalid non-UTF8 characters
ini_set('mbstring.substitute_character', 'none');
$text = mb_convert_encoding($text, $this->getCharset(), 'UTF-8');
diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php
index 0f86f454c0d..8001ba4fba4 100644
--- a/lib/BridgeAbstract.php
+++ b/lib/BridgeAbstract.php
@@ -40,49 +40,66 @@ public function __construct(
abstract public function collectData();
- public function getItems()
+ public function getFeed(): array
{
- return $this->items;
+ return [
+ 'name' => $this->getName(),
+ 'uri' => $this->getURI(),
+ 'donationUri' => $this->getDonationURI(),
+ 'icon' => $this->getIcon(),
+ ];
}
- public function getOption(string $name)
+ public function getName()
{
- return $this->configuration[$name] ?? null;
+ return static::NAME;
}
- public function getDescription()
+ public function getURI()
{
- return static::DESCRIPTION;
+ return static::URI ?? 'https://github.com/RSS-Bridge/rss-bridge/';
}
- public function getMaintainer(): string
+ public function getDonationURI(): string
{
- return static::MAINTAINER;
+ return static::DONATION_URI;
}
- public function getName()
+ public function getIcon()
{
- return static::NAME;
+ if (static::URI) {
+ // This favicon may or may not exist
+ return rtrim(static::URI, '/') . '/favicon.ico';
+ }
+ return '';
}
- public function getIcon()
+ public function getOption(string $name)
{
- return static::URI . '/favicon.ico';
+ return $this->configuration[$name] ?? null;
}
- public function getParameters(): array
+ /**
+ * The description is currently not used in feed production
+ */
+ public function getDescription()
{
- return static::PARAMETERS;
+ return static::DESCRIPTION;
}
- public function getURI()
+ public function getMaintainer(): string
{
- return static::URI;
+ return static::MAINTAINER;
}
- public function getDonationURI(): string
+ public function getParameters(): array
{
- return static::DONATION_URI;
+ return static::PARAMETERS;
+ }
+
+ public function getItems()
+ {
+ return $this->items;
}
public function getCacheTimeout()
diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php
index c76d1e42166..28eb4bbfa7a 100644
--- a/lib/FormatAbstract.php
+++ b/lib/FormatAbstract.php
@@ -9,28 +9,25 @@ abstract class FormatAbstract
protected string $charset = 'UTF-8';
protected array $items = [];
protected int $lastModified;
- protected array $extraInfos = [];
- abstract public function stringify();
-
- public function getMimeType(): string
- {
- return static::MIME_TYPE;
- }
+ protected array $feed = [];
- public function setCharset(string $charset)
- {
- $this->charset = $charset;
- }
+ abstract public function stringify();
- public function getCharset(): string
+ public function setFeed(array $feed)
{
- return $this->charset;
+ $default = [
+ 'name' => '',
+ 'uri' => '',
+ 'icon' => '',
+ 'donationUri' => '',
+ ];
+ $this->feed = array_merge($default, $feed);
}
- public function setLastModified(int $lastModified)
+ public function getFeed(): array
{
- $this->lastModified = $lastModified;
+ return $this->feed;
}
/**
@@ -49,27 +46,23 @@ public function getItems(): array
return $this->items;
}
- public function setExtraInfos(array $infos = [])
+ public function getMimeType(): string
{
- $extras = [
- 'name',
- 'uri',
- 'icon',
- 'donationUri',
- ];
- foreach ($extras as $extra) {
- if (!isset($infos[$extra])) {
- $infos[$extra] = '';
- }
- }
- $this->extraInfos = $infos;
+ return static::MIME_TYPE;
}
- public function getExtraInfos(): array
+ public function setCharset(string $charset)
{
- if (!$this->extraInfos) {
- $this->setExtraInfos();
- }
- return $this->extraInfos;
+ $this->charset = $charset;
+ }
+
+ public function getCharset(): string
+ {
+ return $this->charset;
+ }
+
+ public function setLastModified(int $lastModified)
+ {
+ $this->lastModified = $lastModified;
}
}
diff --git a/lib/bootstrap.php b/lib/bootstrap.php
index a95de9dd0ef..85d823e92c1 100644
--- a/lib/bootstrap.php
+++ b/lib/bootstrap.php
@@ -9,9 +9,6 @@
/** Path to the cache folder */
const PATH_CACHE = __DIR__ . '/../cache/';
-/** URL to the RSS-Bridge repository */
-const REPOSITORY = 'https://github.com/RSS-Bridge/rss-bridge/';
-
// Allow larger files for simple_html_dom
// todo: extract to config (if possible)
const MAX_FILE_SIZE = 10000000;
diff --git a/tests/FormatTest.php b/tests/FormatTest.php
new file mode 100644
index 00000000000..b5df395cccd
--- /dev/null
+++ b/tests/FormatTest.php
@@ -0,0 +1,72 @@
+ '',
+ 'uri' => '',
+ 'icon' => '',
+ 'donationUri' => '',
+ ];
+ $this->assertEquals([], $sut->getFeed());
+
+ $sut->setFeed([
+ 'name' => '0',
+ 'uri' => '1',
+ 'icon' => '2',
+ 'donationUri' => '3',
+ ]);
+ $expected = [
+ 'name' => '0',
+ 'uri' => '1',
+ 'icon' => '2',
+ 'donationUri' => '3',
+ ];
+ $this->assertEquals($expected, $sut->getFeed());
+
+ $sut->setFeed([]);
+ $expected = [
+ 'name' => '',
+ 'uri' => '',
+ 'icon' => '',
+ 'donationUri' => '',
+ ];
+ $this->assertEquals($expected, $sut->getFeed());
+
+ $sut->setFeed(['foo' => 'bar', 'foo2' => 'bar2']);
+ $expected = [
+ 'name' => '',
+ 'uri' => '',
+ 'icon' => '',
+ 'donationUri' => '',
+ 'foo' => 'bar',
+ 'foo2' => 'bar2',
+ ];
+ $this->assertEquals($expected, $sut->getFeed());
+ }
+}
+
+class TestFormat extends \FormatAbstract
+{
+ public function stringify()
+ {
+ }
+}
+
+class TestBridge extends \BridgeAbstract
+{
+ public function collectData()
+ {
+ $this->items[] = ['title' => 'kek'];
+ }
+}
diff --git a/tests/Formats/BaseFormatTest.php b/tests/Formats/BaseFormatTest.php
index 71e196f0260..8999e7722af 100644
--- a/tests/Formats/BaseFormatTest.php
+++ b/tests/Formats/BaseFormatTest.php
@@ -61,7 +61,7 @@ protected function formatData(string $formatName, \stdClass $sample): string
$formatFactory = new FormatFactory();
$format = $formatFactory->create($formatName);
$format->setItems($sample->items);
- $format->setExtraInfos($sample->meta);
+ $format->setFeed($sample->meta);
$format->setLastModified(strtotime('2000-01-01 12:00:00 UTC'));
return $format->stringify();
diff --git a/tests/Formats/samples/expectedAtomFormat/feed.common.xml b/tests/Formats/samples/expectedAtomFormat/feed.common.xml
index aa6d0687da2..455e5440529 100644
--- a/tests/Formats/samples/expectedAtomFormat/feed.common.xml
+++ b/tests/Formats/samples/expectedAtomFormat/feed.common.xml
@@ -2,15 +2,15 @@
Sample feed with common data
- https://example.com/feed?type=common&items=4
+
+
https://example.com/logo.png
https://example.com/logo.png
+ https://example.com/feed?type=common&items=4
2000-01-01T12:00:00+00:00
RSS-Bridge
-
-
Test Entry
diff --git a/tests/Formats/samples/expectedAtomFormat/feed.empty.xml b/tests/Formats/samples/expectedAtomFormat/feed.empty.xml
index fc04304da51..083f230f9bf 100644
--- a/tests/Formats/samples/expectedAtomFormat/feed.empty.xml
+++ b/tests/Formats/samples/expectedAtomFormat/feed.empty.xml
@@ -2,14 +2,12 @@
Sample feed with minimum data
+
+
https://example.com/feed
- https://github.com/favicon.ico
- https://github.com/favicon.ico
2000-01-01T12:00:00+00:00
RSS-Bridge
-
-
diff --git a/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml b/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml
index 18572fac4f8..d7cb461a1e6 100644
--- a/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml
+++ b/tests/Formats/samples/expectedAtomFormat/feed.emptyItems.xml
@@ -2,15 +2,13 @@
Sample feed with minimum data
+
+
https://example.com/feed
- https://github.com/favicon.ico
- https://github.com/favicon.ico
2000-01-01T12:00:00+00:00
RSS-Bridge
-
-
Sample Item #1
diff --git a/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml b/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml
index 32bc02731e7..8eb0133c83a 100644
--- a/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml
+++ b/tests/Formats/samples/expectedAtomFormat/feed.microblog.xml
@@ -2,15 +2,15 @@
Sample microblog feed
- https://example.com/feed
+
+
https://example.com/logo.png
https://example.com/logo.png
+ https://example.com/feed
2000-01-01T12:00:00+00:00
RSS-Bridge
-
-
Oh 😲 I found three monkeys 🙈🙉🙊
diff --git a/tests/Formats/samples/expectedMrssFormat/feed.common.xml b/tests/Formats/samples/expectedMrssFormat/feed.common.xml
index 38a16f88afc..92838ae883e 100644
--- a/tests/Formats/samples/expectedMrssFormat/feed.common.xml
+++ b/tests/Formats/samples/expectedMrssFormat/feed.common.xml
@@ -2,15 +2,15 @@
Sample feed with common data
- https://example.com/blog/
Sample feed with common data
+ https://example.com/blog/
+
+
https://example.com/logo.png
Sample feed with common data
https://example.com/blog/
-
-
-
Test Entry
diff --git a/tests/Formats/samples/expectedMrssFormat/feed.empty.xml b/tests/Formats/samples/expectedMrssFormat/feed.empty.xml
index 888c42b6cf0..40eecfc6ff9 100644
--- a/tests/Formats/samples/expectedMrssFormat/feed.empty.xml
+++ b/tests/Formats/samples/expectedMrssFormat/feed.empty.xml
@@ -2,8 +2,8 @@
Sample feed with minimum data
- https://github.com/RSS-Bridge/rss-bridge/
Sample feed with minimum data
+ https://github.com/RSS-Bridge/rss-bridge/
diff --git a/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml b/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml
index 9e712ddd998..8839f5a5918 100644
--- a/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml
+++ b/tests/Formats/samples/expectedMrssFormat/feed.emptyItems.xml
@@ -2,8 +2,8 @@
Sample feed with minimum data
- https://github.com/RSS-Bridge/rss-bridge/
Sample feed with minimum data
+ https://github.com/RSS-Bridge/rss-bridge/
diff --git a/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml b/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml
index 81dac87a793..63c04c0f420 100644
--- a/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml
+++ b/tests/Formats/samples/expectedMrssFormat/feed.microblog.xml
@@ -2,15 +2,15 @@
Sample microblog feed
- https://example.com/blog/
Sample microblog feed
+ https://example.com/blog/
+
+
https://example.com/logo.png
Sample microblog feed
https://example.com/blog/
-
-
-
1918f084648b82057c1dd3faa3d091da82a6fac2
diff --git a/tests/Formats/samples/feed.empty.json b/tests/Formats/samples/feed.empty.json
index aac09f64994..7b1a2eae54d 100644
--- a/tests/Formats/samples/feed.empty.json
+++ b/tests/Formats/samples/feed.empty.json
@@ -6,7 +6,7 @@
},
"meta": {
"name": "Sample feed with minimum data",
- "uri": "",
+ "uri": "https://github.com/RSS-Bridge/rss-bridge/",
"icon": ""
},
"items": []
diff --git a/tests/Formats/samples/feed.emptyItems.json b/tests/Formats/samples/feed.emptyItems.json
index 0287d428917..4d0774875e2 100644
--- a/tests/Formats/samples/feed.emptyItems.json
+++ b/tests/Formats/samples/feed.emptyItems.json
@@ -6,7 +6,7 @@
},
"meta": {
"name": "Sample feed with minimum data",
- "uri": "",
+ "uri": "https://github.com/RSS-Bridge/rss-bridge/",
"icon": ""
},
"items": [
From 0bf5dbbc0ba46cc27fe40b554b0c3c0ba705ef8b Mon Sep 17 00:00:00 2001
From: Dag
Date: Tue, 9 Jan 2024 20:33:35 +0100
Subject: [PATCH 27/88] chore: add tools for manually administrating the
configured cache (#3867)
---
README.md | 36 +++++++++++++++++++++++---
bridges/PixivBridge.php | 29 ++++++++++-----------
docs/10_Bridge_Specific/PixivBridge.md | 15 ++++++++---
index.php | 25 +++---------------
lib/CacheFactory.php | 1 +
lib/Configuration.php | 2 +-
lib/bootstrap.php | 15 +++++++++++
lib/logger.php | 1 +
phpcs.xml | 8 +++++-
templates/exception.html.php | 8 ++++++
10 files changed, 95 insertions(+), 45 deletions(-)
diff --git a/README.md b/README.md
index 34efc8de3e7..46bb5a693fc 100644
--- a/README.md
+++ b/README.md
@@ -251,7 +251,7 @@ Browse http://localhost:3000/
[![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html)
[![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge)
-The Heroku quick deploy currently does not work. It might possibly work if you fork this repo and
+The Heroku quick deploy currently does not work. It might work if you fork this repo and
modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688
Learn more in
@@ -259,11 +259,29 @@ Learn more in
## How-to
+### How to remove all cache items
+
+As current user:
+
+ bin/cache-clear
+
+As user rss-bridge:
+
+ sudo -u rss-bridge bin/cache-clear
+
+As root:
+
+ sudo bin/cache-clear
+
+### How to remove all expired cache items
+
+ bin/cache-clear
+
### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable"
```shell
-# Give rssbridge ownership
-chown rssbridge:rssbridge -R /var/www/rss-bridge/cache
+# Give rss-bridge ownership
+chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache
# Or, give www-data ownership
chown www-data:www-data -R /var/www/rss-bridge/cache
@@ -275,6 +293,16 @@ chmod 777 -R /var/www/rss-bridge/cache
rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/
```
+### How to fix "attempt to write a readonly database"
+
+The sqlite files (db, wal and shm) are not writeable.
+
+ chown -v rss-bridge:rss-bridge cache/*
+
+### How to fix "Unable to prepare statement: 1, no such table: storage"
+
+ rm cache/*
+
### How to create a new bridge from scratch
Create the new bridge in e.g. `bridges/BearBlogBridge.php`:
@@ -389,6 +417,8 @@ These commands require that you have installed the dev dependencies in `composer
./vendor/bin/phpunit
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
+https://github.com/squizlabs/PHP_CodeSniffer/wiki
+
### How to spawn a minimal development environment
php -S 127.0.0.1:9001
diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php
index c4f5277f553..fc4443ed2d1 100644
--- a/bridges/PixivBridge.php
+++ b/bridges/PixivBridge.php
@@ -1,9 +1,11 @@
[
'posts' => [
@@ -251,14 +252,13 @@ public function collectData()
$img_url = preg_replace('/https:\/\/i\.pximg\.net/', $proxy_url, $result['url']);
}
} else {
- //else cache and use image.
- $img_url = $this->cacheImage(
- $result['url'],
- $result['id'],
- array_key_exists('illustType', $result)
- );
+ $img_url = $result['url'];
+ // Temporarily disabling caching of the image
+ //$img_url = $this->cacheImage($result['url'], $result['id'], array_key_exists('illustType', $result));
}
- $item['content'] = "";
+
+ // Currently, this might result in a broken image due to Pixiv's strict referrer check
+ $item['content'] = sprintf('', $img_url, $img_url);
// Additional content items
if (array_key_exists('pageCount', $result)) {
@@ -318,7 +318,7 @@ private function checkOptions()
if (
!(strlen($proxy) > 0 && preg_match('/https?:\/\/.*/', $proxy))
) {
- return returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.');
+ returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.');
}
}
@@ -326,8 +326,7 @@ private function checkOptions()
if ($cookie) {
$isAuth = $this->loadCacheValue('is_authenticated');
if (!$isAuth) {
- $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true)
- or returnServerError('Invalid PHPSESSID cookie provided. Please check the 🍪 and try again.');
+ $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true);
if ($res['error'] === false) {
$this->saveCacheValue('is_authenticated', true);
}
@@ -374,11 +373,11 @@ private function getData(string $url, bool $cache = true, bool $getJSON = false,
if ($cache) {
$data = $this->loadCacheValue($url);
if (!$data) {
- $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url");
+ $data = getContents($url, $httpHeaders, $curlOptions, true);
$this->saveCacheValue($url, $data);
}
} else {
- $data = getContents($url, $httpHeaders, $curlOptions, true) or returnServerError("Could not load $url");
+ $data = getContents($url, $httpHeaders, $curlOptions, true);
}
$this->checkCookie($data['headers']);
diff --git a/docs/10_Bridge_Specific/PixivBridge.md b/docs/10_Bridge_Specific/PixivBridge.md
index b782a4450c5..ba8da2d8e69 100644
--- a/docs/10_Bridge_Specific/PixivBridge.md
+++ b/docs/10_Bridge_Specific/PixivBridge.md
@@ -2,9 +2,14 @@ PixivBridge
===============
# Image proxy
-As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set, caching or image proxy is required to use this bridge.
-To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php` to the url of the proxy. The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy):
+As Pixiv requires images to be loaded with the `Referer "https://www.pixiv.net/"` header set,
+caching or an image proxy is required to use this bridge.
+
+To turn off image caching, set the `proxy_url` value in this bridge's configuration section of `config.ini.php`
+to the url of the proxy.
+
+The bridge will then use the proxy in this format (essentially replacing `https://i.pximg.net` with the proxy):
Before: `https://i.pximg.net/img-original/img/0000/00/00/00/00/00/12345678_p0.png`
@@ -15,9 +20,11 @@ proxy_url = "https://proxy.example.com"
```
# Authentication
-Authentication is required to view and search R-18+ and non-public images. To enable this, set the following in this bridge's configuration in `config.ini.php`.
-```
+Authentication is required to view and search R-18+ and non-public images.
+To enable this, set the following in this bridge's configuration in `config.ini.php`.
+
+```ini
; from cookie "PHPSESSID". Recommend to get in incognito browser.
cookie = "00000000_hashedsessionidhere"
```
\ No newline at end of file
diff --git a/index.php b/index.php
index c2c546a184e..126200daae0 100644
--- a/index.php
+++ b/index.php
@@ -1,33 +1,14 @@
' . implode("\n", $errors) . '';
- exit(1);
-}
-
-$customConfig = [];
-if (file_exists(__DIR__ . '/config.ini.php')) {
- $customConfig = parse_ini_file(__DIR__ . '/config.ini.php', true, INI_SCANNER_TYPED);
-}
-Configuration::loadConfiguration($customConfig, getenv());
-
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
-$rssBridge = new RssBridge();
-
set_exception_handler(function (\Throwable $e) {
- http_response_code(500);
- print render(__DIR__ . '/templates/exception.html.php', ['e' => $e]);
RssBridge::getLogger()->error('Uncaught Exception', ['e' => $e]);
- exit(1);
+ http_response_code(500);
+ exit(render(__DIR__ . '/templates/exception.html.php', ['e' => $e]));
});
set_error_handler(function ($code, $message, $file, $line) {
@@ -63,4 +44,6 @@
}
});
+$rssBridge = new RssBridge();
+
$rssBridge->main($argv ?? []);
diff --git a/lib/CacheFactory.php b/lib/CacheFactory.php
index df78d9cbd56..90aa21ba7be 100644
--- a/lib/CacheFactory.php
+++ b/lib/CacheFactory.php
@@ -37,6 +37,7 @@ public function create(string $name = null): CacheInterface
if ($index === false) {
throw new \InvalidArgumentException(sprintf('Invalid cache name: "%s"', $name));
}
+
$className = $cacheNames[$index] . 'Cache';
if (!preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $className)) {
throw new \InvalidArgumentException(sprintf('Invalid cache classname: "%s"', $className));
diff --git a/lib/Configuration.php b/lib/Configuration.php
index ac7d29bfbdc..ab1c9cdf4c7 100644
--- a/lib/Configuration.php
+++ b/lib/Configuration.php
@@ -59,7 +59,7 @@ public static function loadConfiguration(array $customConfig = [], array $env =
}
$config = parse_ini_file(__DIR__ . '/../config.default.ini.php', true, INI_SCANNER_TYPED);
if (!$config) {
- throw new \Exception('Error parsing config');
+ throw new \Exception('Error parsing ini config');
}
foreach ($config as $header => $section) {
foreach ($section as $key => $value) {
diff --git a/lib/bootstrap.php b/lib/bootstrap.php
index 85d823e92c1..fe2069d366b 100644
--- a/lib/bootstrap.php
+++ b/lib/bootstrap.php
@@ -1,5 +1,9 @@
' . implode("\n", $errors) . '');
+}
+
+$customConfig = [];
+if (file_exists(__DIR__ . '/../config.ini.php')) {
+ $customConfig = parse_ini_file(__DIR__ . '/../config.ini.php', true, INI_SCANNER_TYPED);
+}
+Configuration::loadConfiguration($customConfig, getenv());
diff --git a/lib/logger.php b/lib/logger.php
index 7a902b5b75d..e579915dc31 100644
--- a/lib/logger.php
+++ b/lib/logger.php
@@ -149,6 +149,7 @@ public function __invoke(array $record)
);
error_log($text);
if ($record['level'] < Logger::ERROR && Debug::isEnabled()) {
+ // The record level is INFO or WARNING here
// Not a good idea to print here because http headers might not have been sent
print sprintf("
%s \n", e($text));
}
diff --git a/phpcs.xml b/phpcs.xml
index 5e50470a662..21e1f50a579 100644
--- a/phpcs.xml
+++ b/phpcs.xml
@@ -1,6 +1,11 @@
- Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/
+
+ Originally created with the PHP Coding Standard Generator.
+ But later manually tweaked.
+ http://edorian.github.com/php-coding-standard-generator/
+
+
./static
./vendor
./templates
@@ -11,6 +16,7 @@
+
diff --git a/templates/exception.html.php b/templates/exception.html.php
index e1dd97c112e..62ac90b4217 100644
--- a/templates/exception.html.php
+++ b/templates/exception.html.php
@@ -23,6 +23,14 @@
+ getCode() === 403): ?>
+ 403 Forbidden
+
+ The HTTP 403 Forbidden response status code indicates that the
+ server understands the request but refuses to authorize it.
+
+
+
getCode() === 404): ?>
404 Page Not Found
From 0c08f791efbfc6dd92f89d922984a6a41583de44 Mon Sep 17 00:00:00 2001
From: ORelio
Date: Tue, 9 Jan 2024 20:34:56 +0100
Subject: [PATCH 28/88] CssSelectorComplexBridge: Use cookies everywhere
(#3827) (#3870)
---
bridges/CssSelectorComplexBridge.php | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php
index e661fe18418..67ad4c92293 100644
--- a/bridges/CssSelectorComplexBridge.php
+++ b/bridges/CssSelectorComplexBridge.php
@@ -245,7 +245,7 @@ protected function filterUrlList($links, $url_pattern, $limit = 0)
protected function getTitle($page, $title_cleanup)
{
if (is_string($page)) {
- $page = getSimpleHTMLDOMCached($page);
+ $page = getSimpleHTMLDOMCached($page, $this->getHeaders());
}
$title = html_entity_decode($page->find('title', 0)->plaintext);
if (!empty($title)) {
@@ -302,7 +302,7 @@ protected function cleanArticleContent($content, $cleanup_selector, $remove_styl
protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0)
{
if (is_string($page)) {
- $page = getSimpleHTMLDOM($page);
+ $page = getSimpleHTMLDOM($page, $this->getHeaders());
}
$entryElements = $page->find($entry_selector);
@@ -355,7 +355,7 @@ protected function htmlFindEntryElements($page, $entry_selector, $url_selector,
*/
protected function fetchArticleElementFromPage($entry_url, $content_selector)
{
- $entry_html = getSimpleHTMLDOMCached($entry_url);
+ $entry_html = getSimpleHTMLDOMCached($entry_url, $this->getHeaders());
$article_content = $entry_html->find($content_selector, 0);
if (is_null($article_content)) {
From 1fecc4cfc13072856d68b7a33233a4e5e54a72db Mon Sep 17 00:00:00 2001
From: Dag
Date: Tue, 9 Jan 2024 21:28:43 +0100
Subject: [PATCH 29/88] Revert "CssSelectorComplexBridge: Use cookies
everywhere (#3827) (#3870)" (#3881)
This reverts commit 0c08f791efbfc6dd92f89d922984a6a41583de44.
---
bridges/CssSelectorComplexBridge.php | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php
index 67ad4c92293..e661fe18418 100644
--- a/bridges/CssSelectorComplexBridge.php
+++ b/bridges/CssSelectorComplexBridge.php
@@ -245,7 +245,7 @@ protected function filterUrlList($links, $url_pattern, $limit = 0)
protected function getTitle($page, $title_cleanup)
{
if (is_string($page)) {
- $page = getSimpleHTMLDOMCached($page, $this->getHeaders());
+ $page = getSimpleHTMLDOMCached($page);
}
$title = html_entity_decode($page->find('title', 0)->plaintext);
if (!empty($title)) {
@@ -302,7 +302,7 @@ protected function cleanArticleContent($content, $cleanup_selector, $remove_styl
protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0)
{
if (is_string($page)) {
- $page = getSimpleHTMLDOM($page, $this->getHeaders());
+ $page = getSimpleHTMLDOM($page);
}
$entryElements = $page->find($entry_selector);
@@ -355,7 +355,7 @@ protected function htmlFindEntryElements($page, $entry_selector, $url_selector,
*/
protected function fetchArticleElementFromPage($entry_url, $content_selector)
{
- $entry_html = getSimpleHTMLDOMCached($entry_url, $this->getHeaders());
+ $entry_html = getSimpleHTMLDOMCached($entry_url);
$article_content = $entry_html->find($content_selector, 0);
if (is_null($article_content)) {
From 2e5d2a88f39afccefab58b4fb40d22da7794a4b8 Mon Sep 17 00:00:00 2001
From: Dag
Date: Tue, 9 Jan 2024 21:36:42 +0100
Subject: [PATCH 30/88] fix: only escape iframe,script and link for html output
(#3882)
---
formats/AtomFormat.php | 2 +-
formats/JsonFormat.php | 2 +-
formats/MrssFormat.php | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php
index 1fabef2e7f5..5c9f2b6acfb 100644
--- a/formats/AtomFormat.php
+++ b/formats/AtomFormat.php
@@ -179,7 +179,7 @@ public function stringify()
$content = $document->createElement('content');
$content->setAttribute('type', 'html');
- $content->appendChild($document->createTextNode(break_annoying_html_tags($entryContent)));
+ $content->appendChild($document->createTextNode($entryContent));
$entry->appendChild($content);
foreach ($item->getEnclosures() as $enclosure) {
diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php
index 016e75e1177..586aae0afba 100644
--- a/formats/JsonFormat.php
+++ b/formats/JsonFormat.php
@@ -47,7 +47,7 @@ public function stringify()
$entryTitle = $item->getTitle();
$entryUri = $item->getURI();
$entryTimestamp = $item->getTimestamp();
- $entryContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : '';
+ $entryContent = $item->getContent() ?? '';
$entryEnclosures = $item->getEnclosures();
$entryCategories = $item->getCategories();
diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php
index e93a8289fd9..aaa1d0cd1b8 100644
--- a/formats/MrssFormat.php
+++ b/formats/MrssFormat.php
@@ -119,7 +119,7 @@ public function stringify()
$itemTimestamp = $item->getTimestamp();
$itemTitle = $item->getTitle();
$itemUri = $item->getURI();
- $itemContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : '';
+ $itemContent = $item->getContent() ?? '';
$itemUid = $item->getUid();
$isPermaLink = 'false';
From 491cb50219d8f799d85bfb4e6027adf501e9afa4 Mon Sep 17 00:00:00 2001
From: Dag
Date: Wed, 10 Jan 2024 00:25:36 +0100
Subject: [PATCH 31/88] docs: typo (#3883)
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 46bb5a693fc..e027d91219e 100644
--- a/README.md
+++ b/README.md
@@ -275,7 +275,7 @@ As root:
### How to remove all expired cache items
- bin/cache-clear
+ bin/cache-prune
### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable"
From 0eb4f6b2678ab17255ee87bde2f919a7e6883799 Mon Sep 17 00:00:00 2001
From: Dag
Date: Wed, 10 Jan 2024 20:39:15 +0100
Subject: [PATCH 32/88] fix(tiktok): remove duplicate leading slash in url
path, fix #3884 (#3885)
---
bridges/TikTokBridge.php | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php
index 6590df66808..22fdfcefdcc 100644
--- a/bridges/TikTokBridge.php
+++ b/bridges/TikTokBridge.php
@@ -35,21 +35,23 @@ public function collectData()
foreach ($videos as $video) {
$item = [];
- // Handle link "untracking"
- $linkParts = parse_url($video->find('a', 0)->href);
- $link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . $linkParts['path'];
+ // Omit query string (remove tracking parameters)
+ $a = $video->find('a', 0);
+ $href = $a->href;
+ $parsedUrl = parse_url($href);
+ $url = $parsedUrl['scheme'] . '://' . $parsedUrl['host'] . '/' . ltrim($parsedUrl['path'], '/');
$image = $video->find('video', 0)->poster;
$views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext;
$enclosures = [$image];
- $item['uri'] = $link;
+ $item['uri'] = $url;
$item['title'] = 'Video';
$item['author'] = '@' . $author;
$item['enclosures'] = $enclosures;
$item['content'] = <<
+
{$views} views
EOD;
From c7e8ddf4865516a4bddc884cf80c058cb5aad770 Mon Sep 17 00:00:00 2001
From: ORelio
Date: Wed, 10 Jan 2024 21:47:34 +0100
Subject: [PATCH 33/88] CssSelectorComplexBridge: Use cookies everywhere
(RSS-Bridge#3827) (#3886)
v2 after feedback from #3870
---
bridges/CssSelectorComplexBridge.php | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php
index e661fe18418..632e6b6aa3c 100644
--- a/bridges/CssSelectorComplexBridge.php
+++ b/bridges/CssSelectorComplexBridge.php
@@ -245,7 +245,7 @@ protected function filterUrlList($links, $url_pattern, $limit = 0)
protected function getTitle($page, $title_cleanup)
{
if (is_string($page)) {
- $page = getSimpleHTMLDOMCached($page);
+ $page = getSimpleHTMLDOMCached($page, 86400, $this->getHeaders());
}
$title = html_entity_decode($page->find('title', 0)->plaintext);
if (!empty($title)) {
@@ -302,7 +302,7 @@ protected function cleanArticleContent($content, $cleanup_selector, $remove_styl
protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0)
{
if (is_string($page)) {
- $page = getSimpleHTMLDOM($page);
+ $page = getSimpleHTMLDOM($page, $this->getHeaders());
}
$entryElements = $page->find($entry_selector);
@@ -355,7 +355,7 @@ protected function htmlFindEntryElements($page, $entry_selector, $url_selector,
*/
protected function fetchArticleElementFromPage($entry_url, $content_selector)
{
- $entry_html = getSimpleHTMLDOMCached($entry_url);
+ $entry_html = getSimpleHTMLDOMCached($entry_url, 86400, $this->getHeaders());
$article_content = $entry_html->find($content_selector, 0);
if (is_null($article_content)) {
From 080e29365a24c5ad0898f2f8bf99e7068c41856b Mon Sep 17 00:00:00 2001
From: Dag
Date: Wed, 10 Jan 2024 21:48:12 +0100
Subject: [PATCH 34/88] feat(http-client): add http retry count to config
(#3887)
---
config.default.ini.php | 5 +++++
lib/contents.php | 3 ++-
lib/http.php | 30 ++++++++++++++++--------------
3 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/config.default.ini.php b/config.default.ini.php
index 201b1414fcd..21727c5e771 100644
--- a/config.default.ini.php
+++ b/config.default.ini.php
@@ -49,6 +49,11 @@
[http]
; Operation timeout in seconds
timeout = 30
+
+; Operation retry count in case of curl error
+retries = 2
+
+; User agent
useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"
; Max http response size in MB
diff --git a/lib/contents.php b/lib/contents.php
index 8676a2a8df8..9998a3f1d6e 100644
--- a/lib/contents.php
+++ b/lib/contents.php
@@ -38,6 +38,7 @@ function getContents(
$config = [
'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'),
+ 'retries' => Configuration::getConfig('http', 'retries'),
'headers' => array_merge($defaultHttpHeaders, $httpHeadersNormalized),
'curl_options' => $curlOptions,
];
@@ -71,7 +72,7 @@ function getContents(
// Ignore invalid 'Last-Modified' HTTP header value
}
}
- // todo: to be nice nice citizen we should also check for Etag
+ // todo: We should also check for Etag
}
$response = $httpClient->request($url, $config);
diff --git a/lib/http.php b/lib/http.php
index bfa6b6bff7f..405b01c6833 100644
--- a/lib/http.php
+++ b/lib/http.php
@@ -63,7 +63,7 @@ public function request(string $url, array $config = []): Response
'proxy' => null,
'curl_options' => [],
'if_not_modified_since' => null,
- 'retries' => 3,
+ 'retries' => 2,
'max_filesize' => null,
'max_redirections' => 5,
];
@@ -136,26 +136,28 @@ public function request(string $url, array $config = []): Response
return $len;
});
- $attempts = 0;
+ // This retry logic is a bit hard to understand, but it works
+ $tries = 0;
while (true) {
- $attempts++;
+ $tries++;
$body = curl_exec($ch);
if ($body !== false) {
// The network call was successful, so break out of the loop
break;
}
- if ($attempts > $config['retries']) {
- // Finally give up
- $curl_error = curl_error($ch);
- $curl_errno = curl_errno($ch);
- throw new HttpException(sprintf(
- 'cURL error %s: %s (%s) for %s',
- $curl_error,
- $curl_errno,
- 'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
- $url
- ));
+ if ($tries <= $config['retries']) {
+ continue;
}
+ // Max retries reached, give up
+ $curl_error = curl_error($ch);
+ $curl_errno = curl_errno($ch);
+ throw new HttpException(sprintf(
+ 'cURL error %s: %s (%s) for %s',
+ $curl_error,
+ $curl_errno,
+ 'https://curl.haxx.se/libcurl/c/libcurl-errors.html',
+ $url
+ ));
}
$statusCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
From d9ac0195506040e68cebfc81c5753e416ab7b22f Mon Sep 17 00:00:00 2001
From: July
Date: Wed, 10 Jan 2024 18:42:57 -0500
Subject: [PATCH 35/88] [AnnasArchiveBridge] Add new bridge (#3888)
* [AnnasArchiveBridge] Add new bridge
* [AnnasArchiveBridge] Add missing exampleValue
* [AnnasArchiveBridge] Remove vestigial debug print
---
bridges/AnnasArchiveBridge.php | 175 +++++++++++++++++++++++++++++++++
1 file changed, 175 insertions(+)
create mode 100644 bridges/AnnasArchiveBridge.php
diff --git a/bridges/AnnasArchiveBridge.php b/bridges/AnnasArchiveBridge.php
new file mode 100644
index 00000000000..e8a1e8c40f6
--- /dev/null
+++ b/bridges/AnnasArchiveBridge.php
@@ -0,0 +1,175 @@
+ [
+ 'name' => 'Query',
+ 'exampleValue' => 'apothecary diaries',
+ 'required' => true,
+ ],
+ 'ext' => [
+ 'name' => 'Extension',
+ 'type' => 'list',
+ 'values' => [
+ 'Any' => null,
+ 'azw3' => 'azw3',
+ 'cbr' => 'cbr',
+ 'cbz' => 'cbz',
+ 'djvu' => 'djvu',
+ 'epub' => 'epub',
+ 'fb2' => 'fb2',
+ 'fb2.zip' => 'fb2.zip',
+ 'mobi' => 'mobi',
+ 'pdf' => 'pdf',
+ ]
+ ],
+ 'lang' => [
+ 'name' => 'Language',
+ 'type' => 'list',
+ 'values' => [
+ 'Any' => null,
+ 'Afrikaans [af]' => 'af',
+ 'Arabic [ar]' => 'ar',
+ 'Bangla [bn]' => 'bn',
+ 'Belarusian [be]' => 'be',
+ 'Bulgarian [bg]' => 'bg',
+ 'Catalan [ca]' => 'ca',
+ 'Chinese [zh]' => 'zh',
+ 'Church Slavic [cu]' => 'cu',
+ 'Croatian [hr]' => 'hr',
+ 'Czech [cs]' => 'cs',
+ 'Danish [da]' => 'da',
+ 'Dongxiang [sce]' => 'sce',
+ 'Dutch [nl]' => 'nl',
+ 'English [en]' => 'en',
+ 'French [fr]' => 'fr',
+ 'German [de]' => 'de',
+ 'Greek [el]' => 'el',
+ 'Hebrew [he]' => 'he',
+ 'Hindi [hi]' => 'hi',
+ 'Hungarian [hu]' => 'hu',
+ 'Indonesian [id]' => 'id',
+ 'Irish [ga]' => 'ga',
+ 'Italian [it]' => 'it',
+ 'Japanese [ja]' => 'ja',
+ 'Kazakh [kk]' => 'kk',
+ 'Korean [ko]' => 'ko',
+ 'Latin [la]' => 'la',
+ 'Latvian [lv]' => 'lv',
+ 'Lithuanian [lt]' => 'lt',
+ 'Luxembourgish [lb]' => 'lb',
+ 'Ndolo [ndl]' => 'ndl',
+ 'Norwegian [no]' => 'no',
+ 'Persian [fa]' => 'fa',
+ 'Polish [pl]' => 'pl',
+ 'Portuguese [pt]' => 'pt',
+ 'Romanian [ro]' => 'ro',
+ 'Russian [ru]' => 'ru',
+ 'Serbian [sr]' => 'sr',
+ 'Spanish [es]' => 'es',
+ 'Swedish [sv]' => 'sv',
+ 'Tamil [ta]' => 'ta',
+ 'Traditional Chinese [zh‑Hant]' => 'zh‑Hant',
+ 'Turkish [tr]' => 'tr',
+ 'Ukrainian [uk]' => 'uk',
+ 'Unknown language' => '_empty',
+ 'Unknown language [und]' => 'und',
+ 'Unknown language [urdu]' => 'urdu',
+ 'Urdu [ur]' => 'ur',
+ 'Vietnamese [vi]' => 'vi',
+ 'Welsh [cy]' => 'cy',
+ ]
+ ],
+ 'content' => [
+ 'name' => 'Type',
+ 'type' => 'list',
+ 'values' => [
+ 'Any' => null,
+ 'Book (fiction)' => 'book_fiction',
+ 'Book (non‑fiction)' => 'book_nonfiction',
+ 'Book (unknown)' => 'book_unknown',
+ 'Comic book' => 'book_comic',
+ 'Journal article' => 'journal_article',
+ 'Magazine' => 'magazine',
+ 'Standards document' => 'standards_document',
+ ]
+ ],
+ 'src' => [
+ 'name' => 'Source',
+ 'type' => 'list',
+ 'values' => [
+ 'Any' => null,
+ 'Internet Archive' => 'ia',
+ 'Libgen.li' => 'lgli',
+ 'Libgen.rs' => 'lgrs',
+ 'Sci‑Hub' => 'scihub',
+ 'Z‑Library' => 'zlib',
+ ]
+ ],
+ ]
+ ];
+
+ public function collectData()
+ {
+ $url = $this->getURI();
+ $list = getSimpleHTMLDOMCached($url);
+ $list = defaultLinkTo($list, self::URI);
+
+ // Don't attempt to do anything if not found message is given
+ if ($list->find('.js-not-found-additional')) {
+ return;
+ }
+
+ foreach ($list->find('.w-full > .mb-4 > div > a') as $element) {
+ $item = [];
+ $item['title'] = $element->find('h3', 0)->plaintext;
+ $item['author'] = $element->find('div.italic', 0)->plaintext;
+ $item['uri'] = $element->href;
+ $item['content'] = $element->plaintext;
+ $item['uid'] = $item['uri'];
+
+ if ($item_html = getSimpleHTMLDOMCached($item['uri'])) {
+ $item_html = defaultLinkTo($item_html, self::URI);
+ $item['content'] .= $item_html->find('main img', 0);
+ $item['content'] .= $item_html->find('main .mt-4', 0); // Summary
+ if ($links = $item_html->find('main ul.mb-4', -1)) {
+ foreach ($links->find('li > a.js-download-link') as $file) {
+ $item['enclosures'][] = $file->href;
+ }
+ // Remove bulk torrents from enclosures list
+ $item['enclosures'] = array_diff($item['enclosures'], [self::URI . 'datasets']);
+ }
+ }
+
+ $this->items[] = $item;
+ }
+ }
+
+ public function getName()
+ {
+ $name = parent::getName();
+ if ($this->getInput('q') != null) {
+ $name .= ' - ' . $this->getInput('q');
+ }
+ return $name;
+ }
+
+ public function getURI()
+ {
+ $params = array_filter([ // Filter to remove non-provided parameters
+ 'q' => $this->getInput('q'),
+ 'ext' => $this->getInput('ext'),
+ 'lang' => $this->getInput('lang'),
+ 'src' => $this->getInput('src'),
+ 'content' => $this->getInput('content'),
+ ]);
+ $url = parent::getURI() . 'search?sort=newest&' . http_build_query($params);
+ return $url;
+ }
+}
From d5175aebcc6f74430189caab1525e6511722a6ed Mon Sep 17 00:00:00 2001
From: July
Date: Thu, 11 Jan 2024 14:09:45 -0500
Subject: [PATCH 36/88] [ScribbleHubBridge] Get author feed title regardless of
CloudFlare (#3892)
---
bridges/ScribbleHubBridge.php | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/bridges/ScribbleHubBridge.php b/bridges/ScribbleHubBridge.php
index e7cdf337dfc..0f7c7a6c7ff 100644
--- a/bridges/ScribbleHubBridge.php
+++ b/bridges/ScribbleHubBridge.php
@@ -12,16 +12,16 @@ class ScribbleHubBridge extends FeedExpander
'uid' => [
'name' => 'uid',
'required' => true,
- // Example: Alyson Greaves's stories
- 'exampleValue' => '76208',
+ // Example: miriamrobern's stories
+ 'exampleValue' => '149271',
],
],
'Series' => [
'sid' => [
'name' => 'sid',
'required' => true,
- // Example: latest chapters from The Sisters of Dorley by Alyson Greaves
- 'exampleValue' => '421879',
+ // Example: latest chapters from Uskweirs
+ 'exampleValue' => '965299',
],
]
];
@@ -52,6 +52,10 @@ protected function parseItem(array $item)
return [];
}
+ if ($this->queriedContext === 'Author') {
+ $this->author = $item['author'];
+ }
+
$item['comments'] = $item['uri'] . '#comments';
try {
@@ -90,16 +94,7 @@ public function getName()
$name = parent::getName() . " $this->queriedContext";
switch ($this->queriedContext) {
case 'Author':
- try {
- $page = getSimpleHTMLDOMCached(self::URI . 'profile/' . $this->getInput('uid'));
- } catch (HttpException $e) {
- // 403 Forbidden, This means we got anti-bot response
- if ($e->getCode() === 403) {
- return $name;
- }
- throw $e;
- }
- $title = html_entity_decode($page->find('.p_m_username.fp_authorname', 0)->plaintext);
+ $title = $this->author;
break;
case 'Series':
try {
From 191e5b0493f3fc1bf2a3fc4169333c03480be23f Mon Sep 17 00:00:00 2001
From: Dag
Date: Fri, 12 Jan 2024 01:31:01 +0100
Subject: [PATCH 37/88] feat: add etag support to getContents (#3893)
---
README.md | 2 +-
config.default.ini.php | 2 +-
lib/BridgeCard.php | 5 ++---
lib/FeedExpander.php | 2 +-
lib/FeedParser.php | 4 ++--
lib/XPathAbstract.php | 5 ++++-
lib/contents.php | 49 +++++++++++++++++++++++-------------------
lib/http.php | 4 ++++
8 files changed, 42 insertions(+), 31 deletions(-)
diff --git a/README.md b/README.md
index e027d91219e..d6d1046c7ba 100644
--- a/README.md
+++ b/README.md
@@ -163,7 +163,7 @@ PHP ini config:
```ini
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
-max_execution_time = 20
+max_execution_time = 15
memory_limit = 64M
```
diff --git a/config.default.ini.php b/config.default.ini.php
index 21727c5e771..ee1e54c927d 100644
--- a/config.default.ini.php
+++ b/config.default.ini.php
@@ -48,7 +48,7 @@
[http]
; Operation timeout in seconds
-timeout = 30
+timeout = 15
; Operation retry count in case of curl error
retries = 2
diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php
index 4781ebc18d7..a82f8e5a35c 100644
--- a/lib/BridgeCard.php
+++ b/lib/BridgeCard.php
@@ -16,7 +16,7 @@ public static function displayBridgeCard($bridgeClassName, $formats, $isActive =
$bridge = $bridgeFactory->create($bridgeClassName);
- $isHttps = strpos($bridge->getURI(), 'https') === 0;
+ $isHttps = str_starts_with($bridge->getURI(), 'https');
$uri = $bridge->getURI();
$name = $bridge->getName();
@@ -113,8 +113,7 @@ private static function getFormHeader($bridgeClassName, $isHttps = false, $param
}
if (!$isHttps) {
- $form .= 'Warning :
-This bridge is not fetching its content through a secure connection ';
+ $form .= 'Warning: This bridge is not fetching its content through a secure connection ';
}
return $form;
diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php
index 056578e92d2..c0d7e878cb3 100644
--- a/lib/FeedExpander.php
+++ b/lib/FeedExpander.php
@@ -41,7 +41,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1)
}
/**
- * This method is overidden by bridges
+ * This method is overridden by bridges
*
* @return array
*/
diff --git a/lib/FeedParser.php b/lib/FeedParser.php
index 2d982de160a..510bcb32c80 100644
--- a/lib/FeedParser.php
+++ b/lib/FeedParser.php
@@ -7,9 +7,9 @@
*
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
*
- * Produce arrays meant to be used inside rss-bridge.
+ * Produces arrays meant to be used inside rss-bridge.
*
- * The item structure is tweaked so that works with FeedItem
+ * The item structure is tweaked so that it works with FeedItem
*/
final class FeedParser
{
diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php
index e30bb5eba82..2206f79ac9d 100644
--- a/lib/XPathAbstract.php
+++ b/lib/XPathAbstract.php
@@ -518,7 +518,10 @@ protected function formatItemUri($value)
if (strlen($value) === 0) {
return '';
}
- if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) {
+ if (
+ strpos($value, 'http://') === 0
+ || strpos($value, 'https://') === 0
+ ) {
return $value;
}
diff --git a/lib/contents.php b/lib/contents.php
index 9998a3f1d6e..43db8c031dc 100644
--- a/lib/contents.php
+++ b/lib/contents.php
@@ -24,6 +24,32 @@ function getContents(
$headerValue = trim(implode(':', array_slice($parts, 1)));
$httpHeadersNormalized[$headerName] = $headerValue;
}
+
+ $requestBodyHash = null;
+ if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
+ $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
+ }
+ $cacheKey = implode('_', ['server', $url, $requestBodyHash]);
+
+ /** @var Response $cachedResponse */
+ $cachedResponse = $cache->get($cacheKey);
+ if ($cachedResponse) {
+ $lastModified = $cachedResponse->getHeader('last-modified');
+ if ($lastModified) {
+ try {
+ // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
+ $lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified);
+ $config['if_not_modified_since'] = $lastModified->getTimestamp();
+ } catch (Exception $e) {
+ // Failed to parse last-modified
+ }
+ }
+ $etag = $cachedResponse->getHeader('etag');
+ if ($etag) {
+ $httpHeadersNormalized['if-none-match'] = $etag;
+ }
+ }
+
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
$defaultHttpHeaders = [
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
@@ -35,6 +61,7 @@ function getContents(
'Sec-Fetch-User' => '?1',
'TE' => 'trailers',
];
+
$config = [
'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'),
@@ -53,28 +80,6 @@ function getContents(
$config['proxy'] = Configuration::getConfig('proxy', 'url');
}
- $requestBodyHash = null;
- if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
- $requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
- }
- $cacheKey = implode('_', ['server', $url, $requestBodyHash]);
-
- /** @var Response $cachedResponse */
- $cachedResponse = $cache->get($cacheKey);
- if ($cachedResponse) {
- $cachedLastModified = $cachedResponse->getHeader('last-modified');
- if ($cachedLastModified) {
- try {
- // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
- $cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . $cachedLastModified);
- $config['if_not_modified_since'] = $cachedLastModified->getTimestamp();
- } catch (Exception $dateTimeParseFailue) {
- // Ignore invalid 'Last-Modified' HTTP header value
- }
- }
- // todo: We should also check for Etag
- }
-
$response = $httpClient->request($url, $config);
switch ($response->getCode()) {
diff --git a/lib/http.php b/lib/http.php
index 405b01c6833..90b65a6e99e 100644
--- a/lib/http.php
+++ b/lib/http.php
@@ -258,6 +258,10 @@ public function getHeaders(): array
}
/**
+ * HTTP response may have multiple headers with the same name.
+ *
+ * By default, this method returns only the last header.
+ *
* @return string[]|string|null
*/
public function getHeader(string $name, bool $all = false)
From 6eaf0eaa565361d0a18f23cdcd8df894116ad73a Mon Sep 17 00:00:00 2001
From: Dag
Date: Wed, 17 Jan 2024 20:10:32 +0100
Subject: [PATCH 38/88] fix: add cache clearing tools (#3896)
Forgot to add these in #3867
---
.gitignore | 1 -
bin/cache-clear | 14 ++++++++++++++
bin/cache-prune | 14 ++++++++++++++
3 files changed, 28 insertions(+), 1 deletion(-)
create mode 100755 bin/cache-clear
create mode 100755 bin/cache-prune
diff --git a/.gitignore b/.gitignore
index 9725342dc19..6ed95489e41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,6 @@ data/
*.pydevproject
.project
.metadata
-bin/
tmp/
*.tmp
*.bak
diff --git a/bin/cache-clear b/bin/cache-clear
new file mode 100755
index 00000000000..3563abadc1a
--- /dev/null
+++ b/bin/cache-clear
@@ -0,0 +1,14 @@
+#!/usr/bin/env php
+clear();
diff --git a/bin/cache-prune b/bin/cache-prune
new file mode 100755
index 00000000000..7b7a603130d
--- /dev/null
+++ b/bin/cache-prune
@@ -0,0 +1,14 @@
+#!/usr/bin/env php
+prune();
From 6408123330a28041344cccf3133981196e62a9a6 Mon Sep 17 00:00:00 2001
From: SebLaus <97241865+SebLaus@users.noreply.github.com>
Date: Fri, 19 Jan 2024 03:59:47 +0100
Subject: [PATCH 39/88] [IdealoBridge] added Header with user-agent and fixed
typo (#3897)
* Added header with useragent
* copy paste error from local test environment
* Fixed missing space in New before
* fixed missing space after comma in argument list
---
bridges/IdealoBridge.php | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php
index 89c5f87df90..cef2b812165 100644
--- a/bridges/IdealoBridge.php
+++ b/bridges/IdealoBridge.php
@@ -42,8 +42,13 @@ public function getIcon()
public function collectData()
{
+ // Needs header with user-agent to function properly.
+ $header = [
+ 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15'
+ ];
+
$link = $this->getInput('Link');
- $html = getSimpleHTMLDOM($link);
+ $html = getSimpleHTMLDOM($link, $header);
// Get Productname
$titleobj = $html->find('.oopStage-title', 0);
@@ -80,7 +85,7 @@ public function collectData()
// Generate Content
if ($PriceNew > 1) {
$content = "Price New: $PriceNew ";
- $content .= "Price Newbefore: $OldPriceNew ";
+ $content .= "Price New before: $OldPriceNew ";
}
if ($this->getInput('MaxPriceNew') != '') {
From 12a90e20749471c1f2c794792f6b1fabcb74d13e Mon Sep 17 00:00:00 2001
From: ORelio
Date: Fri, 19 Jan 2024 21:30:06 +0100
Subject: [PATCH 40/88] Utils: Add Webp MIME type (#3900)
---
lib/utils.php | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/utils.php b/lib/utils.php
index e8f00f5484e..07806e7c256 100644
--- a/lib/utils.php
+++ b/lib/utils.php
@@ -171,6 +171,7 @@ function parse_mime_type($url)
'jpg' => 'image/jpeg',
'gif' => 'image/gif',
'png' => 'image/png',
+ 'webp' => 'image/webp',
'image' => 'image/*',
'mp3' => 'audio/mpeg',
];
From bb36eb9eb831eb6bce8641323b7e5ce90798575b Mon Sep 17 00:00:00 2001
From: ORelio
Date: Fri, 19 Jan 2024 21:30:53 +0100
Subject: [PATCH 41/88] [CssSelectorBridge] Time/Thumbnail improvements (#3879)
(#3901)
* Implement
|