diff --git a/.github/prtester.py b/.github/prtester.py index 93b8328987e..df6cc1ffb73 100644 --- a/.github/prtester.py +++ b/.github/prtester.py @@ -52,10 +52,14 @@ def testBridges(bridges,status): for listing in lists: selectionvalue = '' listname = listing.get('name') - if 'optgroup' in listing.contents[0].name: - listing = list(itertools.chain.from_iterable(listing)) + cleanlist = [] + for option in listing.contents: + if 'optgroup' in option.name: + cleanlist.extend(option) + else: + cleanlist.append(option) firstselectionentry = 1 - for selectionentry in listing: + for selectionentry in cleanlist: if firstselectionentry: selectionvalue = selectionentry.get('value') firstselectionentry = 0 diff --git a/.github/workflows/prhtmlgenerator.yml b/.github/workflows/prhtmlgenerator.yml index cacb6642472..ce82aef1d7b 100644 --- a/.github/workflows/prhtmlgenerator.yml +++ b/.github/workflows/prhtmlgenerator.yml @@ -18,11 +18,11 @@ jobs: - name: Check out rss-bridge run: | PR=${{github.event.number}}; - wget -O requirements.txt https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester-requirements.txt; - wget https://raw.githubusercontent.com/RSS-Bridge/rss-bridge/master/.github/prtester.py; + wget -O requirements.txt https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester-requirements.txt; + wget https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester.py; wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch; touch DEBUG; - cat $PR.patch | grep " bridges/.*\.php" | sed "s= bridges/\(.*\)Bridge.php.*=\1=g" | sort | uniq > whitelist.txt + cat $PR.patch | grep "\bbridges/.*Bridge\.php\b" | sed "s=.*\bbridges/\(.*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt - name: Start Docker - Current run: | docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 
3000:80 ghcr.io/rss-bridge/rss-bridge:latest diff --git a/README.md b/README.md index b54194a7221..e0487e6b07f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,11 @@ ![RSS-Bridge](static/logo_600px.png) -RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one. +RSS-Bridge is a web application. + +It generates web feeds for websites that don't have one. + +Officially hosted instance: https://rss-bridge.org/bridge01/ [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) @@ -17,43 +21,58 @@ RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for website |![Screenshot #5](/static/screenshot-5.png?raw=true)|![Screenshot #6](/static/screenshot-6.png?raw=true)| |![Screenshot #7](/static/twitter-form.png?raw=true)|![Screenshot #8](/static/twitter-rasmus.png?raw=true)| -## A subset of bridges - -* `YouTube` : YouTube user channel, playlist or search -* `Twitter` : Return keyword/hashtag search or user timeline -* `Telegram` : Return the latest posts from a public group -* `Reddit` : Return the latest posts from a subreddit or user -* `Filter` : Filter an existing feed url -* `Vk` : Latest posts from a user or group -* `FeedMerge` : Merge two or more existing feeds into one -* `Twitch` : Fetch the latest videos from a channel -* `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords - -And [many more](bridges/), thanks to the community! 
+## A subset of bridges (18/412) + +* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge) +* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge) +* `FeedReducerBridge`: [Reduce a noisy feed by some percentage](https://rss-bridge.org/bridge01/#bridge-FeedReducerBridge) +* `FilterBridge`: [Filter a feed by excluding/including items by keyword](https://rss-bridge.org/bridge01/#bridge-FilterBridge) +* `GettrBridge`: [Fetches the latest posts from a GETTR user](https://rss-bridge.org/bridge01/#bridge-GettrBridge) +* `MastodonBridge`: [Fetches statuses from a Mastodon (ActivityPub) instance](https://rss-bridge.org/bridge01/#bridge-MastodonBridge) +* `RedditBridge`: [Fetches posts from a user/subreddit (with filtering options)](https://rss-bridge.org/bridge01/#bridge-RedditBridge) +* `RumbleBridge`: [Fetches channel/user videos](https://rss-bridge.org/bridge01/#bridge-RumbleBridge) +* `SoundcloudBridge`: [Fetches music by username](https://rss-bridge.org/bridge01/#bridge-SoundcloudBridge) +* `TelegramBridge`: [Fetches posts from a public channel](https://rss-bridge.org/bridge01/#bridge-TelegramBridge) +* `ThePirateBayBridge`: [Fetches torrents by search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge) +* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge) +* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge) +* `TwitterBridge`: [Fetches tweets](https://rss-bridge.org/bridge01/#bridge-TwitterBridge) +* `VkBridge`: [Fetches posts from user/group](https://rss-bridge.org/bridge01/#bridge-VkBridge) +* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge) +* `YoutubeBridge`: [Fetches videos by 
username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge) +* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's community tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge) [Full documentation](https://rss-bridge.github.io/rss-bridge/index.html) -Check out RSS-Bridge right now on https://rss-bridge.org/bridge01 or find another +Check out RSS-Bridge right now on https://rss-bridge.org/bridge01/ + +Alternatively find another [public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html). ## Tutorial -RSS-Bridge requires php 7.4 (or higher). +### Install with composer or git -### Install with git: +Requires minimum PHP 7.4. -```bash +```shell +cd /var/www +composer create-project -v --no-dev rss-bridge/rss-bridge +``` + +```shell cd /var/www git clone https://github.com/RSS-Bridge/rss-bridge.git +``` + +Config: +```shell # Give the http user write permission to the cache folder chown www-data:www-data /var/www/rss-bridge/cache # Optionally copy over the default config file cp config.default.ini.php config.ini.php - -# Optionally copy over the default whitelist file -cp whitelist.default.txt whitelist.txt ``` Example config for nginx: @@ -74,9 +93,9 @@ server { } ``` -### Install with Docker: +### Install from Docker Hub: -Install by using docker image from Docker Hub: +Install by downloading the docker image from Docker Hub: ```bash # Create container @@ -88,7 +107,7 @@ docker start rss-bridge Browse http://localhost:3000/ -Install by locally building the image: +### Install by locally building from Dockerfile ```bash # Build image from Dockerfile @@ -97,13 +116,13 @@ docker build -t rss-bridge . 
# Create container docker create --name rss-bridge --publish 3000:80 rss-bridge -# Start the container +# Start container docker start rss-bridge ``` Browse http://localhost:3000/ -#### Install with docker-compose +### Install with docker-compose Create a `docker-compose.yml` file locally with with the following content: ```yml @@ -126,7 +145,7 @@ docker-compose up Browse http://localhost:3000/ -### Alternative installation methods +### Other installation methods [![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge) [![Deploy to Heroku](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy) @@ -169,30 +188,95 @@ Learn more in [bridge api](https://rss-bridge.github.io/rss-bridge/Bridge_API/in ### How to enable all bridges -Write an asterisks to `whitelist.txt`: - - echo '*' > whitelist.txt - -Learn more in [enabling briges](https://rss-bridge.github.io/rss-bridge/For_Hosts/Whitelisting.html) +Modify `config.ini.php`: -### How to enable a bridge + enabled_bridges[] = * -Add the bridge name to `whitelist.txt`: +### How to enable some bridges - echo 'FirefoxAddonsBridge' >> whitelist.txt +``` +enabled_bridges[] = TwitchBridge +enabled_bridges[] = GettrBridge +``` ### How to enable debug mode -Set in `config.ini.php`: +The +[debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html) +disables the majority of caching operations. enable_debug_mode = true -Learn more in [debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html). +### How to switch to memcached as cache backend + +``` +[cache] + +; Cache backend: file (default), sqlite, memcached, null +type = "memcached" +``` + +### How to switch to sqlite3 as cache backend + + type = "sqlite" + +### How to disable bridge errors (as feed items) + +When a bridge fails, RSS-Bridge will produce a feed with a single item describing the error. 
+ +This way, feed readers pick it up and you are notified. + +If you don't want this behaviour, switch the error output to `http`: + + [error] + + ; Defines how error messages are returned by RSS-Bridge + ; + ; "feed" = As part of the feed (default) + ; "http" = As HTTP error message + ; "none" = No errors are reported + output = "http" + +### How to accumulate errors before finally reporting them + +Modify `report_limit` so that an error must occur 3 times before it is reported. + + ; Defines how often an error must occur before it is reported to the user + report_limit = 3 + +### How to password-protect the instance + +HTTP basic access authentication: + + [authentication] + + enable = true + username = "alice" + password = "cat" + +This will typically require feed readers to be configured with the credentials. + +It may also be possible to manually include the credentials in the URL: + +https://alice:cat@rss-bridge.org/bridge01/?action=display&bridge=FabriceBellardBridge&format=Html ### How to create a new output format [Create a new format](https://rss-bridge.github.io/rss-bridge/Format_API/index.html). +### How to run unit tests and linter + +These commands require that you have installed the dev dependencies in `composer.json`. + + ./vendor/bin/phpunit + ./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./ + +### How to spawn a minimal development environment + + php -S 127.0.0.1:9001 + +http://127.0.0.1:9001/ + ## Explanation We are RSS-Bridge community, a group of developers continuing the project initiated by sebsauvage, @@ -204,15 +288,19 @@ webmaster of See [CONTRIBUTORS.md](CONTRIBUTORS.md) RSS-Bridge uses caching to prevent services from banning your server for repeatedly updating feeds. -The specific cache duration can be different between bridges. Cached files are deleted automatically after 24 hours. +The specific cache duration can be different between bridges. +Cached files are deleted automatically after 24 hours. 
RSS-Bridge allows you to take full control over which bridges are displayed to the user. That way you can host your own RSS-Bridge service with your favorite collection of bridges! +Current maintainers (as of 2023): @dvikan and @Mynacol #2519 ## Reference -### FeedItem properties +### Feed item structure + +This is the feed item structure that bridges are expected to produce. ```php $item = [ @@ -235,13 +323,21 @@ That way you can host your own RSS-Bridge service with your favorite collection ] ``` -### Output formats: +### Output formats + +* `Atom`: Atom feed, for use in feed readers +* `Html`: Simple HTML page +* `Json`: JSON, for consumption by other applications +* `Mrss`: MRSS feed, for use in feed readers +* `Plaintext`: Raw text, for consumption by other applications +* `Sfeed`: Text, TAB separated + +### Cache backends -* `Atom` : Atom feed, for use in feed readers -* `Html` : Simple HTML page -* `Json` : JSON, for consumption by other applications -* `Mrss` : MRSS feed, for use in feed readers -* `Plaintext` : Raw text, for consumption by other applications +* `file` +* `sqlite` +* `memcached` +* `null` ### Licenses diff --git a/actions/ConnectivityAction.php b/actions/ConnectivityAction.php index 19e6b9a6794..c11e6595fa1 100644 --- a/actions/ConnectivityAction.php +++ b/actions/ConnectivityAction.php @@ -41,18 +41,14 @@ public function execute(array $request) return render_template('connectivity.html.php'); } - $bridgeClassName = $this->bridgeFactory->sanitizeBridgeName($request['bridge']); - - if ($bridgeClassName === null) { - throw new \InvalidArgumentException('Bridge name invalid!'); - } + $bridgeClassName = $this->bridgeFactory->createBridgeClassName($request['bridge']); return $this->reportBridgeConnectivity($bridgeClassName); } private function reportBridgeConnectivity($bridgeClassName) { - if (!$this->bridgeFactory->isWhitelisted($bridgeClassName)) { + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { throw new \Exception('Bridge is not 
whitelisted!'); } diff --git a/actions/DetectAction.php b/actions/DetectAction.php index 6524bdfed72..6c9fa22dfd7 100644 --- a/actions/DetectAction.php +++ b/actions/DetectAction.php @@ -29,7 +29,7 @@ public function execute(array $request) $bridgeFactory = new BridgeFactory(); foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) { - if (!$bridgeFactory->isWhitelisted($bridgeClassName)) { + if (!$bridgeFactory->isEnabled($bridgeClassName)) { continue; } diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 0a3d6dcda04..129d45871be 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -1,37 +1,45 @@ sanitizeBridgeName($request['bridge']); + if (Configuration::getConfig('system', 'enable_maintenance_mode')) { + return new Response('503 Service Unavailable', 503); } - - if ($bridgeClassName === null) { - throw new \InvalidArgumentException('Bridge name invalid!'); + $this->cache = RssBridge::getCache(); + $this->cache->setScope('http'); + $this->cache->setKey($request); + // avg timeout of 20m + $timeout = 60 * 15 + rand(1, 60 * 10); + /** @var Response $cachedResponse */ + $cachedResponse = $this->cache->loadData($timeout); + if ($cachedResponse && !Debug::isEnabled()) { + //Logger::info(sprintf('Returning cached (http) response: %s', $cachedResponse->getBody())); + return $cachedResponse; + } + $response = $this->createResponse($request); + if (in_array($response->getCode(), [429, 503])) { + //Logger::info(sprintf('Storing cached (http) response: %s', $response->getBody())); + $this->cache->setScope('http'); + $this->cache->setKey($request); + $this->cache->saveData($response); } + return $response; + } + + private function createResponse(array $request) + { + $bridgeFactory = new BridgeFactory(); + $bridgeClassName = $bridgeFactory->createBridgeClassName($request['bridge'] ?? ''); $format = $request['format'] ?? 
null; if (!$format) { throw new \Exception('You must specify a format!'); } - if (!$bridgeFactory->isWhitelisted($bridgeClassName)) { + if (!$bridgeFactory->isEnabled($bridgeClassName)) { throw new \Exception('This bridge is not whitelisted'); } @@ -41,22 +49,22 @@ public function execute(array $request) $bridge = $bridgeFactory->create($bridgeClassName); $bridge->loadConfiguration(); - $noproxy = array_key_exists('_noproxy', $request) && filter_var($request['_noproxy'], FILTER_VALIDATE_BOOLEAN); - - if (Configuration::getConfig('proxy', 'url') && Configuration::getConfig('proxy', 'by_bridge') && $noproxy) { + $noproxy = $request['_noproxy'] ?? null; + if ( + Configuration::getConfig('proxy', 'url') + && Configuration::getConfig('proxy', 'by_bridge') + && $noproxy + ) { + // This const is only used once in getContents() define('NOPROXY', true); } - if (array_key_exists('_cache_timeout', $request)) { - if (! Configuration::getConfig('cache', 'custom_timeout')) { - unset($request['_cache_timeout']); - $uri = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) . '?' . http_build_query($request); - return new Response('', 301, ['Location' => $uri]); - } - - $cache_timeout = filter_var($request['_cache_timeout'], FILTER_VALIDATE_INT); + $cacheTimeout = $request['_cache_timeout'] ?? 
null; + if (Configuration::getConfig('cache', 'custom_timeout') && $cacheTimeout) { + $cacheTimeout = (int) $cacheTimeout; } else { - $cache_timeout = $bridge->getCacheTimeout(); + // At this point the query argument might still be in the url but it won't be used + $cacheTimeout = $bridge->getCacheTimeout(); } // Remove parameters that don't concern bridges @@ -90,49 +98,36 @@ public function execute(array $request) ) ); - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); - $cache->setScope(''); - $cache->purgeCache(86400); // 24 hours - $cache->setKey($cache_params); + $this->cache->setScope(''); + $this->cache->setKey($cache_params); $items = []; $infos = []; - $mtime = $cache->getTime(); - if ( - $mtime !== false - && (time() - $cache_timeout < $mtime) - && !Debug::isEnabled() - ) { - // At this point we found the feed in the cache and debug mode is disabled + $feed = $this->cache->loadData($cacheTimeout); + if ($feed && !Debug::isEnabled()) { if (isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) { + $modificationTime = $this->cache->getTime(); // The client wants to know if the feed has changed since its last check - $stime = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']); - if ($mtime <= $stime) { - $lastModified2 = gmdate('D, d M Y H:i:s ', $mtime) . 'GMT'; - return new Response('', 304, ['Last-Modified' => $lastModified2]); + $modifiedSince = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']); + if ($modificationTime <= $modifiedSince) { + $modificationTimeGMT = gmdate('D, d M Y H:i:s ', $modificationTime); + return new Response('', 304, ['Last-Modified' => $modificationTimeGMT . 
'GMT']); } } - // Load the feed from cache and prepare it - $cached = $cache->loadData(); - if (isset($cached['items']) && isset($cached['extraInfos'])) { - foreach ($cached['items'] as $item) { + if (isset($feed['items']) && isset($feed['extraInfos'])) { + foreach ($feed['items'] as $item) { $items[] = new FeedItem($item); } - $infos = $cached['extraInfos']; + $infos = $feed['extraInfos']; } } else { - // At this point we did NOT find the feed in the cache or debug mode is enabled. try { $bridge->setDatas($bridge_params); $bridge->collectData(); - $items = $bridge->getItems(); - if (isset($items[0]) && is_array($items[0])) { $feedItems = []; foreach ($items as $item) { @@ -146,43 +141,63 @@ public function execute(array $request) 'donationUri' => $bridge->getDonationURI(), 'icon' => $bridge->getIcon() ]; - } catch (\Throwable $e) { + } catch (\Exception $e) { + $errorOutput = Configuration::getConfig('error', 'output'); + $reportLimit = Configuration::getConfig('error', 'report_limit'); if ($e instanceof HttpException) { - // Produce a smaller log record for http exceptions - Logger::warning(sprintf('Exception in %s: %s', $bridgeClassName, create_sane_exception_message($e))); - } else { - // Log the exception - Logger::error(sprintf('Exception in %s', $bridgeClassName), ['e' => $e]); + // Reproduce (and log) these responses regardless of error output and report limit + if ($e->getCode() === 429) { + Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e))); + return new Response('429 Too Many Requests', 429); + } + if ($e->getCode() === 503) { + Logger::info(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e))); + return new Response('503 Service Unavailable', 503); + } + // Might want to cache other codes such as 504 Gateway Timeout } - - // Emit error only if we are passed the error report limit - $errorCount = self::logBridgeError($bridge->getName(), 
$e->getCode()); - if ($errorCount >= Configuration::getConfig('error', 'report_limit')) { - if (Configuration::getConfig('error', 'output') === 'feed') { - // Emit the error as a feed item in a feed so that feed readers can pick it up + if (in_array($errorOutput, ['feed', 'none'])) { + Logger::error(sprintf('Exception in DisplayAction(%s): %s', $bridgeClassName, create_sane_exception_message($e)), ['e' => $e]); + } + $errorCount = 1; + if ($reportLimit > 1) { + $errorCount = $this->logBridgeError($bridge->getName(), $e->getCode()); + } + // Let clients know about the error if we are passed the report limit + if ($errorCount >= $reportLimit) { + if ($errorOutput === 'feed') { + // Render the exception as a feed item $items[] = $this->createFeedItemFromException($e, $bridge); - } elseif (Configuration::getConfig('error', 'output') === 'http') { - // Emit as a regular web response + } elseif ($errorOutput === 'http') { + // Rethrow so that the main exception handler in RssBridge.php produces an HTTP 500 throw $e; + } elseif ($errorOutput === 'none') { + // Do nothing (produces an empty feed) + } else { + // Do nothing, unknown error output? Maybe throw exception or validate in Configuration.php } } } - $cache->saveData([ + // Unfortunately need to set scope and key again because they might be modified + $this->cache->setScope(''); + $this->cache->setKey($cache_params); + $this->cache->saveData([ 'items' => array_map(function (FeedItem $item) { return $item->toArray(); }, $items), 'extraInfos' => $infos ]); + $this->cache->purgeCache(); } $format->setItems($items); $format->setExtraInfos($infos); - $lastModified = $cache->getTime(); - $format->setLastModified($lastModified); + $newModificationTime = $this->cache->getTime(); + $format->setLastModified($newModificationTime); $headers = []; - if ($lastModified) { - $headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $lastModified) . 
'GMT'; + if ($newModificationTime) { + $headers['Last-Modified'] = gmdate('D, d M Y H:i:s ', $newModificationTime) . 'GMT'; } $headers['Content-Type'] = $format->getMimeType() . '; charset=' . $format->getCharset(); return new Response($format->stringify(), 200, $headers); @@ -212,14 +227,12 @@ private function createFeedItemFromException($e, BridgeInterface $bridge): FeedI return $item; } - private static function logBridgeError($bridgeName, $code) + private function logBridgeError($bridgeName, $code) { - $cacheFactory = new CacheFactory(); - $cache = $cacheFactory->create(); - $cache->setScope('error_reporting'); - $cache->setkey([$bridgeName . '_' . $code]); - $cache->purgeCache(86400); // 24 hours - if ($report = $cache->loadData()) { + $this->cache->setScope('error_reporting'); + $this->cache->setkey([$bridgeName . '_' . $code]); + $report = $this->cache->loadData(); + if ($report) { $report = Json::decode($report); $report['time'] = time(); $report['count']++; @@ -230,7 +243,7 @@ private static function logBridgeError($bridgeName, $code) 'count' => 1, ]; } - $cache->saveData(Json::encode($report)); + $this->cache->saveData(Json::encode($report)); return $report['count']; } diff --git a/actions/FrontpageAction.php b/actions/FrontpageAction.php index f7ba56e65b0..40d25ea4805 100644 --- a/actions/FrontpageAction.php +++ b/actions/FrontpageAction.php @@ -15,7 +15,7 @@ public function execute(array $request) $body = ''; foreach ($bridgeClassNames as $bridgeClassName) { - if ($bridgeFactory->isWhitelisted($bridgeClassName)) { + if ($bridgeFactory->isEnabled($bridgeClassName)) { $body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats); $activeBridges++; } elseif ($showInactive) { diff --git a/actions/HealthAction.php b/actions/HealthAction.php new file mode 100644 index 00000000000..8ae5df1b4ae --- /dev/null +++ b/actions/HealthAction.php @@ -0,0 +1,15 @@ + 200, + 'message' => 'all is good', + ]; + return new Response(Json::encode($response), 200, 
['content-type' => 'application/json']); + } +} diff --git a/actions/ListAction.php b/actions/ListAction.php index 3e15169077d..6ce7e33ee58 100644 --- a/actions/ListAction.php +++ b/actions/ListAction.php @@ -26,7 +26,7 @@ public function execute(array $request) $bridge = $bridgeFactory->create($bridgeClassName); $list->bridges[$bridgeClassName] = [ - 'status' => $bridgeFactory->isWhitelisted($bridgeClassName) ? 'active' : 'inactive', + 'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive', 'uri' => $bridge->getURI(), 'donationUri' => $bridge->getDonationURI(), 'name' => $bridge->getName(), diff --git a/actions/SetBridgeCacheAction.php b/actions/SetBridgeCacheAction.php index ac56f7eabfd..a9a598bd426 100644 --- a/actions/SetBridgeCacheAction.php +++ b/actions/SetBridgeCacheAction.php @@ -23,17 +23,10 @@ public function execute(array $request) $bridgeFactory = new BridgeFactory(); - $bridgeClassName = null; - if (isset($request['bridge'])) { - $bridgeClassName = $bridgeFactory->sanitizeBridgeName($request['bridge']); - } - - if ($bridgeClassName === null) { - throw new \InvalidArgumentException('Bridge name invalid!'); - } + $bridgeClassName = $bridgeFactory->createBridgeClassName($request['bridge'] ?? 
''); // whitelist control - if (!$bridgeFactory->isWhitelisted($bridgeClassName)) { + if (!$bridgeFactory->isEnabled($bridgeClassName)) { throw new \Exception('This bridge is not whitelisted', 401); die; } @@ -42,10 +35,12 @@ public function execute(array $request) $bridge->loadConfiguration(); $value = $request['value']; - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); + $cache = RssBridge::getCache(); $cache->setScope(get_class($bridge)); + if (!is_array($key)) { + // not sure if $key is an array when it comes in from request + $key = [$key]; + } $cache->setKey($key); $cache->saveData($value); diff --git a/bridges/ABolaBridge.php b/bridges/ABolaBridge.php new file mode 100644 index 00000000000..1f1c5da1954 --- /dev/null +++ b/bridges/ABolaBridge.php @@ -0,0 +1,116 @@ + [ + 'name' => 'News Feed', + 'type' => 'list', + 'title' => 'Feeds from the Portuguese sports newspaper A BOLA.PT', + 'values' => [ + 'Últimas' => 'Nnh/Noticias', + 'Seleção Nacional' => 'Selecao/Noticias', + 'Futebol Nacional' => [ + 'Notícias' => 'Nacional/Noticias', + 'Primeira Liga' => 'Nacional/Liga/Noticias', + 'Liga 2' => 'Nacional/Liga2/Noticias', + 'Liga 3' => 'Nacional/Liga3/Noticias', + 'Liga Revelação' => 'Nacional/Liga-Revelacao/Noticias', + 'Campeonato de Portugal' => 'Nacional/Campeonato-Portugal/Noticias', + 'Distritais' => 'Nacional/Distritais/Noticias', + 'Taça de Portugal' => 'Nacional/TPortugal/Noticias', + 'Futebol Feminino' => 'Nacional/FFeminino/Noticias', + 'Futsal' => 'Nacional/Futsal/Noticias', + ], + 'Futebol Internacional' => [ + 'Notícias' => 'Internacional/Noticias/Noticias', + 'Liga dos Campeões' => 'Internacional/Liga-dos-campeoes/Noticias', + 'Liga Europa' => 'Internacional/Liga-europa/Noticias', + 'Liga Conferência' => 'Internacional/Liga-conferencia/Noticias', + 'Liga das Nações' => 'Internacional/Liga-das-nacoes/Noticias', + 'UEFA Youth League' => 'Internacional/Uefa-Youth-League/Noticias', + ], + 'Mercado' => 'Mercado', + 'Modalidades' 
=> 'Modalidades/Noticias', + 'Motores' => 'Motores/Noticias', + ] + ] + ] + ]; + + public function getIcon() + { + return 'https://abola.pt/img/icons/favicon-96x96.png'; + } + + public function getName() + { + return !is_null($this->getKey('feed')) ? self::NAME . ' | ' . $this->getKey('feed') : self::NAME; + } + + public function getURI() + { + return self::URI . $this->getInput('feed'); + } + + public function collectData() + { + $url = sprintf('https://abola.pt/%s', $this->getInput('feed')); + $dom = getSimpleHTMLDOM($url); + if ($this->getInput('feed') !== 'Mercado') { + $dom = $dom->find('div#body_Todas1_upNoticiasTodas', 0); + } else { + $dom = $dom->find('div#body_NoticiasMercado_upNoticiasTodas', 0); + } + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + foreach ($dom->find('div.media') as $key => $article) { + //Get thumbnail + $image = $article->find('.media-img', 0)->style; + $image = preg_replace('/background-image: url\(/i', '', $image); + $image = substr_replace($image, '', -4); + $image = preg_replace('/https:\/\//i', '', $image); + $image = preg_replace('/www\./i', '', $image); + $image = preg_replace('/\/\//', '/', $image); + $image = preg_replace('/\/\/\//', '//', $image); + $image = substr($image, 7); + $image = 'https://' . 
$image; + $image = preg_replace('/ptimg/', 'pt/img', $image); + $image = preg_replace('/\/\/bola/', 'www.abola', $image); + //Timestamp + $date = date('Y/m/d'); + if (!is_null($article->find("span#body_Todas1_rptNoticiasTodas_lblData_$key", 0))) { + $date = $article->find("span#body_Todas1_rptNoticiasTodas_lblData_$key", 0)->plaintext; + $date = preg_replace('/\./', '/', $date); + } + $time = $article->find("span#body_Todas1_rptNoticiasTodas_lblHora_$key", 0)->plaintext; + $date = explode('/', $date); + $time = explode(':', $time); + $year = $date[0]; + $month = $date[1]; + $day = $date[2]; + $hour = $time[0]; + $minute = $time[1]; + $timestamp = mktime($hour, $minute, 0, $month, $day, $year); + //Content + $image = '' . $article->find('h4 span', 0)->plaintext . ''; + $description = '

' . $article->find('.media-texto > span', 0)->plaintext . '

'; + $content = $image . '
' . $description; + $a = $article->find('.media-body > a', 0); + $this->items[] = [ + 'title' => $a->find('h4 span', 0)->plaintext, + 'uri' => $a->href, + 'content' => $content, + 'timestamp' => $timestamp, + ]; + } + } +} diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 6ca59cc5b24..57e12fbde49 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -92,7 +92,12 @@ private function collectList($url) private function collectWork($id) { $url = self::URI . "/works/$id/navigate"; - $response = _http_request($url, ['useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)']); + $httpClient = RssBridge::getHttpClient(); + + $response = $httpClient->request($url, [ + 'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)', + ]); + $html = \str_get_html($response['body']); $html = defaultLinkTo($html, self::URI); diff --git a/bridges/ASRockNewsBridge.php b/bridges/ASRockNewsBridge.php index 1b516377057..1a3279784a0 100644 --- a/bridges/ASRockNewsBridge.php +++ b/bridges/ASRockNewsBridge.php @@ -34,7 +34,12 @@ public function collectData() $item['content'] = $contents->innertext; $item['timestamp'] = $this->extractDate($a->plaintext); - $item['enclosures'][] = $a->find('img', 0)->src; + + $img = $a->find('img', 0); + if ($img) { + $item['enclosures'][] = $img->src; + } + $this->items[] = $item; if (count($this->items) >= 10) { diff --git a/bridges/AllegroBridge.php b/bridges/AllegroBridge.php index 5545741645b..be240857de8 100644 --- a/bridges/AllegroBridge.php +++ b/bridges/AllegroBridge.php @@ -16,14 +16,20 @@ class AllegroBridge extends BridgeAbstract 'sessioncookie' => [ 'name' => 'The \'wdctx\' session cookie', 'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits', - 'pattern' => '^.{250,};?$', + 'pattern' => '^.{70,};?$', // phpcs:ignore 'exampleValue' => 
'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd', 'required' => false, ], 'includeSponsoredOffers' => [ 'type' => 'checkbox', - 'name' => 'Include Sponsored Offers' + 'name' => 'Include Sponsored Offers', + 'defaultValue' => 'checked' + ], + 'includePromotedOffers' => [ + 'type' => 'checkbox', + 'name' => 'Include Promoted Offers', + 'defaultValue' => 'checked' ] ]]; @@ -63,58 +69,57 @@ public function collectData() return; } - $results = $html->find('._6a66d_V7Lel article'); + $results = $html->find('article[data-analytics-view-custom-context="REGULAR"]'); if (!$this->getInput('includeSponsoredOffers')) { - $results = array_filter($results, function ($node) { - return $node->{'data-analytics-view-label'} != 'showSponsoredItems'; - }); + $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]')); + } + + if (!$this->getInput('includePromotedOffers')) { + $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]')); } foreach ($results as $post) { $item = []; - $item['uri'] = $post->find('._6a66d_LX75-', 0)->href; - -//TODO: port this over, whatever it does, from https://github.com/MK-PL/AllegroRSS -// if (arrayLinks.includes('events/clicks?')) { -// let sponsoredLink = new URL(arrayLinks).searchParams.get('redirect') -// arrayLinks = sponsoredLink.slice(0, sponsoredLink.indexOf('?')) -// } - - $item['title'] = $post->find('._6a66d_LX75-', 0)->innertext; - $item['uid'] = $post->{'data-analytics-view-value'}; - $descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/']; - $descriptionReplacements = ['', ': ', '', '  ']; - $description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext; - $descriptionPretty = 
preg_replace($descriptionPatterns, $descriptionReplacements, $description); + $item_link = $post->find('a[href*="' . $item['uid'] . '"], a[href*="allegrolokalnie"]', 0); - $buyNowAuction = $post->find('.mqu1_g3.mvrt_0.mgn2_12', 0)->innertext ?? ''; - $buyNowAuction = str_replace(' href; - $auctionTimeLeft = $post->find('._6a66d_ImOzU', 0)->innertext ?? ''; + $item['title'] = $item_link->find('img', 0)->alt; - $price = $post->find('._6a66d_6R3iN', 0)->plaintext; - $price = empty($auctionTimeLeft) ? $price : $price . '- kwota licytacji'; + $image = $item_link->find('img', 0)->{'data-src'} ?: $item_link->find('img', 0)->src ?? false; - $image = $post->find('._6a66d_44ioA img', 0)->{'data-src'} ?: $post->find('._6a66d_44ioA img', 0)->src ?? false; if ($image) { $item['enclosures'] = [$image . '#.image']; } - $offerExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) { + $price = $post->{'data-analytics-view-json-custom-price'}; + if ($price) { + $priceDecoded = json_decode(html_entity_decode($price)); + $price = $priceDecoded->amount . ' ' . $priceDecoded->currency; + } + + $descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/']; + $descriptionReplacements = ['', ': ', '', '  ']; + $description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext; + $descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description); + + $pricingExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) { return empty($node->find('.mvrt_0')); }); - $offerExtraInfo = $offerExtraInfo[0]->plaintext ?? ''; + $pricingExtraInfo = $pricingExtraInfo[0]->plaintext ?? ''; + + $offerExtraInfo = array_map(function ($node) { + return str_contains($node->plaintext, 'zapłać później') ? '' : $node->outertext; + }, $post->find('div.mpof_ki.mwdn_1.mj7a_4.mgn2_12')); - $isSmart = $post->find('._6a66d_TC2Zk', 0)->innertext ?? 
''; - if (str_contains($isSmart, 'z kurierem')) { - $offerExtraInfo .= ', Smart z kurierem'; - } else { - $offerExtraInfo .= ', Smart'; + $isSmart = $post->find('img[alt="Smart!"]', 0) ?? false; + if ($isSmart) { + $pricingExtraInfo .= $isSmart->outertext; } $item['categories'] = []; @@ -131,11 +136,9 @@ public function collectData() . '
' . $price . '
' - . $auctionTimeLeft - . '
' - . $buyNowAuction + . implode('
', $offerExtraInfo) . '
' - . $offerExtraInfo + . $pricingExtraInfo . '

'; $this->items[] = $item; diff --git a/bridges/AllocineFRSortiesBridge.php b/bridges/AllocineFRSortiesBridge.php index b77c2f9b197..a75187bec87 100644 --- a/bridges/AllocineFRSortiesBridge.php +++ b/bridges/AllocineFRSortiesBridge.php @@ -24,6 +24,7 @@ public function collectData() $thumb = $element->find('figure.thumbnail', 0); $meta = $element->find('div.meta-body', 0); $synopsis = $element->find('div.synopsis', 0); + $date = $element->find('span.date', 0); $title = $element->find('a[class*=meta-title-link]', 0); $content = trim(defaultLinkTo($thumb->outertext . $meta->outertext . $synopsis->outertext, static::URI)); @@ -34,8 +35,32 @@ public function collectData() $item['content'] = $content; $item['title'] = trim($title->innertext); + $item['timestamp'] = $this->frenchPubDateToTimestamp($date->plaintext); $item['uri'] = static::BASE_URI . '/' . substr($title->href, 1); $this->items[] = $item; } } + + private function frenchPubDateToTimestamp($date) + { + return strtotime( + strtr( + strtolower($date), + [ + 'janvier' => 'jan', + 'février' => 'feb', + 'mars' => 'march', + 'avril' => 'apr', + 'mai' => 'may', + 'juin' => 'jun', + 'juillet' => 'jul', + 'août' => 'aug', + 'septembre' => 'sep', + 'octobre' => 'oct', + 'novembre' => 'nov', + 'décembre' => 'dec' + ] + ) + ); + } } diff --git a/bridges/AmazonPriceTrackerBridge.php b/bridges/AmazonPriceTrackerBridge.php index 6de451f1e7d..b07bdb7cf0a 100644 --- a/bridges/AmazonPriceTrackerBridge.php +++ b/bridges/AmazonPriceTrackerBridge.php @@ -125,14 +125,13 @@ private function parseDynamicImage($attribute) */ private function getImage($html) { + $image = 'https://placekitten.com/200/300'; $imageSrc = $html->find('#main-image-container img', 0); - if ($imageSrc) { $hiresImage = $imageSrc->getAttribute('data-old-hires'); $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image'); $image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute); } - $image = $image ?: 
'https://placekitten.com/200/300'; return << diff --git a/bridges/CVEDetailsBridge.php b/bridges/CVEDetailsBridge.php index 38b37bb7b2a..5334c1705c7 100644 --- a/bridges/CVEDetailsBridge.php +++ b/bridges/CVEDetailsBridge.php @@ -61,7 +61,7 @@ private function fetchContent() $html = getSimpleHTMLDOM($this->buildUrl()); $this->html = defaultLinkTo($html, self::URI); - $vendor = $html->find('#contentdiv > h1 > a', 0); + $vendor = $html->find('#contentdiv h1 > a', 0); if ($vendor == null) { returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . @@ -70,7 +70,7 @@ private function fetchContent() } $this->vendor = $vendor->innertext; - $product = $html->find('#contentdiv > h1 > a', 1); + $product = $html->find('#contentdiv h1 > a', 1); if ($product != null) { $this->product = $product->innertext; } @@ -102,38 +102,43 @@ public function collectData() $this->fetchContent(); } - foreach ($this->html->find('#vulnslisttable .srrowns') as $i => $tr) { + foreach ($this->html->find('#searchresults > .row') as $i => $tr) { // There are some optional vulnerability types, which will be // added to the categories as well as the CWE number -- which is // always given. $categories = [$this->vendor]; $enclosures = []; - $cwe = $tr->find('td', 2)->find('a', 0); - if ($cwe != null) { - $cwe = $cwe->innertext; - $categories[] = 'CWE-' . $cwe; - $enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe . 
'.html'; - } - $c = $tr->find('td', 4)->innertext; - if (trim($c) != '') { - $categories[] = $c; + $detailLink = $tr->find('.cveheader > h3 > a', 0); + $detailHtml = getSimpleHTMLDOM($detailLink->href); + + $div = $detailHtml->find('.cvedetailssummary', 0); + + // The CVE number itself + $title = $div->find('h1 > a', 0)->innertext; + $content = $div->find('.ssc-paragraph', 0)->innertext; + $cweList = $detailHtml->find('h2', 2)->next_sibling(); + foreach ($cweList->find('li') as $li) { + $cweWithDescription = $li->find('a', 0)->innertext; + preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe); + if (count($cwe) > 1) { + $categories[] = 'CWE-' . $cwe[1]; + $enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html'; + } } + if ($this->product != '') { $categories[] = $this->product; } - // The CVE number itself - $title = $tr->find('td', 1)->find('a', 0)->innertext; - $this->items[] = [ - 'uri' => $tr->find('td', 1)->find('a', 0)->href, + 'uri' => 'https://cvedetails.com/' . 
$detailHtml->find('h1 > a', 0)->href, 'title' => $title, 'timestamp' => $tr->find('td', 5)->innertext, - 'content' => $tr->next_sibling()->innertext, + 'content' => $content, 'categories' => $categories, 'enclosures' => $enclosures, - 'uid' => $tr->find('td', 1)->find('a', 0)->innertext, + 'uid' => $title, ]; // We only want to fetch the latest 10 CVEs diff --git a/bridges/CorreioDaFeiraBridge.php b/bridges/CorreioDaFeiraBridge.php new file mode 100644 index 00000000000..2cb42edef41 --- /dev/null +++ b/bridges/CorreioDaFeiraBridge.php @@ -0,0 +1,75 @@ + [ + 'name' => 'News Feed', + 'type' => 'list', + 'title' => 'Feeds from the Portuguese sports newspaper A BOLA.PT', + 'values' => [ + 'Cultura' => 'cultura', + 'Desporto' => 'desporto', + 'Economia' => 'economia', + 'Entrevista' => 'entrevista', + 'Freguesias' => 'freguesias', + 'Justiça' => 'justica', + 'Opinião' => 'opiniao', + 'Política' => 'politica', + 'Reportagem' => 'reportagem', + 'Sociedade' => 'sociedade', + 'Tecnologia' => 'tecnologia', + ] + ] + ] + ]; + + public function getIcon() + { + return 'https://www.correiodafeira.pt/wp-content/uploads/base_reporter-200x200.jpg'; + } + + public function getName() + { + return !is_null($this->getKey('feed')) ? self::NAME . ' | ' . $this->getKey('feed') : self::NAME; + } + + public function getURI() + { + return self::URI . 
$this->getInput('feed'); + } + + public function collectData() + { + $url = sprintf('https://www.correiodafeira.pt/categoria/%s', $this->getInput('feed')); + $dom = getSimpleHTMLDOM($url); + $dom = $dom->find('main', 0); + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + foreach ($dom->find('div.post') as $article) { + $a = $article->find('div.blog-box', 0); + //Get date and time of publishing + $time = $a->find('.post-date > :nth-child(2)', 0)->plaintext; + $datetime = explode('/', $time); + $year = $datetime[2]; + $month = $datetime[1]; + $day = $datetime[0]; + $timestamp = mktime(0, 0, 0, $month, $day, $year); + $this->items[] = [ + 'title' => $a->find('h2.entry-title > a', 0)->plaintext, + 'uri' => $a->find('h2.entry-title > a', 0)->href, + 'author' => $a->find('li.post-author > a', 0)->plaintext, + 'content' => $a->find('.entry-content > p', 0)->plaintext, + 'timestamp' => $timestamp, + ]; + } + } +} diff --git a/bridges/CraigslistBridge.php b/bridges/CraigslistBridge.php index a230422e904..9a2c1358719 100644 --- a/bridges/CraigslistBridge.php +++ b/bridges/CraigslistBridge.php @@ -63,7 +63,7 @@ public function collectData() $html = getSimpleHTMLDOM($uri); // Check if no results page is shown (nearby results) - if ($html->find('.displaycountShow', 0)->plaintext == '0') { + if (($html->find('.displaycountShow', 0)->plaintext ?? '') == '0') { return; } diff --git a/bridges/CssSelectorBridge.php b/bridges/CssSelectorBridge.php new file mode 100644 index 00000000000..2d7489de40f --- /dev/null +++ b/bridges/CssSelectorBridge.php @@ -0,0 +1,265 @@ + [ + 'name' => 'Site URL: Home page with latest articles', + 'exampleValue' => 'https://example.com/blog/', + 'required' => true + ], + 'url_selector' => [ + 'name' => 'Selector for article links or their parent elements', + 'title' => <<TITLE on home page, each one being treated as a feed item. 
+ Instead of just a link you can selet one of its parent element. Everything inside that + element becomes feed item content, e.g. image and summary present on home page. + When doing so, the first link inside the selected element becomes feed item URL/Title. + EOT, + 'exampleValue' => 'a.article', + 'required' => true + ], + 'url_pattern' => [ + 'name' => '[Optional] Pattern for site URLs to keep in feed', + 'title' => 'Optionally filter items by applying a regular expression on their URL', + 'exampleValue' => '/blog/article/.*', + ], + 'content_selector' => [ + 'name' => '[Optional] Selector to expand each article content', + 'title' => << 'article.content', + ], + 'content_cleanup' => [ + 'name' => '[Optional] Content cleanup: List of items to remove', + 'title' => 'Selector for unnecessary elements to remove inside article contents.', + 'exampleValue' => 'div.ads, div.comments', + ], + 'title_cleanup' => [ + 'name' => '[Optional] Text to remove from expanded article title', + 'title' => << ' | BlogName', + ], + 'limit' => self::LIMIT + ] + ]; + + private $feedName = ''; + + public function getURI() + { + $url = $this->getInput('home_page'); + if (empty($url)) { + $url = parent::getURI(); + } + return $url; + } + + public function getName() + { + if (!empty($this->feedName)) { + return $this->feedName; + } + return parent::getName(); + } + + public function collectData() + { + $url = $this->getInput('home_page'); + $url_selector = $this->getInput('url_selector'); + $url_pattern = $this->getInput('url_pattern'); + $content_selector = $this->getInput('content_selector'); + $content_cleanup = $this->getInput('content_cleanup'); + $title_cleanup = $this->getInput('title_cleanup'); + $limit = $this->getInput('limit') ?? 
10; + + $html = defaultLinkTo(getSimpleHTMLDOM($url), $url); + $this->feedName = $this->getPageTitle($html, $title_cleanup); + $items = $this->htmlFindEntries($html, $url_selector, $url_pattern, $limit, $content_cleanup); + + if (empty($content_selector)) { + $this->items = $items; + } else { + foreach ($items as $item) { + $this->items[] = $this->expandEntryWithSelector( + $item['uri'], + $content_selector, + $content_cleanup, + $title_cleanup, + $item['title'] + ); + } + } + } + + /** + * Filter a list of URLs using a pattern and limit + * @param array $links List of URLs + * @param string $url_pattern Pattern to look for in URLs + * @param int $limit Optional maximum amount of URLs to return + * @return array Array of URLs + */ + protected function filterUrlList($links, $url_pattern, $limit = 0) + { + if (!empty($url_pattern)) { + $url_pattern = '/' . str_replace('/', '\/', $url_pattern) . '/'; + $links = array_filter($links, function ($url) { + return preg_match($url_pattern, $url) === 1; + }); + } + + if ($limit > 0 && count($links) > $limit) { + $links = array_slice($links, 0, $limit); + } + + return $links; + } + + /** + * Retrieve title from webpage URL or DOM + * @param string|object $page URL or DOM to retrieve title from + * @param string $title_cleanup optional string to remove from webpage title, e.g. 
" | BlogName" + * @return string Webpage title + */ + protected function getPageTitle($page, $title_cleanup = null) + { + if (is_string($page)) { + $page = getSimpleHTMLDOMCached($page); + } + $title = html_entity_decode($page->find('title', 0)->plaintext); + if (!empty($title)) { + $title = trim(str_replace($title_cleanup, '', $title)); + } + return $title; + } + + /** + * Remove all elements from HTML content matching cleanup selector + * @param string|object $content HTML content as HTML object or string + * @return string|object Cleaned content (same type as input) + */ + protected function cleanArticleContent($content, $cleanup_selector) + { + $string_convert = false; + if (is_string($content)) { + $string_convert = true; + $content = str_get_html($content); + } + + if (!empty($cleanup_selector)) { + foreach ($content->find($cleanup_selector) as $item_to_clean) { + $item_to_clean->outertext = ''; + } + } + + if ($string_convert) { + $content = $content->outertext; + } + return $content; + } + + /** + * Retrieve first N link+title+truncated-content from webpage URL or DOM satisfying the specified criteria + * @param string|object $page URL or DOM to retrieve feed items from + * @param string $url_selector DOM selector for matching links or their parent element + * @param string $url_pattern Optional filter to keep only links matching the pattern + * @param int $limit Optional maximum amount of URLs to return + * @param string $content_cleanup Optional selector for removing elements, e.g. 
"div.ads, div.comments" + * @return array of items {'uri': entry_url, 'title': entry_title, ['content': when present in DOM] } + */ + protected function htmlFindEntries($page, $url_selector, $url_pattern = '', $limit = 0, $content_cleanup = null) + { + if (is_string($page)) { + $page = getSimpleHTMLDOM($page); + } + + $links = $page->find($url_selector); + + if (empty($links)) { + returnClientError('No results for URL selector'); + } + + $link_to_item = []; + foreach ($links as $link) { + $item = []; + if ($link->innertext != $link->plaintext) { + $item['content'] = $link->innertext; + } + if ($link->tag != 'a') { + $link = $link->find('a', 0); + } + $item['uri'] = $link->href; + $item['title'] = $link->plaintext; + if (isset($item['content'])) { + $item['content'] = convertLazyLoading($item['content']); + $item['content'] = defaultLinkTo($item['content'], $item['uri']); + $item['content'] = $this->cleanArticleContent($item['content'], $content_cleanup); + } + $link_to_item[$link->href] = $item; + } + + $links = $this->filterUrlList(array_keys($link_to_item), $url_pattern, $limit); + + if (empty($links)) { + returnClientError('No results for URL pattern'); + } + + $items = []; + foreach ($links as $link) { + $items[] = $link_to_item[$link]; + } + + return $items; + } + + /** + * Retrieve article content from its URL using content selector and return a feed item + * @param string $entry_url URL to retrieve article from + * @param string $content_selector HTML selector for extracting content, e.g. "article.content" + * @param string $content_cleanup Optional selector for removing elements, e.g. "div.ads, div.comments" + * @param string $title_cleanup Optional string to remove from article title, e.g. 
" | BlogName" + * @param string $title_default Optional title to use when could not extract title reliably + * @return array Entry data: uri, title, content + */ + protected function expandEntryWithSelector($entry_url, $content_selector, $content_cleanup = null, $title_cleanup = null, $title_default = null) + { + if (empty($content_selector)) { + returnClientError('Please specify a content selector'); + } + + $entry_html = getSimpleHTMLDOMCached($entry_url); + $article_content = $entry_html->find($content_selector); + + if (!empty($article_content)) { + $article_content = $article_content[0]; + } else { + returnClientError('Could not find content selector at URL: ' . $entry_url); + } + + $article_content = convertLazyLoading($article_content); + $article_content = defaultLinkTo($article_content, $entry_url); + $article_content = $this->cleanArticleContent($article_content, $content_cleanup); + + $article_title = $this->getPageTitle($entry_html, $title_cleanup); + if (!empty($title_default) && (empty($article_title) || $article_title === $this->feedName)) { + $article_title = $title_default; + } + + $item = []; + $item['uri'] = $entry_url; + $item['title'] = $article_title; + $item['content'] = $article_content; + return $item; + } +} diff --git a/bridges/CuriousCatBridge.php b/bridges/CuriousCatBridge.php index 573c776f8f7..3d6e87d0941 100644 --- a/bridges/CuriousCatBridge.php +++ b/bridges/CuriousCatBridge.php @@ -23,7 +23,10 @@ public function collectData() $apiJson = getContents($url); - $apiData = json_decode($apiJson, true); + $apiData = Json::decode($apiJson); + if (isset($apiData['error'])) { + throw new \Exception($apiData['error_code']); + } foreach ($apiData['posts'] as $post) { $item = []; diff --git a/bridges/DemoBridge.php b/bridges/DemoBridge.php index 06ec4e1e9c7..15ab7377f27 100644 --- a/bridges/DemoBridge.php +++ b/bridges/DemoBridge.php @@ -6,6 +6,7 @@ class DemoBridge extends BridgeAbstract const NAME = 'DemoBridge'; const URI = 
'http://github.com/rss-bridge/rss-bridge'; const DESCRIPTION = 'Bridge used for demos'; + const CACHE_TIMEOUT = 15; const PARAMETERS = [ 'testCheckbox' => [ diff --git a/bridges/DilbertBridge.php b/bridges/DilbertBridge.php deleted file mode 100644 index cd509ea4a56..00000000000 --- a/bridges/DilbertBridge.php +++ /dev/null @@ -1,36 +0,0 @@ -find('section.comic-item') as $element) { - $img = $element->find('img', 0); - $link = $element->find('a', 0); - $comic = $img->src; - $title = $img->alt; - $url = $link->href; - $date = substr(strrchr($url, '/'), 1); - if (empty($title)) { - $title = 'Dilbert Comic Strip on ' . $date; - } - $date = strtotime($date); - - $item = []; - $item['uri'] = $url; - $item['title'] = $title; - $item['author'] = 'Scott Adams'; - $item['timestamp'] = $date; - $item['content'] = '' . $img->alt . ''; - $this->items[] = $item; - } - } -} diff --git a/bridges/DoujinStyleBridge.php b/bridges/DoujinStyleBridge.php new file mode 100644 index 00000000000..0df96280d10 --- /dev/null +++ b/bridges/DoujinStyleBridge.php @@ -0,0 +1,148 @@ + [], + 'Randomly selected items' => [], + 'From search results' => [ + 'query' => [ + 'name' => 'Search query', + 'required' => true, + 'exampleValue' => 'FELT', + ], + 'flac' => [ + 'name' => 'Include FLAC', + 'type' => 'checkbox', + 'defaultValue' => false, + ], + 'mp3' => [ + 'name' => 'Include MP3', + 'type' => 'checkbox', + 'defaultValue' => false, + ], + 'tta' => [ + 'name' => 'Include TTA', + 'type' => 'checkbox', + 'defaultValue' => false, + ], + 'opus' => [ + 'name' => 'Include Opus', + 'type' => 'checkbox', + 'defaultValue' => false, + ], + 'ogg' => [ + 'name' => 'Include OGG', + 'type' => 'checkbox', + 'defaultValue' => false, + ] + ] + ]; + + public function collectData() + { + $html = getSimpleHTMLDOM($this->getURI()); + $html = defaultLinkTo($html, $this->getURI()); + + $submissions = $html->find('.gridBox .gridDetails'); + foreach ($submissions as $submission) { + $item = []; + + $item['uri'] = 
$submission->find('a', 0)->href; + + $content = getSimpleHTMLDOM($item['uri']); + $content = defaultLinkTo($content, $this->getURI()); + + $title = $content->find('h2', 0)->plaintext; + + $cover = $content->find('#imgClick a', 0); + if (is_null($cover)) { + $cover = $content->find('.coverWrap', 0)->src; + } else { + $cover = $cover->href; + } + + $item['content'] = ""; + + $keys = []; + foreach ($content->find('.pageWrap .pageSpan1') as $key) { + $keys[] = $key->plaintext; + } + + $values = $content->find('.pageWrap .pageSpan2'); + $metadata = array_combine($keys, $values); + + $format = 'Unknown'; + + foreach ($metadata as $key => $value) { + switch ($key) { + case 'Artist': + $artist = $value->find('a', 0)->plaintext; + $item['title'] = "$artist - $title"; + $item['content'] .= "
Artist: $artist"; + break; + case 'Tags:': + $item['categories'] = []; + foreach ($value->find('a') as $tag) { + $tag = str_replace('-', '-', $tag->plaintext); + $item['categories'][] = $tag; + } + + $item['content'] .= '
Tags: ' . join(', ', $item['categories']); + break; + case 'Format:': + $item['content'] .= "
Format: $value->plaintext"; + break; + case 'Date Added:': + $item['timestamp'] = $value->plaintext; + break; + case 'Provided By:': + $item['author'] = $value->find('a', 0)->plaintext; + break; + } + } + + $this->items[] = $item; + } + } + + public function getURI() + { + $url = self::URI; + + switch ($this->queriedContext) { + case 'From search results': + $url .= '?p=search&type=blanket'; + $url .= '&result=' . $this->getInput('query'); + + if ($this->getInput('flac') == 1) { + $url .= '&format0=on'; + } + if ($this->getInput('mp3') == 1) { + $url .= '&format1=on'; + } + if ($this->getInput('tta') == 1) { + $url .= '&format2=on'; + } + if ($this->getInput('opus') == 1) { + $url .= '&format3=on'; + } + if ($this->getInput('ogg') == 1) { + $url .= '&format4=on'; + } + break; + case 'Randomly selected items': + $url .= '?p=random'; + break; + } + + return $url; + } +} diff --git a/bridges/EBayBridge.php b/bridges/EBayBridge.php index f2919938c93..66fad10c5e8 100644 --- a/bridges/EBayBridge.php +++ b/bridges/EBayBridge.php @@ -66,16 +66,27 @@ public function collectData() $new_listing_label->remove(); } - $item['title'] = $listing->find('.s-item__title', 0)->plaintext; + $listingTitle = $listing->find('.s-item__title', 0); + if ($listingTitle) { + $item['title'] = $listingTitle->plaintext; + } $subtitle = implode('', $listing->find('.s-item__subtitle')); - $item['uri'] = $listing->find('.s-item__link', 0)->href; + $listingUrl = $listing->find('.s-item__link', 0); + if ($listingUrl) { + $item['uri'] = $listingUrl->href; + } else { + $item['uri'] = null; + } - preg_match('/.*\/itm\/(\d+).*/i', $item['uri'], $matches); - $item['uid'] = $matches[1]; + if (preg_match('/.*\/itm\/(\d+).*/i', $item['uri'], $matches)) { + $item['uid'] = $matches[1]; + } + + $priceDom = $listing->find('.s-item__details > .s-item__detail > .s-item__price', 0); + $price = $priceDom->plaintext ?? 
'N/A'; - $price = $listing->find('.s-item__details > .s-item__detail > .s-item__price', 0)->plaintext; $shippingFree = $listing->find('.s-item__details > .s-item__detail > .s-item__freeXDays', 0)->plaintext ?? ''; $localDelivery = $listing->find('.s-item__details > .s-item__detail > .s-item__localDelivery', 0)->plaintext ?? ''; $logisticsCost = $listing->find('.s-item__details > .s-item__detail > .s-item__logisticsCost', 0)->plaintext ?? ''; @@ -84,7 +95,12 @@ public function collectData() $sellerInfo = $listing->find('.s-item__seller-info-text', 0)->plaintext ?? ''; - $item['enclosures'] = [ $listing->find('.s-item__image-wrapper > img', 0)->src . '#.image' ]; + $image = $listing->find('.s-item__image-wrapper > img', 0); + if ($image) { + // Not quite sure why append fragment here + $imageUrl = $image->src . '#.image'; + $item['enclosures'] = [$imageUrl]; + } $item['content'] = <<$sellerInfo $location

diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 63062e2d63c..a2db3eadb74 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -48,7 +48,7 @@ class EZTVBridge extends BridgeAbstract public function collectData() { $eztv_uri = $this->getEztvUri(); - Debug::log($eztv_uri); + Logger::debug($eztv_uri); $ids = explode(',', trim($this->getInput('ids'))); foreach ($ids as $id) { $data = json_decode(getContents(sprintf('%s/api/get-torrents?imdb_id=%s', $eztv_uri, $id))); diff --git a/bridges/ElloBridge.php b/bridges/ElloBridge.php index c45e554a93c..4cc1858b098 100644 --- a/bridges/ElloBridge.php +++ b/bridges/ElloBridge.php @@ -113,9 +113,7 @@ private function getUsername($post, $postData) private function getAPIKey() { - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); + $cache = RssBridge::getCache(); $cache->setScope('ElloBridge'); $cache->setKey(['key']); $key = $cache->loadData(); diff --git a/bridges/EtsyBridge.php b/bridges/EtsyBridge.php index 05bf7d261f4..f9e2b4cbdd9 100644 --- a/bridges/EtsyBridge.php +++ b/bridges/EtsyBridge.php @@ -47,11 +47,11 @@ public function collectData() $item['title'] = $result->find('a', 0)->title; $item['uri'] = $result->find('a', 0)->href; - $item['author'] = $result->find('p.wt-text-gray > span', 2)->plaintext; + $item['author'] = $result->find('p.wt-text-gray > span', 2)->plaintext ?? ''; $item['content'] = '

' - . $result->find('span.currency-symbol', 0)->plaintext - . $result->find('span.currency-value', 0)->plaintext + . ($result->find('span.currency-symbol', 0)->plaintext ?? '') + . ($result->find('span.currency-value', 0)->plaintext ?? '') . '

' . $result->find('a', 0)->title . '

'; diff --git a/bridges/FB2Bridge.php b/bridges/FB2Bridge.php index 19030dd2e29..141ea59b962 100644 --- a/bridges/FB2Bridge.php +++ b/bridges/FB2Bridge.php @@ -304,7 +304,11 @@ private function getPageInfos($page, $cookies) $regex = '/"pageID":"([0-9]*)"/'; preg_match($regex, $pageContent, $matches); - return ['userId' => $matches[1], 'username' => $username]; + $arr = [ + 'userId' => $matches[1] ?? null, + 'username' => $username, + ]; + return $arr; } public function getName() diff --git a/bridges/FallGuysBridge.php b/bridges/FallGuysBridge.php new file mode 100644 index 00000000000..dbb34792602 --- /dev/null +++ b/bridges/FallGuysBridge.php @@ -0,0 +1,134 @@ + [ + 'name' => 'Language', + 'type' => 'list', + 'values' => [ + 'English' => 'en-US', + 'لعربية' => 'ar', + 'Deutsch' => 'de', + 'Español (Spain)' => 'es-ES', + 'Español (LA)' => 'es-MX', + 'Français' => 'fr', + 'Italiano' => 'it', + '日本語' => 'ja', + '한국어' => 'ko', + 'Polski' => 'pl', + 'Português (Brasil)' => 'pt-BR', + 'Русский' => 'ru', + 'Türkçe' => 'tr', + '简体中文' => 'zh-CN', + ], + 'defaultValue' => self::DEFAULT_LOCALE, + ] + ] + ]; + + public function collectData() + { + $html = getSimpleHTMLDOM(self::getURI()); + + $data = json_decode($html->find('#__NEXT_DATA__', 0)->innertext); + + foreach ($data->props->pageProps->newsList as $newsItem) { + $headerDescription = property_exists($newsItem->header, 'description') ? $newsItem->header->description : ''; + $headerImage = $newsItem->header->image->src; + + $contentImages = [$headerImage]; + + $content = <<{$headerDescription}

+

+ HTML; + + foreach ($newsItem->content->items as $contentItem) { + if (property_exists($contentItem, 'articleCopy')) { + if (property_exists($contentItem->articleCopy, 'title')) { + $title = $contentItem->articleCopy->title; + + $content .= <<{$title} + HTML; + } + + $text = $contentItem->articleCopy->copy; + + $content .= <<{$text}

+ HTML; + } elseif (property_exists($contentItem, 'articleImage')) { + $image = $contentItem->articleImage->imageSrc; + + if ($image != $headerImage) { + $contentImages[] = $image; + + $content .= <<

+ HTML; + } + } elseif (property_exists($contentItem, 'embeddedVideo')) { + $mediaOptions = $contentItem->embeddedVideo->mediaOptions; + $mainContentOptions = $contentItem->embeddedVideo->mainContentOptions; + + if (count($mediaOptions) == count($mainContentOptions)) { + for ($i = 0; $i < count($mediaOptions); $i++) { + if (property_exists($mediaOptions[$i], 'youtubeVideo')) { + $videoUrl = 'https://youtu.be/' . $mediaOptions[$i]->youtubeVideo->contentId; + $image = $mainContentOptions[$i]->image->src; + + $content .= '

'; + + if ($image != $headerImage) { + $contentImages[] = $image; + + $content .= <<
+ HTML; + } + + $content .= <<(Video: {$videoUrl}) + HTML; + + $content .= '

'; + } + } + } + } + } + + $item = [ + 'uid' => $newsItem->_id, + 'uri' => self::getURI() . '/' . $newsItem->_slug, + 'title' => $newsItem->_title, + 'timestamp' => $newsItem->lastModified, + 'content' => $content, + 'enclosures' => $contentImages, + ]; + + $this->items[] = $item; + } + } + + public function getURI() + { + $locale = $this->getInput('locale') ?? self::DEFAULT_LOCALE; + return self::BASE_URI . '/' . $locale . '/news'; + } + + public function getIcon() + { + return self::BASE_URI . '/favicon.ico'; + } +} diff --git a/bridges/FeedMergeBridge.php b/bridges/FeedMergeBridge.php index 8fc9dc20fd4..cf1b10a2218 100644 --- a/bridges/FeedMergeBridge.php +++ b/bridges/FeedMergeBridge.php @@ -14,7 +14,7 @@ class FeedMergeBridge extends FeedExpander 'feed_name' => [ 'name' => 'Feed name', 'type' => 'text', - 'exampleValue' => 'rss-bridge/FeedMerger', + 'exampleValue' => 'FeedMerge', ], 'feed_1' => [ 'name' => 'Feed url', @@ -58,9 +58,29 @@ public function collectData() $feeds = array_filter($feeds); foreach ($feeds as $feed) { - // Fetch all items from the feed - // todo: consider wrapping this in a try..catch to not let a single feed break the entire bridge? - $this->collectExpandableDatas($feed); + if (count($feeds) > 1) { + // Allow one or more feeds to fail + try { + $this->collectExpandableDatas($feed); + } catch (HttpException $e) { + Logger::warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e))); + $this->items[] = [ + 'title' => 'RSS-Bridge: ' . 
$e->getMessage(), + // Give current time so it sorts to the top + 'timestamp' => time(), + ]; + continue; + } catch (\Exception $e) { + if (str_starts_with($e->getMessage(), 'Unable to parse xml')) { + // Allow this particular exception from FeedExpander + Logger::warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e))); + continue; + } + throw $e; + } + } else { + $this->collectExpandableDatas($feed); + } } // Sort by timestamp descending @@ -91,6 +111,6 @@ public function getIcon() public function getName() { - return $this->getInput('feed_name') ?: 'rss-bridge/FeedMerger'; + return $this->getInput('feed_name') ?: 'FeedMerge'; } } diff --git a/bridges/FilterBridge.php b/bridges/FilterBridge.php index 61ce6d78e23..992fe0c3779 100644 --- a/bridges/FilterBridge.php +++ b/bridges/FilterBridge.php @@ -80,10 +80,15 @@ protected function parseItem($newItem) // Generate title from first 50 characters of content? if ($this->getInput('title_from_content') && array_key_exists('content', $item)) { $content = str_get_html($item['content']); - $pos = strpos($item['content'], ' ', 50); - $item['title'] = substr($content->plaintext, 0, $pos); - if (strlen($content->plaintext) >= $pos) { - $item['title'] .= '...'; + $plaintext = $content->plaintext; + if (mb_strlen($plaintext) < 51) { + $item['title'] = $plaintext; + } else { + $pos = strpos($item['content'], ' ', 50); + $item['title'] = substr($plaintext, 0, $pos); + if (strlen($plaintext) >= $pos) { + $item['title'] .= '...'; + } } } diff --git a/bridges/FurAffinityBridge.php b/bridges/FurAffinityBridge.php index 6185de87ace..c548ff65d99 100644 --- a/bridges/FurAffinityBridge.php +++ b/bridges/FurAffinityBridge.php @@ -900,10 +900,16 @@ private function itemsFromSubmissionList($html, $limit) $submissionHTML = $this->getFASimpleHTMLDOM($submissionURL, $cache); $stats = $submissionHTML->find('.stats-container', 0); - $item['timestamp'] = strtotime($stats->find('.popup_date', 0)->title); - 
$item['enclosures'] = [ - $submissionHTML->find('.actions a[href^=https://d.facdn]', 0)->href - ]; + $popupDate = $stats->find('.popup_date', 0); + if ($popupDate) { + $item['timestamp'] = strtotime($popupDate->title); + } + + $var = $submissionHTML->find('.actions a[href^=https://d.facdn]', 0); + if ($var) { + $item['enclosures'] = [$var->href]; + } + foreach ($stats->find('#keywords a') as $keyword) { $item['categories'][] = $keyword->plaintext; } diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 97926bed1d5..0aa394fa9cc 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -90,7 +90,7 @@ protected function parseItem($newsItem) $item = parent::parseItem($newsItem); $item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']); $article = getSimpleHTMLDOMCached($item['uri']); - $item['content'] = $this->extractArticleContent($article); + //$item['content'] = $this->extractArticleContent($article); $author = $this->extractAuthor($article); if (!empty($author)) { $item['author'] = $author; diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index c7b7c37f2d9..61dccb1a6b9 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -20,11 +20,13 @@ public function collectData() $apiUrl = self::URI . $api_endpoint . http_build_query($params); $rawContent = getContents($apiUrl); - $cleanedContent = str_replace('\r\n', '', substr($rawContent, 1, -1)); - $cleanedContent = str_replace('\"', '"', $cleanedContent); + $cleanedContent = str_replace([ + '', + '', + ], '', $rawContent); // The content is actually a json between quotes with \r\n inserted - $json = json_decode($cleanedContent); + $json = Json::decode($cleanedContent, false); foreach ($json as $article) { $item = []; @@ -57,8 +59,10 @@ protected function getItemContent($articleUri) $article_html = defaultLinkTo($article_html, $this->getURI()); $top_description = '

' . $article_html->find('div.article_top_description', 0)->innertext . '

'; - $hero_image = 'getAttribute('data-src') . '>'; - + $heroImage = $article_html->find('img.article_top_DMT_Image', 0); + if ($heroImage) { + $hero_image = ''; + } $article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0); // Remove the menu bar on some articles (PDF download etc.) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index 9ca84010183..e4e995e30c4 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -5,7 +5,7 @@ class GithubIssueBridge extends BridgeAbstract const MAINTAINER = 'Pierre Mazière'; const NAME = 'Github Issue'; const URI = 'https://github.com/'; - const CACHE_TIMEOUT = 0; // 10min + const CACHE_TIMEOUT = 600; // 10m const DESCRIPTION = 'Returns the issues or comments of an issue of a github project'; const PARAMETERS = [ @@ -137,7 +137,8 @@ private function extractIssueComment($issueNbr, $title, $comment) { $uri = $this->buildGitHubIssueCommentUri($issueNbr, $comment->id); - $author = $comment->find('.author', 0)->plaintext; + $authorDom = $comment->find('.author', 0); + $author = $authorDom->plaintext ?? null; $header = $comment->find('.timeline-comment-header > h3', 0); $title .= ' / ' . ($header ? $header->plaintext : 'Activity'); diff --git a/bridges/GizmodoBridge.php b/bridges/GizmodoBridge.php index 64e2fc8ae36..8ed30704152 100644 --- a/bridges/GizmodoBridge.php +++ b/bridges/GizmodoBridge.php @@ -22,7 +22,7 @@ protected function parseItem($item) // Get header image $image = $html->find('meta[property="og:image"]', 0)->content; - $item['content'] = $html->find('div.js_post-content', 0)->innertext; + $item['content'] = $html->find('div.js_post-content', 0)->innertext ?? 
''; // Get categories $categories = explode(',', $html->find('meta[name="keywords"]', 0)->content); diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index 58c07984f73..96fa450631d 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -115,7 +115,7 @@ private function extractContent($page) // delete known bad elements foreach ( $article->find('div[id*="adtile"], #job-market, #seminars, iframe, - div.gbox_affiliate, div.toc, .embedcontent') as $bad + div.gbox_affiliate, div.toc, .embedcontent, script') as $bad ) { $bad->remove(); } diff --git a/bridges/GoogleScholarBridge.php b/bridges/GoogleScholarBridge.php index 932efb5b3d8..981355dd32a 100644 --- a/bridges/GoogleScholarBridge.php +++ b/bridges/GoogleScholarBridge.php @@ -2,19 +2,101 @@ class GoogleScholarBridge extends BridgeAbstract { - const NAME = 'Goolge Scholar'; + const NAME = 'Google Scholar v2'; const URI = 'https://scholar.google.com/'; - const DESCRIPTION = 'Follow authors of scientific publications.'; - const MAINTAINER = 'thefranke'; + const DESCRIPTION = 'Search for publications or follow authors on Google Scholar.'; + const MAINTAINER = 'nicholasmccarthy'; const CACHE_TIMEOUT = 86400; // 24h - const PARAMETERS = [[ - 'userId' => [ - 'name' => 'User ID', - 'exampleValue' => 'qc6CJjYAAAAJ', - 'required' => true - ] - ]]; + const PARAMETERS = [ + 'user' => [ + 'userId' => [ + 'name' => 'User ID', + 'exampleValue' => 'qc6CJjYAAAAJ', + 'required' => true + ] + ], + 'query' => [ + 'q' => [ + 'name' => 'Search Query', + 'title' => 'Search Query', + 'required' => true, + 'exampleValue' => 'machine learning' + ], + 'cites' => [ + 'name' => 'Cites', + 'required' => false, + 'default' => '', + 'exampleValue' => '1275980731835430123', + 'title' => 'Parameter defines unique ID for an article to trigger Cited By searches. Usage of cites + will bring up a list of citing documents in Google Scholar. Example value: cites=1275980731835430123. 
+ Usage of cites and q parameters triggers search within citing articles.' + ], + 'language' => [ + 'name' => 'Language', + 'required' => false, + 'default' => '', + 'exampleValue' => 'en', + 'title' => 'Parameter defines the language to use for the Google Scholar search. ' + ], + 'minCitations' => [ + 'name' => 'Minimum Citations', + 'required' => false, + 'type' => 'number', + 'default' => '0', + 'title' => 'Parameter defines the minimum number of citations in order for the results to be included.' + ], + 'sinceYear' => [ + 'name' => 'Since Year', + 'required' => false, + 'type' => 'number', + 'default' => '0', + 'title' => 'Parameter defines the year from which you want the results to be included.' + ], + 'untilYear' => [ + 'name' => 'Until Year', + 'required' => false, + 'type' => 'number', + 'default' => '0', + 'title' => 'Parameter defines the year until which you want the results to be included.' + ], + 'sortBy' => [ + 'name' => 'Sort By Date', + 'type' => 'checkbox', + 'default' => false, + 'title' => 'Parameter defines articles added in the last year, sorted by date. Alternatively sorts + by relevance. 
This overrides Since-Until Year values.', + ], + 'includePatents' => [ + 'name' => 'Include Patents', + 'type' => 'checkbox', + 'default' => false, + 'title' => 'Include Patents', + ], + 'includeCitations' => [ + 'name' => 'Include Citations', + 'type' => 'checkbox', + 'default' => true, + 'title' => 'Parameter defines whether you would like to include citations or not.', + ], + 'reviewArticles' => [ + 'name' => 'Only Review Articles', + 'type' => 'checkbox', + 'default' => false, + 'title' => 'Parameter defines whether you would like to show only review articles or not (these + articles consist of topic reviews, or discuss the works or authors you have searched for).', + ], + 'numResults' => [ + 'name' => 'Number of Results (max 20)', + 'required' => false, + 'type' => 'number', + 'default' => 10, + 'exampleValue' => 10, + 'title' => 'Number of results to return' + ] + ], + ]; + public function getIcon() { @@ -23,58 +105,138 @@ public function getIcon() public function collectData() { - $uri = self::URI . '/citations?hl=en&view_op=list_works&sortby=pubdate&user=' . $this->getInput('userId'); - - $html = getSimpleHTMLDOM($uri) - or returnServerError('Could not fetch Google Scholar data.'); - - $publications = $html->find('tr[class="gsc_a_tr"]'); - - foreach ($publications as $publication) { - $articleUrl = self::URI . 
htmlspecialchars_decode($publication->find('a[class="gsc_a_at"]', 0)->href); - $articleTitle = $publication->find('a[class="gsc_a_at"]', 0)->plaintext; - - # fetch the article itself to extract rest of content - $contentArticle = getSimpleHTMLDOMCached($articleUrl); - $articleEntries = $contentArticle->find('div[class="gs_scl"]'); - - $articleDate = ''; - $articleAbstract = ''; - $articleAuthor = ''; - $content = ''; - - foreach ($articleEntries as $entry) { - $field = $entry->find('div[class="gsc_oci_field"]', 0)->plaintext; - $value = $entry->find('div[class="gsc_oci_value"]', 0)->plaintext; - - if ($field == 'Publication date') { - $articleDate = $value; - } else if ($field == 'Description') { - $articleAbstract = $value; - } else if ($field == 'Authors') { - $articleAuthor = $value; - } else if ($field == 'Scholar articles' || $field == 'Total citations') { - continue; - } else { - $content = $content . $field . ': ' . $value . '

'; + switch ($this->queriedContext) { + case 'user': + $userId = $this->getInput('userId'); + $uri = self::URI . '/citations?hl=en&view_op=list_works&sortby=pubdate&user=' . $userId; + $html = getSimpleHTMLDOM($uri) or returnServerError('Could not fetch Google Scholar data.'); + + $publications = $html->find('tr[class="gsc_a_tr"]'); + + foreach ($publications as $publication) { + $articleUrl = self::URI . htmlspecialchars_decode($publication->find('a[class="gsc_a_at"]', 0)->href); + $articleTitle = $publication->find('a[class="gsc_a_at"]', 0)->plaintext; + + # fetch the article itself to extract rest of content + $contentArticle = getSimpleHTMLDOMCached($articleUrl); + $articleEntries = $contentArticle->find('div[class="gs_scl"]'); + + $articleDate = ''; + $articleAbstract = ''; + $articleAuthor = ''; + $content = ''; + + foreach ($articleEntries as $entry) { + $field = $entry->find('div[class="gsc_oci_field"]', 0)->plaintext; + $value = $entry->find('div[class="gsc_oci_value"]', 0)->plaintext; + + if ($field == 'Publication date') { + $articleDate = $value; + } elseif ($field == 'Description') { + $articleAbstract = $value; + } elseif ($field == 'Authors') { + $articleAuthor = $value; + } elseif ($field == 'Scholar articles' || $field == 'Total citations') { + continue; + } else { + $content = $content . $field . ': ' . $value . '

'; + } + } + + $content = $content . $articleAbstract; + + $item = []; + + $item['title'] = $articleTitle; + $item['uri'] = $articleUrl; + $item['timestamp'] = strtotime($articleDate); + $item['author'] = $articleAuthor; + $item['content'] = $content; + + $this->items[] = $item; + + if (count($this->items) >= 10) { + break; + } + } + break; + case 'query': + $query = urlencode($this->getInput('q')); + $cites = $this->getInput('cites'); + $language = $this->getInput('language'); + $sinceYear = $this->getInput('sinceYear'); + $untilYear = $this->getInput('untilYear'); + $minCitations = (int)$this->getInput('minCitations'); + $includeCitations = $this->getInput('includeCitations'); + $includePatents = $this->getInput('includePatents'); + $reviewArticles = $this->getInput('reviewArticles'); + $sortBy = $this->getInput('sortBy'); + $numResults = $this->getInput('numResults'); + + # Build URI + $uri = self::URI . 'scholar?q=' . $query; + $uri .= $sinceYear != 0 ? '&as_ylo=' . $sinceYear : ''; + $uri .= $untilYear != 0 ? '&as_yhi=' . $untilYear : ''; + $uri .= $language != '' ? '&hl=' . $language : ''; + $uri .= $includePatents ? '&as_vis=7' : '&as_vis=0'; + $uri .= $includeCitations ? '&as_vis=0' : ($includePatents ? '&as_vis=1' : ''); + $uri .= $reviewArticles ? '&as_rr=1' : ''; + $uri .= $sortBy ? '&scisbd=1' : ''; + $uri .= $numResults ? '&num=' . $numResults : ''; + + $html = getSimpleHTMLDOM($uri) or returnServerError('Could not fetch Google Scholar data.'); + + $publications = $html->find('div[class="gs_r gs_or gs_scl"]'); + + foreach ($publications as $publication) { + $articleTitleElement = $publication->find('h3[class="gs_rt"]', 0); + $articleUrl = $articleTitleElement->find('a', 0)->href; + $articleTitle = $articleTitleElement->plaintext; + + $articleDateElement = $publication->find('div[class="gs_a"]', 0); + $articleDate = $articleDateElement ? 
$articleDateElement->plaintext : ''; + + $articleAbstractElement = $publication->find('div[class="gs_rs"]', 0); + $articleAbstract = $articleAbstractElement ? $articleAbstractElement->plaintext : ''; + + $articleAuthorElement = $publication->find('div[class="gs_a"]', 0); + $articleAuthor = $articleAuthorElement ? $articleAuthorElement->plaintext : ''; + + $bottomRowElement = $publication->find('div[class="gs_fl"]', 0); + + $item = [ + 'title' => $articleTitle, + 'uri' => $articleUrl, + 'timestamp' => strtotime($articleDate), + 'author' => $articleAuthor, + 'content' => $articleAbstract + ]; + + switch ($this->queriedContext) { + case 'user': + $this->items[] = $item; + break; + case 'query': + $citedBy = 0; + if ($bottomRowElement) { + $anchorTags = $bottomRowElement->find('a'); + foreach ($anchorTags as $anchorTag) { + if (strpos($anchorTag->plaintext, 'Cited') !== false) { + $parts = explode('Cited by ', $anchorTag->plaintext); + if (isset($parts[1])) { + $citedBy = (int)$parts[1]; + } + break; + } + } + } + if ($citedBy >= $minCitations) { + $this->items[] = $item; + } + break; + } } - } - - $content = $content . 
$articleAbstract; - - $item = []; - - $item['title'] = $articleTitle; - $item['uri'] = $articleUrl; - $item['timestamp'] = strtotime($articleDate); - $item['author'] = $articleAuthor; - $item['content'] = $content; - - $this->items[] = $item; - - if (count($this->items) >= 10) { break; - } } } } diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 59465e89e7a..9b0713acaf6 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -5,7 +5,7 @@ class GoogleSearchBridge extends BridgeAbstract const MAINTAINER = 'sebsauvage'; const NAME = 'Google search'; const URI = 'https://www.google.com/'; - const CACHE_TIMEOUT = 1800; // 30min + const CACHE_TIMEOUT = 60 * 30; // 30m const DESCRIPTION = 'Returns max 100 results from the past year.'; const PARAMETERS = [[ diff --git a/bridges/ImgsedBridge.php b/bridges/ImgsedBridge.php new file mode 100644 index 00000000000..1555c578a0f --- /dev/null +++ b/bridges/ImgsedBridge.php @@ -0,0 +1,259 @@ + [ + 'u' => [ + 'name' => 'username', + 'type' => 'text', + 'title' => 'Instagram username you want to follow', + 'exampleValue' => 'aesoprockwins', + 'required' => true, + ], + 'post' => [ + 'name' => 'posts', + 'type' => 'checkbox', + 'title' => 'Show posts for this Instagram user', + 'defaultValue' => 'checked', + ], + 'story' => [ + 'name' => 'stories', + 'type' => 'checkbox', + 'title' => 'Show stories for this Instagram user', + ], + 'tagged' => [ + 'name' => 'tagged', + 'type' => 'checkbox', + 'title' => 'Show tagged post for this Instagram user', + ], + ] + ]; + + public function getURI() + { + if (!is_null($this->getInput('u'))) { + return urljoin(self::URI, '/' . $this->getInput('u') . '/'); + } + + return parent::getURI(); + } + + public function collectData() + { + $username = $this->getInput('u'); + try { + // Check if the user exist + $html = getSimpleHTMLDOMCached(self::URI . $username . 
'/'); + if ($this->getInput('post')) { + $this->collectPosts(); + } + if ($this->getInput('story')) { + $this->collectStories(); + } + if ($this->getInput('tagged')) { + $this->collectTaggeds(); + } + } catch (HttpException $e) { + throw new \Exception(sprintf('Unable to find user `%s`', $username)); + } + } + + private function collectPosts() + { + $username = $this->getInput('u'); + $html = getSimpleHTMLDOMCached(self::URI . $username . '/'); + $html = defaultLinkTo($html, self::URI); + + foreach ($html->find('div[class=item]') as $post) { + $url = $post->find('a', 0)->href; + $instagramURL = $this->convertURLToInstagram($url); + $date = $this->parseDate($post->find('div[class=time]', 0)->plaintext); + $description = $post->find('img', 0)->alt; + $imageUrl = $post->find('img', 0)->src; + // Sometimes, there is some lazy image instead of the real URL + if ($imageUrl == 'https://imgsed.com/img/lazy.jpg') { + $imageUrl = $post->find('img', 0)->getAttribute('data-src'); + } + $download = $post->find('a[class=download]', 0)->href; + $author = $username; + $uid = $post->find('a', 0)->href; + $title = 'Post - ' . $username . ' - ' . $this->descriptionToTitle($description); + + // Checking post type + $isVideo = (bool) $post->find('i[class=video]', 0); + $videoNote = $isVideo ? '

(video)

' : ''; + + $isMoreContent = (bool) $post->find('svg', 0); + $moreContentNote = $isMoreContent ? '

(multiple images and/or videos)

' : ''; + + + + + $this->items[] = [ + 'uri' => $url, + 'author' => $author, + 'timestamp' => $date, + 'title' => $title, + 'thumbnail' => $imageUrl, + 'enclosures' => [$imageUrl, $download], + 'content' => << + {$description} + +{$videoNote} +{$moreContentNote} +

{$description}

+

Download

+

Display on Instagram

+HTML, + 'uid' => $uid + ]; + } + } + + private function collectStories() + { + try { + $username = $this->getInput('u'); + $html = getSimpleHTMLDOMCached(self::URI . 'api/media/?name=' . $username); + $json = Json::decode($html); + + foreach ($json as $post) { + $url = $post['src']; + $imageUrl = $post['thumb']; + $download = $url; + $author = $username; + $uid = $url; + $title = 'Story - ' . $username; + + $this->items[] = [ + 'uri' => $url, + 'author' => $author, + 'title' => $title, + 'thumbnail' => $imageUrl, + 'enclosures' => [$imageUrl, $download], + 'content' => << + story + +

Download

+ HTML, + 'uid' => $uid + ]; + } + } catch (Exception $e) { + // If it fails, it's because there are no stories, so don't do anything + } + } + + private function collectTaggeds() + { + $username = $this->getInput('u'); + try { + $html = getSimpleHTMLDOMCached(self::URI . 'tagged/' . $username . '/'); + $html = defaultLinkTo($html, self::URI); + + foreach ($html->find('div[class=item]') as $post) { + $url = $post->find('a', 1)->href; + $instagramURL = $this->convertURLToInstagram($url); + $fromURL = $post->find('div[class=username]', 0)->find('a', 0)->href; + $fromUsername = $post->find('div[class=username]', 0)->plaintext; + $date = $this->parseDate($post->find('div[class=time]', 0)->plaintext); + $description = $post->find('img', 0)->alt; + $imageUrl = $post->find('img', 0)->src; + $download = $post->find('a[class=download]', 0)->href; + $author = $fromUsername; + $uid = $post->find('a', 0)->href; + $title = 'Tagged - ' . $fromUsername . ' - ' . $this->descriptionToTitle($description); + + // Checking post type + $isVideo = (bool) $post->find('i[class=video]', 0); + $videoNote = $isVideo ? '

(video)

' : ''; + + $isMoreContent = (bool) $post->find('svg', 0); + $moreContentNote = $isMoreContent ? '

(multiple images and/or videos)

' : ''; + + + $this->items[] = [ + 'uri' => $url, + 'author' => $author, + 'timestamp' => $date, + 'title' => $title, + 'thumbnail' => $imageUrl, + 'enclosures' => [$imageUrl, $download], + 'content' => << + {$description} + +{$videoNote} +{$moreContentNote} +

From {$fromUsername}

+

{$description}

+

Download

+

Display on Instagram

+HTML, + 'uid' => $uid + ]; + } + } catch (Exception $e) { + // If it fails, it's because the account was not tagged + } + } + + // Parse date, and transform the date into a timetamp, even in a case of a relative date + private function parseDate($content) + { + $date = date_create(); + $dateString = str_replace(' ago', '', $content); + $relativeDate = date_interval_create_from_date_string($dateString); + if ($relativeDate) { + date_sub($date, $relativeDate); + } else { + Logger::info(sprintf('Unable to parse date string: %s', $dateString)); + } + return date_format($date, 'r'); + } + + private function convertURLToInstagram($url) + { + return str_replace(self::URI, self::INSTAGRAMURI, $url); + } + private function descriptionToTitle($description) + { + return strlen($description) > 60 ? mb_substr($description, 0, 57) . '...' : $description; + } + + public function getName() + { + if (!is_null($this->getInput('u'))) { + $types = []; + if ($this->getInput('post')) { + $types[] = 'Posts'; + } + if ($this->getInput('story')) { + $types[] = 'Stories'; + } + if ($this->getInput('tagged')) { + $types[] = 'Tags'; + } + $typesText = $types[0]; + if (count($types) > 1) { + for ($i = 1; $i < count($types) - 1; $i++) { + $typesText .= ', ' . $types[$i]; + } + $typesText .= ' & ' . $types[$i]; + } + + return 'Username ' . $this->getInput('u') . ' - ' . $typesText . 
' - Imgsed Bridge'; + } + return parent::getName(); + } +} diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 1bfa2472718..714319067f4 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -98,9 +98,7 @@ protected function getInstagramUserId($username) return $username; } - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); + $cache = RssBridge::getCache(); $cache->setScope('InstagramBridge'); $cache->setKey([$username]); $key = $cache->loadData(); diff --git a/bridges/JohannesBlickBridge.php b/bridges/JohannesBlickBridge.php new file mode 100644 index 00000000000..6c00fecaba1 --- /dev/null +++ b/bridges/JohannesBlickBridge.php @@ -0,0 +1,29 @@ +find('td > a') as $index => $a) { + $item = []; // Create an empty item + $articlePath = $a->href; + $item['title'] = $a->innertext; + $item['uri'] = $articlePath; + $item['content'] = ''; + + $this->items[] = $item; // Add item to the list + if (count($this->items) >= 10) { + break; + } + } + } +} diff --git a/bridges/JornalDeNoticiasBridge.php b/bridges/JornalDeNoticiasBridge.php deleted file mode 100644 index 1549d04f121..00000000000 --- a/bridges/JornalDeNoticiasBridge.php +++ /dev/null @@ -1,59 +0,0 @@ - [ - 'url' => [ - 'name' => 'URL (relative)', - 'exampleValue' => 'opiniao/catia-domingues.html', - ] - ] - ]; - - public function getIcon() - { - return 'https://static.globalnoticias.pt/jn/common/images/favicons/favicon-128.png'; - } - - public function getURI() - { - switch ($this->queriedContext) { - case 'URL': - $url = self::URI . '/' . 
$this->getInput('url'); - break; - default: - $url = self::URI; - } - return $url; - } - - public function collectData() - { - $archives = $this->getURI(); - $html = getSimpleHTMLDOMCached($archives); - - foreach ($html->find('article') as $element) { - $item = []; - - $title = $element->find('h2 a', 0); - $link = $element->find('h2 a', 0); - $auth = $element->find('h3 a', 0); - - $item['title'] = $title->plaintext; - $item['uri'] = self::URI . $link->href; - $item['author'] = $auth->plaintext; - - $snippet = $element->find('h4 a', 0); - if ($snippet) { - $item['content'] = $snippet->plaintext; - } - - $this->items[] = $item; - } - } -} diff --git a/bridges/JornalNBridge.php b/bridges/JornalNBridge.php new file mode 100644 index 00000000000..2a9d6661455 --- /dev/null +++ b/bridges/JornalNBridge.php @@ -0,0 +1,104 @@ + [ + 'name' => 'News Feed', + 'type' => 'list', + 'title' => 'Feeds from the Portuguese sports newspaper A BOLA.PT', + 'values' => [ + 'Concelhos' => [ + 'Espinho' => 'espinho', + 'Ovar' => 'ovar', + 'Santa Maria da Feira' => 'santa-maria-da-feira', + ], + 'Cultura' => 'ovar/cultura', + 'Desporto' => 'desporto', + 'Economia' => 'santa-maria-da-feira/economia', + 'Política' => 'santa-maria-da-feira/politica', + 'Opinião' => 'santa-maria-da-feira/opiniao', + 'Sociedade' => 'santa-maria-da-feira/sociedade', + ] + ] + ] + ]; + + const PT_MONTH_NAMES = [ + 'janeiro' => '01', + 'fevereiro' => '02', + 'março' => '03', + 'abril' => '04', + 'maio' => '05', + 'junho' => '06', + 'julho' => '07', + 'agosto' => '08', + 'setembro' => '09', + 'outubro' => '10', + 'novembro' => '11', + 'dezembro' => '12', + ]; + + public function getIcon() + { + return 'https://www.jornaln.pt/wp-content/uploads/2023/01/cropped-NovoLogoJornal_Instagram-192x192.png'; + } + + public function getName() + { + if ($this->getKey('feed')) { + return self::NAME . ' | ' . $this->getKey('feed'); + } + return self::NAME; + } + + public function getURI() + { + return self::URI . 
$this->getInput('feed'); + } + + public function collectData() + { + $url = sprintf(self::URI . '/%s', $this->getInput('feed')); + $dom = getSimpleHTMLDOMCached($url); + $domSelector = '.elementor-widget-container > .elementor-posts-container'; + $dom = $dom->find($domSelector, 0); + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + foreach ($dom->find('article') as $article) { + //Get thumbnail + $image = $article->find('.elementor-post__thumbnail img', 0)->src; + //Timestamp + $date = $article->find('.elementor-post-date', 0)->plaintext; + $date = trim($date, "\t "); + $date = preg_replace('/ de /i', '/', $date); + $date = preg_replace('/, /', '/', $date); + $date = explode('/', $date); + $year = (int) $date[2]; + $month = (int) $date[1]; + $day = (int) $date[0]; + foreach (self::PT_MONTH_NAMES as $key => $item) { + if ($key === strtolower($month)) { + $month = (int) $item; + } + } + $timestamp = mktime(0, 0, 0, $month, $day, $year); + //Content + $content = '' . $article->find('.elementor-post__title > a', 0)->plaintext . ''; + $this->items[] = [ + 'title' => $article->find('.elementor-post__title > a', 0)->plaintext, + 'uri' => $article->find('a', 0)->href, + 'content' => $content, + 'timestamp' => $timestamp + ]; + } + } +} diff --git a/bridges/JustWatchBridge.php b/bridges/JustWatchBridge.php index 59e60a16d47..66b61aff4b3 100644 --- a/bridges/JustWatchBridge.php +++ b/bridges/JustWatchBridge.php @@ -169,10 +169,17 @@ public function collectData() foreach ($titles as $title) { $item = []; $item['uri'] = $title->find('a', 0)->href; - $item['title'] = $provider->find('picture > img', 0)->alt . ' - ' . 
$title->find('.title-poster__image > img', 0)->alt; - $image = $title->find('.title-poster__image > img', 0)->attr['src']; - if (str_starts_with($image, 'data')) { - $image = $title->find('.title-poster__image > img', 0)->attr['data-src']; + + $itemTitle = sprintf( + '%s - %s', + $provider->find('picture > img', 0)->alt ?? '', + $title->find('.title-poster__image > img', 0)->alt ?? '' + ); + $item['title'] = $itemTitle; + + $imageUrl = $title->find('.title-poster__image > img', 0)->attr['src'] ?? ''; + if (str_starts_with($imageUrl, 'data')) { + $imageUrl = $title->find('.title-poster__image > img', 0)->attr['data-src']; } $content = 'Provider: ' @@ -190,7 +197,7 @@ public function collectData() $content .= 'Poster:
'; $item['content'] = $content; diff --git a/bridges/NationalGeographicBridge.php b/bridges/NationalGeographicBridge.php index 79a0600b27d..f7572240adc 100644 --- a/bridges/NationalGeographicBridge.php +++ b/bridges/NationalGeographicBridge.php @@ -319,7 +319,7 @@ private function getFullArticle($uri) $content .= $module['note']; break; case 'listicle': - $content .= '

' . $module['title'] . '

'; + $content .= '

' . ($module['title'] ?? '(no title)') . '

'; if (isset($module['image'])) { $content .= $this->handleImages($module['image'], $module['image']['cmsType']); } diff --git a/bridges/NotAlwaysBridge.php b/bridges/NotAlwaysBridge.php index 49b6f663997..6f1a8c008a3 100644 --- a/bridges/NotAlwaysBridge.php +++ b/bridges/NotAlwaysBridge.php @@ -19,8 +19,10 @@ class NotAlwaysBridge extends BridgeAbstract 'Romantic' => 'romantic', 'Related' => 'related', 'Learning' => 'learning', - 'Friendly' => 'friendly', 'Hopeless' => 'hopeless', + 'Healthy' => 'healthy', + 'Legal' => 'legal', + 'Friendly' => 'friendly', 'Unfiltered' => 'unfiltered' ] ] @@ -38,7 +40,9 @@ public function collectData() #print_r($post); $item = []; $item['uri'] = $post->find('h1', 0)->find('a', 0)->href; - $item['content'] = $post; + $postHeader = $post->find('.post_header', 0); + $storyContent = $post->find('.storycontent', 0); + $item['content'] = $postHeader . '

' . $storyContent; $item['title'] = $post->find('h1', 0)->find('a', 0)->innertext; $this->items[] = $item; } diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index e281b79d69c..da3c34f563d 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -2,10 +2,24 @@ class NyaaTorrentsBridge extends FeedExpander { - const MAINTAINER = 'ORelio'; + const MAINTAINER = 'ORelio & Jisagi'; const NAME = 'NyaaTorrents'; const URI = 'https://nyaa.si/'; const DESCRIPTION = 'Returns the newest torrents, with optional search criteria.'; + const MAX_ITEMS = 20; + const CUSTOM_FIELD_PREFIX = 'nyaa:'; + const CUSTOM_FIELDS = [ + self::CUSTOM_FIELD_PREFIX . 'seeders' => 'seeders', + self::CUSTOM_FIELD_PREFIX . 'leechers' => 'leechers', + self::CUSTOM_FIELD_PREFIX . 'downloads' => 'downloads', + self::CUSTOM_FIELD_PREFIX . 'infoHash' => 'infoHash', + self::CUSTOM_FIELD_PREFIX . 'categoryId' => 'categoryId', + self::CUSTOM_FIELD_PREFIX . 'category' => 'category', + self::CUSTOM_FIELD_PREFIX . 'size' => 'size', + self::CUSTOM_FIELD_PREFIX . 'comments' => 'comments', + self::CUSTOM_FIELD_PREFIX . 'trusted' => 'trusted', + self::CUSTOM_FIELD_PREFIX . 'remake' => 'remake' + ]; const PARAMETERS = [ [ 'f' => [ @@ -65,23 +79,41 @@ public function getIcon() return self::URI . 'static/favicon.png'; } - public function collectData() + public function getURI() { - $this->collectExpandableDatas( - self::URI . '?page=rss&s=id&o=desc&' + return self::URI . '?page=rss&s=id&o=desc&' . 
http_build_query([ 'f' => $this->getInput('f'), 'c' => $this->getInput('c'), 'q' => $this->getInput('q'), 'u' => $this->getInput('u') - ]), - 20 - ); + ]); + } + + public function collectData() + { + $content = getContents($this->getURI()); + $content = $this->fixCustomFields($content); + $rssContent = simplexml_load_string(trim($content)); + $this->collectRss2($rssContent, self::MAX_ITEMS); + } + + private function fixCustomFields($content) + { + $broken = array_keys(self::CUSTOM_FIELDS); + $fixed = array_values(self::CUSTOM_FIELDS); + return str_replace($broken, $fixed, $content); } protected function parseItem($newItem) { - $item = parent::parseItem($newItem); + $item = parent::parseRss2Item($newItem); + + // Add nyaa custom fields + $item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']); + foreach (array_values(self::CUSTOM_FIELDS) as $value) { + $item[$value] = (string) $newItem->$value; + } //Convert URI from torrent file to web page $item['uri'] = str_replace('/download/', '/view/', $item['uri']); diff --git a/bridges/OMonlineBridge.php b/bridges/OMonlineBridge.php new file mode 100644 index 00000000000..a434e44e41d --- /dev/null +++ b/bridges/OMonlineBridge.php @@ -0,0 +1,72 @@ + [ + 'name' => 'Ortsname', + 'title' => 'Für die Anzeige von Beitragen nur aus einem Ort oder mehreren Orten + geben einen Orstnamen ein. Mehrere Ortsnamen müssen mit / getrennt eingeben werden, + z.B. Vechta/Cloppenburg. Groß- und Kleinschreibung beachten!' + ] + ] + ]; + + public function collectData() + { + if (!empty($this->getInput('ort'))) { + $url = sprintf('%s/ort/%s', self::URI, $this->getInput('ort')); + } else { + $url = sprintf('%s', self::URI); + } + + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request: ' . 
$url); + + $html = defaultLinkTo($html, $url); + + foreach ($html->find('div.molecule-teaser > a ') as $index => $a) { + $item = []; + + $articlePath = $a->href; + + $articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT) + or returnServerError('Could not request: ' . $articlePath); + + $articlePageHtml = defaultLinkTo($articlePageHtml, self::URI); + + $contents = $articlePageHtml->find('div.molecule-article', 0); + + $item['uri'] = $articlePath; + $item['title'] = $contents->find('h1', 0)->innertext; + + $contents->find('div.col-12 col-md-10 offset-0 offset-md-1', 0); + + $item['content'] = $contents->innertext; + $item['timestamp'] = $this->extractDate2($a->plaintext); + $this->items[] = $item; + + if (count($this->items) >= 10) { + break; + } + } + } + + private function extractDate2($text) + { + $dateRegex = '/^([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/'; + + $text = trim($text); + + if (preg_match($dateRegex, $text, $matches)) { + return $matches[1]; + } + + return ''; + } +} diff --git a/bridges/PatreonBridge.php b/bridges/PatreonBridge.php index fdf84e7edc8..b64102dab65 100644 --- a/bridges/PatreonBridge.php +++ b/bridges/PatreonBridge.php @@ -100,12 +100,14 @@ public function collectData() ); $item['author'] = $user->full_name; - if (isset($post->attributes->image)) { - $item['content'] .= '

'; + $image = $post->attributes->image ?? null; + if ($image) { + $logo = sprintf( + '

', + $post->attributes->url, + $image->thumb_url ?? $image->url ?? $this->getURI() + ); + $item['content'] .= $logo; } if (isset($post->attributes->content)) { diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 4ecd4c43058..85178e54375 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -129,7 +129,7 @@ protected function collectDeals($url) // Find the text corresponding to the clock $spanDateDiv = $clock->parent()->find('span[class=hide--toW3]', 0); - $itemDate = $spanDateDiv->plaintext; + $itemDate = $spanDateDiv->plaintext ?? ''; // In case of a Local deal, there is no date, but we can use // this case for other reason (like date not in the last field) if ($this->contains($itemDate, $this->i8n('localdeal'))) { @@ -481,12 +481,12 @@ private function getShipsFrom($deal) ] ); if ($deal->find('span[class*=' . $selector . ']', 0) != null) { - return '
' - . $deal->find('span[class*=' . $selector . ']', 0)->children(2)->plaintext - . '
'; - } else { - return ''; + $children = $deal->find('span[class*=' . $selector . ']', 0)->children(2); + if ($children) { + return '
' . $children->plaintext . '
'; + } } + return ''; } /** diff --git a/bridges/PicalaBridge.php b/bridges/PicalaBridge.php index 35f73d0a842..fa2542cfcd5 100644 --- a/bridges/PicalaBridge.php +++ b/bridges/PicalaBridge.php @@ -58,17 +58,26 @@ public function collectData() { $fullhtml = getSimpleHTMLDOM($this->getURI()); foreach ($fullhtml->find('.list-container-category a') as $article) { - $srcsets = explode(',', $article->find('img', 0)->getAttribute('srcset')); - $image = explode(' ', trim(array_shift($srcsets)))[0]; + $firstImage = $article->find('img', 0); + $image = null; + if ($firstImage !== null) { + $srcsets = explode(',', $firstImage->getAttribute('srcset')); + $image = explode(' ', trim(array_shift($srcsets)))[0]; + } $item = []; $item['uri'] = self::URI . $article->href; $item['title'] = $article->find('h2', 0)->plaintext; - $item['content'] = sprintf( - '
%s', - $image, - $article->find('.teaser__text', 0)->plaintext - ); + if ($image === null) { + $item['content'] = $article->find('.teaser__text', 0)->plaintext; + } else { + $item['content'] = sprintf( + '
%s', + $image, + $article->find('.teaser__text', 0)->plaintext + ); + } + $this->items[] = $item; } } diff --git a/bridges/PicnobBridge.php b/bridges/PicnobBridge.php index 6e010e64520..1d7d06b486e 100644 --- a/bridges/PicnobBridge.php +++ b/bridges/PicnobBridge.php @@ -6,7 +6,7 @@ class PicnobBridge extends BridgeAbstract const NAME = 'Picnob Bridge'; const URI = 'https://www.picnob.com/'; const CACHE_TIMEOUT = 3600; // 1h - const DESCRIPTION = 'Returns Picnob posts by user or by hashtag'; + const DESCRIPTION = 'Returns Picnob (Instagram viewer) posts by user or by hashtag'; const PARAMETERS = [ 'Username' => [ @@ -47,37 +47,36 @@ public function collectData() $html = getSimpleHTMLDOM($this->getURI()); foreach ($html->find('.items') as $part) { foreach ($part->find('.item') as $element) { - $url = urljoin(self::URI, $element->find('a', 0)->href); + $url = urljoin(self::URI, $element->find('a', 0)->href); + $date = date_create(); + $relativeDate = date_interval_create_from_date_string(str_replace(' ago', '', $element->find('.time', 0)->plaintext)); + if ($relativeDate) { + date_sub($date, $relativeDate); + } - $date = date_create(); - $relativeDate = str_replace(' ago', '', $element->find('.time', 0)->plaintext); - date_sub($date, date_interval_create_from_date_string($relativeDate)); + $description = defaultLinkTo(trim($element->find('.sum', 0)->innertext), self::URI); - $description = defaultLinkTo(trim($element->find('.sum', 0)->innertext), self::URI); + $isVideo = (bool) $element->find('.icon_video', 0); + $videoNote = $isVideo ? '

(video)

' : ''; - $isVideo = (bool) $element->find('.icon_video', 0); - $videoNote = $isVideo ? '

(video)

' : ''; + $isTV = (bool) $element->find('.icon_tv', 0); + $tvNote = $isTV ? '

(TV)

' : ''; - $isTV = (bool) $element->find('.icon_tv', 0); - $tvNote = $isTV ? '

(TV)

' : ''; + $isMoreContent = (bool) $element->find('.icon_multi', 0); + $moreContentNote = $isMoreContent ? '

(multiple images and/or videos)

' : ''; - $isMoreContent = (bool) $element->find('.icon_multi', 0); - $moreContentNote = $isMoreContent ? '

(multiple images and/or videos)

' : ''; + $imageUrl = $element->find('.img', 0)->getAttribute('data-src'); - $imageUrl = $element->find('.img', 0)->getAttribute('data-src'); - parse_str(parse_url($imageUrl, PHP_URL_QUERY), $imageVars); - $imageUrl = $imageVars['u']; + $uid = explode('/', parse_url($url, PHP_URL_PATH))[2]; - $uid = explode('/', parse_url($url, PHP_URL_PATH))[2]; - - $this->items[] = [ - 'uri' => $url, - 'timestamp' => date_format($date, 'r'), - 'title' => strlen($description) > 60 ? mb_substr($description, 0, 57) . '...' : $description, - 'thumbnail' => $imageUrl, - 'enclosures' => [$imageUrl], - 'content' => <<items[] = [ + 'uri' => $url, + 'timestamp' => date_format($date, 'r'), + 'title' => strlen($description) > 60 ? mb_substr($description, 0, 57) . '...' : $description, + 'thumbnail' => $imageUrl, + 'enclosures' => [$imageUrl], + 'content' => << @@ -86,8 +85,8 @@ public function collectData() {$moreContentNote}

{$description}

HTML, - 'uid' => $uid - ]; + 'uid' => $uid + ]; } } } diff --git a/bridges/PicukiBridge.php b/bridges/PicukiBridge.php index e90177ed5ab..f1d45e2acd8 100644 --- a/bridges/PicukiBridge.php +++ b/bridges/PicukiBridge.php @@ -6,9 +6,17 @@ class PicukiBridge extends BridgeAbstract const NAME = 'Picuki Bridge'; const URI = 'https://www.picuki.com/'; const CACHE_TIMEOUT = 3600; // 1h - const DESCRIPTION = 'Returns Picuki posts by user and by hashtag'; + const DESCRIPTION = 'Returns Picuki (Instagram viewer) posts by user and by hashtag'; const PARAMETERS = [ + 'global' => [ + 'count' => [ + 'name' => 'Count', + 'type' => 'number', + 'title' => 'How many posts to fetch', + 'defaultValue' => 12 + ] + ], 'Username' => [ 'u' => [ 'name' => 'username', @@ -43,6 +51,13 @@ public function collectData() $re = '#let short_code = "(.*?)";\s*$#m'; $html = getSimpleHTMLDOM($this->getURI()); + $requestedCount = $this->getInput('count'); + if ($requestedCount > 12) { + // Picuki shows 12 posts per page at initial load. + throw new \Exception('Maximum count is 12'); + } + + $count = 0; foreach ($html->find('.box-photos .box-photo') as $element) { // skip ad items if (in_array('adv', explode(' ', $element->class))) { @@ -86,14 +101,19 @@ public function collectData() 'source' => $sourceUrl, 'enclosures' => [$imageUrl], 'content' => << - - -{$sourceUrl} -{$videoNote} -

{$description}

-HTML + + + + {$sourceUrl} + {$videoNote} +

{$description}

+ HTML ]; + + $count++; + if ($count >= $requestedCount) { + break; + } } } diff --git a/bridges/PokemonNewsBridge.php b/bridges/PokemonNewsBridge.php index 954e584c5f2..3dacb163108 100644 --- a/bridges/PokemonNewsBridge.php +++ b/bridges/PokemonNewsBridge.php @@ -14,7 +14,10 @@ public function collectData() // todo: parse json api instead: https://www.pokemon.com/api/1/us/news/get-news.json $url = 'https://www.pokemon.com/us/pokemon-news'; $dom = getSimpleHTMLDOM($url); - + $haystack = (string)$dom; + if (str_contains($haystack, 'Request unsuccessful. Incapsula incident')) { + throw new \Exception('Blocked by anti-bot'); + } foreach ($dom->find('.news-list ul li') as $item) { $title = $item->find('h3', 0)->plaintext; $description = $item->find('p.hidden-mobile', 0); diff --git a/bridges/PornhubBridge.php b/bridges/PornhubBridge.php index 75ac4c0f5b1..104463a82df 100644 --- a/bridges/PornhubBridge.php +++ b/bridges/PornhubBridge.php @@ -67,7 +67,9 @@ public function collectData() $show_images = $this->getInput('show_images'); - $html = getSimpleHTMLDOM($uri); + $html = getSimpleHTMLDOM($uri, [ + 'cookie: accessAgeDisclaimerPH=1' + ]); foreach ($html->find('div.videoUList ul.videos li.videoblock') as $element) { $item = []; diff --git a/bridges/PresidenciaPTBridge.php b/bridges/PresidenciaPTBridge.php index 5afcc91fe19..8b02a481a27 100644 --- a/bridges/PresidenciaPTBridge.php +++ b/bridges/PresidenciaPTBridge.php @@ -61,9 +61,9 @@ public function collectData() $item = []; $link = $element->find('a', 0); - $etitle = $element->find('.content-box h2', 0); - $edts = $element->find('p', 1); - $edt = html_entity_decode($edts->innertext, ENT_HTML5); + $etitle = $element->find('.article-title', 0); + $edts = $element->find('.date', 0); + $edt = $edts->innertext; $item['title'] = strip_tags($etitle->innertext); $item['uri'] = self::URI . 
$link->href; diff --git a/bridges/QwantzBridge.php b/bridges/QwantzBridge.php new file mode 100644 index 00000000000..e48e948adf0 --- /dev/null +++ b/bridges/QwantzBridge.php @@ -0,0 +1,37 @@ +find('img')[0]->{'src'}; + $subject = $content->find('a')[1]->{'href'}; + $subject = urldecode(substr($subject, strpos($subject, 'subject') + 8)); + $p = (string)$content->find('P')[0]; + + $item['content'] = "{$subject}

{$title}

{$p}"; + + return $item; + } + + public function collectData() + { + $this->collectExpandableDatas(self::URI . 'rssfeed.php'); + } + + public function getIcon() + { + return self::URI . 'favicon.ico'; + } +} diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index de80f09d434..86d7884b2a2 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -73,47 +73,6 @@ class RedditBridge extends BridgeAbstract ] ]; - public function detectParameters($url) - { - $parsed_url = parse_url($url); - - $host = $parsed_url['host'] ?? null; - - if ($host != 'www.reddit.com' && $host != 'old.reddit.com') { - return null; - } - - $path = explode('/', $parsed_url['path']); - - if ($path[1] == 'r') { - return [ - 'r' => $path[2] - ]; - } elseif ($path[1] == 'user') { - return [ - 'u' => $path[2] - ]; - } else { - return null; - } - } - - public function getIcon() - { - return 'https://www.redditstatic.com/desktop2x/img/favicon/favicon-96x96.png'; - } - - public function getName() - { - if ($this->queriedContext == 'single') { - return 'Reddit r/' . $this->getInput('r'); - } elseif ($this->queriedContext == 'user') { - return 'Reddit u/' . $this->getInput('u'); - } else { - return self::NAME; - } - } - public function collectData() { $user = false; @@ -152,18 +111,22 @@ public function collectData() foreach ($subreddits as $subreddit) { $name = trim($subreddit); - $values = getContents(self::URI - . '/search.json?q=' - . $keywords - . $flair - . ($user ? 'author%3A' : 'subreddit%3A') - . $name - . '&sort=' - . $this->getInput('d') - . '&include_over_18=on'); - $decodedValues = json_decode($values); - - foreach ($decodedValues->data->children as $post) { + $url = self::URI + . '/search.json?q=' + . $keywords + . $flair + . ($user ? 'author%3A' : 'subreddit%3A') + . $name + . '&sort=' + . $this->getInput('d') + . 
'&include_over_18=on'; + + $version = 'v0.0.1'; + $useragent = "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"; + $json = getContents($url, ['User-Agent: ' . $useragent]); + $parsedJson = Json::decode($json, false); + + foreach ($parsedJson->data->children as $post) { if ($post->kind == 't1' && !$comments) { continue; } @@ -288,6 +251,22 @@ public function collectData() }); } + public function getIcon() + { + return 'https://www.redditstatic.com/desktop2x/img/favicon/favicon-96x96.png'; + } + + public function getName() + { + if ($this->queriedContext == 'single') { + return 'Reddit r/' . $this->getInput('r'); + } elseif ($this->queriedContext == 'user') { + return 'Reddit u/' . $this->getInput('u'); + } else { + return self::NAME; + } + } + private function encodePermalink($link) { return self::URI . implode( @@ -307,4 +286,29 @@ private function link($href, $text) { return '' . $text . ''; } + + public function detectParameters($url) + { + $parsed_url = parse_url($url); + + $host = $parsed_url['host'] ?? null; + + if ($host != 'www.reddit.com' && $host != 'old.reddit.com') { + return null; + } + + $path = explode('/', $parsed_url['path']); + + if ($path[1] == 'r') { + return [ + 'r' => $path[2] + ]; + } elseif ($path[1] == 'user') { + return [ + 'u' => $path[2] + ]; + } else { + return null; + } + } } diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php index 56946a4776c..4fd25b008fd 100644 --- a/bridges/Releases3DSBridge.php +++ b/bridges/Releases3DSBridge.php @@ -82,10 +82,10 @@ protected function collectDataUrl($dataUrl) $item = []; $item['title'] = $name; $item['author'] = $publisher; - $item['timestamp'] = $ignDate; - $item['enclosures'] = [$ignCoverArt]; + //$item['timestamp'] = $ignDate; + //$item['enclosures'] = [$ignCoverArt]; $item['uri'] = empty($ignLink) ? $searchLinkDuckDuckGo : $ignLink; - $item['content'] = $ignDescription . $releaseDescription . $releaseSearchLinks; + $item['content'] = $releaseDescription . 
$releaseSearchLinks; $this->items[] = $item; $limit++; } diff --git a/bridges/ReutersBridge.php b/bridges/ReutersBridge.php index ab2e812d3bd..2acadfc3edd 100644 --- a/bridges/ReutersBridge.php +++ b/bridges/ReutersBridge.php @@ -143,18 +143,6 @@ class ReutersBridge extends BridgeAbstract 'wire' ]; - /** - * Performs an HTTP request to the Reuters API and returns decoded JSON - * in the form of an associative array - * @param string $feed_uri Full API URL to fetch data - * @return array - */ - private function getJson($uri) - { - $returned_data = getContents($uri); - return json_decode($returned_data, true); - } - /** * Takes in data from Reuters Wire API and * creates structured data in the form of a list @@ -295,8 +283,19 @@ private function getArticle($feed_uri, $is_article_uid = false) { // This will make another request to API to get full detail of article and author's name. $url = $this->getAPIURL($feed_uri, 'article', $is_article_uid); - $rawData = $this->getJson($url); + try { + $json = getContents($url); + $rawData = Json::decode($json); + } catch (\JsonException $e) { + return [ + 'content' => '', + 'author' => '', + 'category' => '', + 'images' => '', + 'published_at' => '' + ]; + } $article_content = ''; $authorlist = ''; $category = []; @@ -342,15 +341,12 @@ private function handleImage($images) { $img_placeholder = ''; - foreach ($images as $image) { // Add more image to article. + foreach ($images as $image) { + // Add more image to article. $image_url = $image['url']; - $image_caption = $image['caption']; + $image_caption = $image['caption'] ?? $image['alt_text'] ?? $image['subtitle'] ?? ''; $image_alt_text = ''; - if (isset($image['alt_text'])) { - $image_alt_text = $image['alt_text']; - } else { - $image_alt_text = $image_caption; - } + $image_alt_text = $image['alt_text'] ?? $image_caption; $img = "\"$image_alt_text\""; $img_caption = "
$image_caption
"; $figure = "
$img \t $img_caption
"; @@ -497,7 +493,8 @@ public function collectData() { $endpoint = $this->getSectionEndpoint(); $url = $this->getAPIURL($endpoint, 'section'); - $data = $this->getJson($url); + $json = getContents($url); + $data = Json::decode($json); $stories = []; $section_name = ''; @@ -557,7 +554,11 @@ public function collectData() $image_placeholder = $this->handleImage([$story['thumbnail']]); } $content = $story['description'] . $image_placeholder; - $category = [$story['primary_section']['name']]; + if (isset($story['primary_section']['name'])) { + $category = [$story['primary_section']['name']]; + } else { + $category = []; + } } else { $content_detail = $this->getArticle($article_uri); $description = $content_detail['content']; diff --git a/bridges/RoadAndTrackBridge.php b/bridges/RoadAndTrackBridge.php index d666b6bd2c0..c236036cd65 100644 --- a/bridges/RoadAndTrackBridge.php +++ b/bridges/RoadAndTrackBridge.php @@ -49,8 +49,13 @@ private function fetchArticle($articleLink) $item['title'] = $title->innertext; } - $item['author'] = $article->find('.byline-name', 0)->innertext; - $item['timestamp'] = strtotime($article->find('.content-info-date', 0)->getAttribute('datetime')); + $item['author'] = $article->find('.byline-name', 0)->innertext ?? 
''; + + $contentInfoDate = $article->find('.content-info-date', 0); + if ($contentInfoDate) { + $datetime = $contentInfoDate->getAttribute('datetime'); + $item['timestamp'] = strtotime($datetime); + } $content = $article->find('.content-container', 0); if ($content->find('.content-rail', 0) !== null) { diff --git a/bridges/RumbleBridge.php b/bridges/RumbleBridge.php index d8fcf19cc12..08b416bfe22 100644 --- a/bridges/RumbleBridge.php +++ b/bridges/RumbleBridge.php @@ -40,9 +40,12 @@ public function collectData() $dom = getSimpleHTMLDOM($url); foreach ($dom->find('li.video-listing-entry') as $video) { + $datetime = $video->find('time', 0)->getAttribute('datetime'); + $this->items[] = [ 'title' => $video->find('h3', 0)->plaintext, 'uri' => self::URI . $video->find('a', 0)->href, + 'timestamp' => (new \DateTimeImmutable($datetime))->getTimestamp(), 'author' => $account . '@rumble.com', 'content' => defaultLinkTo($video, self::URI)->innertext, ]; diff --git a/bridges/ScientificAmericanBridge.php b/bridges/ScientificAmericanBridge.php index 88635f58e0b..d575bf9488f 100644 --- a/bridges/ScientificAmericanBridge.php +++ b/bridges/ScientificAmericanBridge.php @@ -25,10 +25,11 @@ class ScientificAmericanBridge extends FeedExpander ]; const FEED = 'http://rss.sciam.com/ScientificAmerican-Global'; - const ISSUES = 'https://www.scientificamerican.com/store/archive/?magazineFilterID=all'; + const ISSUES = 'https://www.scientificamerican.com/archive/issues/2020s/'; public function collectData() { + $this->collectIssues(); $items = [ ...$this->collectFeed(), ...$this->collectIssues() @@ -49,7 +50,7 @@ public function collectData() if ($this->getInput('addContents') == 1) { usort($this->items, function ($item1, $item2) { - return $item1['timestamp'] < $item2['timestamp']; + return $item1['timestamp'] - $item2['timestamp']; }); } } @@ -65,8 +66,8 @@ private function collectFeed() private function collectIssues() { $html = getSimpleHTMLDOMCached(self::ISSUES); - $issues_root = 
$html->find('div.store-listing-group', 0); - $issues = $issues_root->find('div.store-listing-group__item'); + $content = $html->getElementById('content')->children(3); + $issues = $content->children(); $issues_count = min( (int)$this->getInput('parseIssues'), count($issues) @@ -74,7 +75,7 @@ private function collectIssues() $items = []; for ($i = 0; $i < $issues_count; $i++) { - $a = $issues[$i]->find('a.store-listing__cta', 0); + $a = $issues[$i]->find('a', 0); $link = 'https://scientificamerican.com' . $a->getAttribute('href'); array_push($items, ...$this->parseIssue($link)); } @@ -86,51 +87,42 @@ private function parseIssue($issue_link) $items = []; $html = getSimpleHTMLDOMCached($issue_link); - $features = $html->find('section[data-issue-column="Features"]', 0); + $features = $html->find('[class^=Detail_issue__article__previews__featured]', 0); if ($features != null) { - $articles = $features->find('article'); + $articles = $features->find('div', 0)->children(); foreach ($articles as $article) { - $items[] = $this->parseIssueItem($article); + $h4 = $article->find('h4', 0); + $a = $h4->find('a', 0); + $link = 'https://scientificamerican.com' . $a->getAttribute('href'); + $title = $a->plaintext; + $items[] = [ + 'uri' => $link, + 'title' => $title, + 'uid' => $link, + 'content' => '' + ]; } } - $departments = $html->find('section[data-issue-column="Departments"]', 0); + $departments = $html->find('[class^=Detail_issue__article__previews__departments]', 0); if ($departments != null) { - $lis = $departments->find('ul', 0)->find('li'); - foreach ($lis as $li) { - $items[] = $this->parseIssueItem($li); + $headers = $departments->find('[class*=Listing_article__listing__title]'); + foreach ($headers as $header) { + $a = $header->find('a', 0); + $link = 'https://scientificamerican.com' . 
$a->getAttribute('href'); + $title = $a->plaintext; + $items[] = [ + 'uri' => $link, + 'title' => $title, + 'uid' => $link, + 'content' => '' + ]; } } return $items; } - private function parseIssueItem($article) - { - $title = $article->getAttribute('data-article-title'); - $a = $article->find('a', 0); - $link = null; - if ($a != null) { - $link = $a->href; - } else { - [$kind, $v] = explode('-', $article->getAttribute('id'), 2); - $link = 'https://scientificamerican.com/' . $kind . '/' . $v; - } - $content = ''; - - $desc = $article->find('p.listing-wide__inner__desc', 0); - if ($desc != null) { - $content = $desc->plaintext; - } - - return [ - 'uri' => $link, - 'title' => $title, - 'uid' => $link, - 'content' => $content - ]; - } - private function updateItem($item) { $html = getSimpleHTMLDOMCached($item['uri']); diff --git a/bridges/SoundcloudBridge.php b/bridges/SoundcloudBridge.php index 09fc7b212eb..0bd9a2b0daa 100644 --- a/bridges/SoundcloudBridge.php +++ b/bridges/SoundcloudBridge.php @@ -36,13 +36,17 @@ class SoundCloudBridge extends BridgeAbstract private $feedTitle = null; private $feedIcon = null; - private $clientIDCache = null; + private $cache = null; private $clientIdRegex = '/client_id.*?"(.+?)"/'; private $widgetRegex = '/widget-.+?\.js/'; public function collectData() { + $this->cache = RssBridge::getCache(); + $this->cache->setScope('SoundCloudBridge'); + $this->cache->setKey(['client_id']); + $res = $this->getUser($this->getInput('u')); $this->feedTitle = $res->username; @@ -62,8 +66,7 @@ public function collectData() $item['author'] = $apiItem->user->username; $item['title'] = $apiItem->user->username . ' - ' . $apiItem->title; $item['timestamp'] = strtotime($apiItem->created_at); - - $description = nl2br($apiItem->description); + $description = nl2br($apiItem->description ?? ''); $item['content'] = <<{$description}

@@ -116,24 +119,11 @@ public function getName() return parent::getName(); } - private function initClientIDCache() - { - if ($this->clientIDCache !== null) { - return; - } - - $cacheFactory = new CacheFactory(); - - $this->clientIDCache = $cacheFactory->create(); - $this->clientIDCache->setScope('SoundCloudBridge'); - $this->clientIDCache->setKey(['client_id']); - } - private function getClientID() { - $this->initClientIDCache(); - - $clientID = $this->clientIDCache->loadData(); + $this->cache->setScope('SoundCloudBridge'); + $this->cache->setKey(['client_id']); + $clientID = $this->cache->loadData(); if ($clientID == null) { return $this->refreshClientID(); @@ -144,8 +134,6 @@ private function getClientID() private function refreshClientID() { - $this->initClientIDCache(); - $playerHTML = getContents($this->playerUrl); // Extract widget JS filenames from player page @@ -163,7 +151,9 @@ private function refreshClientID() if (preg_match($this->clientIdRegex, $widgetJS, $matches)) { $clientID = $matches[1]; - $this->clientIDCache->saveData($clientID); + $this->cache->setScope('SoundCloudBridge'); + $this->cache->setKey(['client_id']); + $this->cache->saveData($clientID); return $clientID; } diff --git a/bridges/SpotifyBridge.php b/bridges/SpotifyBridge.php index a957edf63ef..7b7e2b1d5cd 100644 --- a/bridges/SpotifyBridge.php +++ b/bridges/SpotifyBridge.php @@ -7,45 +7,86 @@ class SpotifyBridge extends BridgeAbstract const DESCRIPTION = 'Fetches the latest items from one or more artists, playlists or podcasts'; const MAINTAINER = 'Paroleen'; const CACHE_TIMEOUT = 3600; - const PARAMETERS = [ [ - 'clientid' => [ - 'name' => 'Client ID', - 'type' => 'text', - 'required' => true + const PARAMETERS = [ + 'By Spotify URIs' => [ + 'clientid' => [ + 'name' => 'Client ID', + 'type' => 'text', + 'required' => true + ], + 'clientsecret' => [ + 'name' => 'Client secret', + 'type' => 'text', + 'required' => true + ], + 'country' => [ + 'name' => 'Country/Market', + 'type' => 
'text', + 'required' => false, + 'exampleValue' => 'US', + 'defaultValue' => 'US' + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'exampleValue' => 10, + 'defaultValue' => 10 + ], + 'spotifyuri' => [ + 'name' => 'Spotify URIs', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'spotify:artist:4lianjyuR1tqf6oUX8kjrZ [,spotify:playlist:37i9dQZF1DXcBWIGoYBM5M,spotify:show:6ShFMYxeDNMo15COLObDvC]', + ], + 'albumtype' => [ + 'name' => 'Album type', + 'type' => 'text', + 'required' => false, + 'exampleValue' => 'album,single,appears_on,compilation', + 'defaultValue' => 'album,single' + ] ], - 'clientsecret' => [ - 'name' => 'Client secret', - 'type' => 'text', - 'required' => true + 'By Spotify Search' => [ + 'clientid' => [ + 'name' => 'Client ID', + 'type' => 'text', + 'required' => true + ], + 'clientsecret' => [ + 'name' => 'Client secret', + 'type' => 'text', + 'required' => true + ], + 'market' => [ + 'name' => 'Market', + 'type' => 'text', + 'required' => false, + 'exampleValue' => 'US', + 'defaultValue' => 'US' + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'exampleValue' => 10, + 'defaultValue' => 10 + ], + 'query' => [ + 'name' => 'Search query', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'artist:The Beatles', + ], + 'type' => [ + 'name' => 'Type', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'album,episode', + 'defaultValue' => 'album,episode' + ] ], - 'country' => [ - 'name' => 'Country/Market', - 'type' => 'text', - 'required' => false, - 'exampleValue' => 'US', - 'defaultValue' => 'US' - ], - 'limit' => [ - 'name' => 'Limit', - 'type' => 'number', - 'required' => false, - 'exampleValue' => 10, - 'defaultValue' => 10 - ], - 'spotifyuri' => [ - 'name' => 'Spotify URIs', - 'type' => 'text', - 'required' => true, - 'exampleValue' => 'spotify:artist:4lianjyuR1tqf6oUX8kjrZ 
[,spotify:playlist:37i9dQZF1DXcBWIGoYBM5M,spotify:show:6ShFMYxeDNMo15COLObDvC]', - ], - 'albumtype' => [ - 'name' => 'Album type', - 'type' => 'text', - 'required' => false, - 'exampleValue' => 'album,single,appears_on,compilation', - 'defaultValue' => 'album,single' - ] - ] ]; + ]; private $uri = ''; private $name = ''; @@ -53,7 +94,14 @@ class SpotifyBridge extends BridgeAbstract public function collectData() { - $entries = $this->getAllEntries(); + $this->fetchAccessToken(); + + if ($this->queriedContext === 'By Spotify URIs') { + $entries = $this->getEntriesFromURIs(); + } else { + $entries = $this->getEntriesFromQuery(); + } + usort($entries, function ($entry1, $entry2) { return $this->getDate($entry2) <=> $this->getDate($entry1); }); @@ -77,7 +125,46 @@ public function collectData() } } - private function getAllEntries() + private function getEntriesFromQuery() + { + $entries = []; + + $types = [ + 'albums', + 'episodes', + ]; + + $query = [ + 'q' => $this->getInput('query'), + 'type' => $this->getInput('type'), + 'market' => $this->getInput('market'), + 'limit' => 50, + ]; + + $hasItems = true; + $offset = 0; + + while ($hasItems && $offset < 1000) { + $hasItems = false; + + $query['offset'] = $offset; + $json = getContents('https://api.spotify.com/v1/search?' . http_build_query($query), ['Authorization: Bearer ' . 
$this->token]); + $partial = Json::decode($json); + + foreach ($types as $type) { + if (isset($partial[$type]['items'])) { + $entries = array_merge($entries, $partial[$type]['items']); + $hasItems = true; + } + } + + $offset += 50; + } + + return $entries; + } + + private function getEntriesFromURIs() { $entries = []; $uris = explode(',', $this->getInput('spotifyuri')); @@ -92,7 +179,7 @@ private function getAllEntries() 'show' => 'episode', ]; if (!isset($types[$type])) { - throw new \Exception('Spotify URI not supported'); + throw new \Exception(sprintf('Unsupported Spotify URI: %s', $uri)); } $entry_type = $types[$type]; @@ -111,7 +198,8 @@ private function getAllEntries() $offset = 0; while (true) { $query['offset'] = $offset; - $partial = $this->fetchContent($url . '?' . http_build_query($query)); + $json = getContents($url . '?' . http_build_query($query), ['Authorization: Bearer ' . $this->token]); + $partial = Json::decode($json); if (empty($partial['items'])) { break; } @@ -188,61 +276,30 @@ private function getDate($entry) return DateTime::createFromFormat('Y-m-d', $date)->getTimestamp(); } - private function getToken() + private function fetchAccessToken() { - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); - $cache->setScope('SpotifyBridge'); - + $cache = RssBridge::getCache(); $cacheKey = sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret')); - $cache->setKey($cacheKey); - - $time = null; - if ($cache->getTime()) { - $time = (new DateTime())->getTimestamp() - $cache->getTime(); - } - - if ($cache->getTime() == false || $time >= 3600) { - $this->fetchToken(); - $cache->saveData($this->token); + $cache->setScope('SpotifyBridge'); + $cache->setKey([$cacheKey]); + $token = $cache->loadData(3600); + if ($token) { + $this->token = $token; } else { - $this->token = $cache->loadData(); + $basicAuth = base64_encode(sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret'))); + $json = 
getContents('https://accounts.spotify.com/api/token', [ + "Authorization: Basic $basicAuth" + ], [ + CURLOPT_POSTFIELDS => 'grant_type=client_credentials' + ]); + $data = Json::decode($json); + $this->token = $data['access_token']; + $cache->setScope('SpotifyBridge'); + $cache->setKey([$cacheKey]); + $cache->saveData($this->token); } } - private function fetchToken() - { - $curl = curl_init(); - - curl_setopt($curl, CURLOPT_URL, 'https://accounts.spotify.com/api/token'); - curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($curl, CURLOPT_POST, 1); - curl_setopt($curl, CURLOPT_POSTFIELDS, 'grant_type=client_credentials'); - - $basic = sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret')); - curl_setopt($curl, CURLOPT_HTTPHEADER, ['Authorization: Basic ' . base64_encode($basic)]); - - $json = curl_exec($curl); - $json = json_decode($json)->access_token; - curl_close($curl); - - $this->token = $json; - } - - private function fetchContent($url) - { - $this->getToken(); - $curl = curl_init(); - curl_setopt($curl, CURLOPT_URL, $url); - curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($curl, CURLOPT_HTTPHEADER, ['Authorization: Bearer ' . $this->token]); - $json = curl_exec($curl); - $json = json_decode($json, true); - curl_close($curl); - return $json; - } - public function getURI() { if (empty($this->uri)) { @@ -275,7 +332,8 @@ private function getFirstEntry() $query['market'] = $this->getInput('country'); } - $item = $this->fetchContent($uri . '?' . http_build_query($query)); + $json = getContents($uri . '?' . http_build_query($query), ['Authorization: Bearer ' . $this->token]); + $item = Json::decode($json); $this->uri = $item['external_urls']['spotify']; $this->name = $item['name'] . 
' - Spotify'; diff --git a/bridges/SteamGroupAnnouncementsBridge.php b/bridges/SteamGroupAnnouncementsBridge.php new file mode 100644 index 00000000000..2b848850590 --- /dev/null +++ b/bridges/SteamGroupAnnouncementsBridge.php @@ -0,0 +1,25 @@ + [ + 'name' => 'Group name', + 'exampleValue' => 'freegamesfinders', + 'required' => true + ] + ] + ]; + + public function collectData() + { + $uri = self::URI . 'groups/' . $this->getInput('g') . '/rss'; + $this->collectExpandableDatas($uri, 10); + } +} diff --git a/bridges/StravaBridge.php b/bridges/StravaBridge.php new file mode 100644 index 00000000000..da1739cb8de --- /dev/null +++ b/bridges/StravaBridge.php @@ -0,0 +1,81 @@ + [ + 'name' => 'athleteID', + 'required' => true + ] + ], + ]; + + public function detectParameters($url) + { + if (preg_match('/strava\.com\/athletes\/([\d]+)/', $url, $matches) > 0) { + return [ + 'athleteID' => $matches[1] + ]; + } + return null; + } + + public function collectData() + { + $athleteID = $this->getInput('athleteID'); + + $dom = getSimpleHTMLDOM(self::URI . '/athletes/' . $athleteID); + $scriptRegex = "/data-react-props='(.*?)'/"; + preg_match($scriptRegex, $dom, $matches) or returnServerError('Could not find json'); + $jsonData = json_decode(html_entity_decode($matches[1])); + $this->feedName = $jsonData->athlete->name . "'s Recent Activities"; + $this->iconURL = $jsonData->athlete->avatarUrl; + foreach ($jsonData->recentActivities as $activity) { + $item = []; + + $item['title'] = $activity->name . ' (' . $activity->detailedType . ')'; + $item['author'] = $jsonData->athlete->name; + $item['uri'] = self::URI . '/activities/' . $activity->id; + $item['timestamp'] = $activity->startDateLocal; + + $content = 'Distance: ' . $activity->distance . + '
Elev Gain: ' . $activity->elevation . + '
Time: ' . $activity->movingTime . '

'; + + foreach ($activity->images as $image) { + $src = $image->squareSrc; + if (empty($src)) { + $src = $image->defaultSrc; + } + $content .= ''; + } + $item['content'] = $content; + + $item['enclosures'][] = $item['uri'] . '/export_gpx'; + + $this->items[] = $item; + } + } + + public function getName() + { + if (empty($this->feedName)) { + return parent::getName(); + } else { + return $this->feedName; + } + } + + public function getIcon() + { + if (empty($this->iconURL)) { + return parent::getIcon(); + } else { + return $this->iconURL; + } + } +} diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php index 0636bb468cb..1f9c34c059b 100644 --- a/bridges/TheHackerNewsBridge.php +++ b/bridges/TheHackerNewsBridge.php @@ -10,6 +10,8 @@ class TheHackerNewsBridge extends BridgeAbstract public function collectData() { $html = getSimpleHTMLDOM($this->getURI()); + $html = convertLazyLoading($html); + $html = defaultLinkTo($html, $this->getURI()); $limit = 0; foreach ($html->find('div.body-post') as $element) { @@ -17,74 +19,68 @@ public function collectData() break; } + // Author (not present on home page) $article_author = null; - $icon_user = $element->find('i.icon-user', 0); - if ($icon_user) { - $article_author = trim($icon_user->parent()->plaintext); - $article_author = str_replace('', '', $article_author); - } + + // Title $article_title = $element->find('h2.home-title', 0)->plaintext; + // Date $article_timestamp = time(); - //Date without time $calendar = $element->find('i.icon-calendar', 0); if ($calendar) { $article_timestamp = strtotime( extractFromDelimiters( $calendar->parent()->outertext, '', - '' + '' ) ); } - //Article thumbnail in lazy-loading image - if (is_object($element->find('img[data-echo]', 0))) { - $article_thumbnail = [ - extractFromDelimiters( - $element->find('img[data-echo]', 0)->outertext, - "data-echo='", - "'" - ) - ]; - } else { - $article_thumbnail = []; + // Thumbnail + $article_thumbnail = []; + if 
(is_object($element->find('img', 0))) { + $article_thumbnail = [ $element->find('img', 0)->src ]; } + // Content (truncated) + $article_content = $element->find('div.home-desc', 0)->plaintext; + + // Now try expanding article $article_url = $element->find('a.story-link', 0)->href; - $article = getSimpleHTMLDOMCached($article_url); - if ($article) { - //Article body - $var = $article->find('div.articlebody', 0); - if ($var) { - $contents = $var->innertext; - $contents = stripRecursiveHtmlSection($contents, 'div', '
'); - $contents = stripWithDelimiters($contents, ''); + $article_html = getSimpleHTMLDOMCached($article_url); + if ($article_html) { + // Content (expanded and cleaned) + $article_body = $article_html->find('div.articlebody', 0); + if ($article_body) { + $article_body = convertLazyLoading($article_body); + $article_body = defaultLinkTo($article_body, $article_url); + $header_img = $article_body->find('img', 0); + if ($header_img) { + $header_img->parent->style = ''; + } + foreach ($article_body->find('center.cf') as $center_ad) { + $center_ad->outertext = ''; + } + $article_content = $article_body->innertext; } - //Date with time - if (is_object($article->find('meta[itemprop=dateModified]', 0))) { - $article_timestamp = strtotime( - extractFromDelimiters( - $article->find('meta[itemprop=dateModified]', 0)->outertext, - "content='", - "'" - ) - ); + // Author + $spans_author = $article_html->find('span.author'); + if (count($spans_author) > 0) { + $article_author = $spans_author[array_key_last($spans_author)]->plaintext; } - } else { - $contents = 'Could not request TheHackerNews: ' . $article_url; } $item = []; $item['uri'] = $article_url; $item['title'] = $article_title; - if ($article_author) { + if (!empty($article_author)) { $item['author'] = $article_author; } $item['enclosures'] = $article_thumbnail; $item['timestamp'] = $article_timestamp; - $item['content'] = trim($contents ?? 
''); + $item['content'] = trim($article_content); $this->items[] = $item; $limit++; } diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php index 2a03824598c..556e5ffcac0 100644 --- a/bridges/TikTokBridge.php +++ b/bridges/TikTokBridge.php @@ -26,18 +26,6 @@ class TikTokBridge extends BridgeAbstract private $feedName = ''; - public function detectParameters($url) - { - if (preg_match('/tiktok\.com\/(@[\w]+)/', $url, $matches) > 0) { - return [ - 'context' => 'By user', - 'username' => $matches[1] - ]; - } - - return null; - } - public function collectData() { $html = getSimpleHTMLDOM($this->getURI()); @@ -45,20 +33,37 @@ public function collectData() $title = $html->find('h1', 0)->plaintext ?? self::NAME; $this->feedName = htmlspecialchars_decode($title); - foreach ($html->find('div.tiktok-x6y88p-DivItemContainerV2') as $div) { + $var = $html->find('script[id=SIGI_STATE]', 0); + $SIGI_STATE_RAW = $var->innertext; + $SIGI_STATE = Json::decode($SIGI_STATE_RAW, false); + + foreach ($SIGI_STATE->ItemModule as $key => $value) { $item = []; - $link = $div->find('a', 0)->href; - $image = $div->find('img', 0)->src; - $views = $div->find('strong.video-count', 0)->plaintext; + $link = 'https://www.tiktok.com/@' . $value->author . '/video/' . $value->id; + $image = $value->video->dynamicCover; + if (empty($image)) { + $image = $value->video->cover; + } + $views = $value->stats->playCount; + $hastags = []; + foreach ($value->textExtra as $tag) { + $hastags[] = $tag->hashtagName; + } + $hastags_str = ''; + foreach ($hastags as $tag) { + $hastags_str .= '#' . $tag . ' '; + } $item['uri'] = $link; - $item['title'] = $div->find('a', 1)->plaintext; + $item['title'] = $value->desc; + $item['timestamp'] = $value->createTime; + $item['author'] = '@' . $value->author; $item['enclosures'][] = $image; - + $item['categories'] = $hastags; $item['content'] = << -

{$views} views

+

{$views} views


Hashtags: {$hastags_str} EOD; $this->items[] = $item; @@ -87,10 +92,25 @@ public function getName() private function processUsername() { - if (substr($this->getInput('username'), 0, 1) !== '@') { - return '@' . $this->getInput('username'); + $username = trim($this->getInput('username')); + if (preg_match('#^https?://www\.tiktok\.com/@(.*)$#', $username, $m)) { + return '@' . $m[1]; + } + if (substr($username, 0, 1) !== '@') { + return '@' . $username; } + return $username; + } - return $this->getInput('username'); + public function detectParameters($url) + { + if (preg_match('/tiktok\.com\/(@[\w]+)/', $url, $matches) > 0) { + return [ + 'context' => 'By user', + 'username' => $matches[1] + ]; + } + + return null; } } diff --git a/bridges/TldrTechBridge.php b/bridges/TldrTechBridge.php index ba7ebf6e87c..7d8febe1c40 100644 --- a/bridges/TldrTechBridge.php +++ b/bridges/TldrTechBridge.php @@ -23,7 +23,9 @@ class TldrTechBridge extends BridgeAbstract 'values' => [ 'Tech' => 'tech', 'Crypto' => 'crypto', - 'AI' => 'ai' + 'AI' => 'ai', + 'Web Dev' => 'engineering', + 'Founders' => 'founders' ], 'defaultValue' => 'tech' ] diff --git a/bridges/TraktBridge.php b/bridges/TraktBridge.php new file mode 100644 index 00000000000..7aa90dfd81b --- /dev/null +++ b/bridges/TraktBridge.php @@ -0,0 +1,70 @@ + [ + 'name' => 'username', + 'required' => true + ], + 'hide_shows' => [ + 'name' => 'Hide shows', + 'type' => 'checkbox', + 'title' => 'Hide shows', + ], + + ], + ]; + + public function detectParameters($url) + { + if (preg_match('/trakt\.tv\/users\/(.*?)\//', $url, $matches) > 0) { + return [ + 'username' => $matches[1] + ]; + } + return null; + } + + public function collectData() + { + $username = $this->getInput('username'); + $dom = getSimpleHTMLDOMCached(self::URI . '/users/' . $username . 
'/history'); + $this->feedName = $dom->find('#avatar-wrapper h1 a', 0)->plaintext; + $this->iconURL = $dom->find('img.avatar', 0)->{'src'}; + + foreach ($dom->find('#history-items .posters', 0)->find('div.grid-item') as $div) { + if ($this->getInput('hide_shows') && $div->{'data-type'} != 'movie') { + continue; + } + $item = []; + $item['author'] = $this->feedName; + $item['title'] = $div->find('img.real', 0)->{'title'}; + $item['timestamp'] = $div->find('.format-date', 0)->plaintext; + $item['content'] = ''; + $item['uri'] = self::URI . $div->{'data-url'}; + $this->items[] = $item; + } + } + public function getName() + { + if (empty($this->feedName)) { + return parent::getName(); + } else { + return $this->feedName; + } + } + public function getIcon() + { + if (empty($this->iconURL)) { + return parent::getIcon(); + } else { + return $this->iconURL; + } + } +} diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index b8ff1c404a3..8976174afb8 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -223,10 +223,10 @@ private function apiRequest($query, $variables) CURLOPT_POSTFIELDS => json_encode($request) ]; - Debug::log("Sending GraphQL query:\n" . $query); - Debug::log("Sending GraphQL variables:\n" . json_encode($variables, JSON_PRETTY_PRINT)); + Logger::debug("Sending GraphQL query:\n" . $query); + Logger::debug("Sending GraphQL variables:\n" . json_encode($variables, JSON_PRETTY_PRINT)); $response = json_decode(getContents('https://gql.twitch.tv/gql', $header, $opts)); - Debug::log("Got GraphQL response:\n" . json_encode($response, JSON_PRETTY_PRINT)); + Logger::debug("Got GraphQL response:\n" . 
json_encode($response, JSON_PRETTY_PRINT)); if (isset($response->errors)) { $messages = array_column($response->errors, 'message'); diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index a467f61bded..1ba00c669f3 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -7,7 +7,7 @@ class TwitterBridge extends BridgeAbstract const API_URI = 'https://api.twitter.com'; const GUEST_TOKEN_USES = 100; const GUEST_TOKEN_EXPIRY = 10800; // 3hrs - const CACHE_TIMEOUT = 300; // 5min + const CACHE_TIMEOUT = 60 * 15; // 15min const DESCRIPTION = 'returns tweets'; const MAINTAINER = 'arnd-s'; const PARAMETERS = [ @@ -124,6 +124,7 @@ class TwitterBridge extends BridgeAbstract private $apiKey = null; private $guestToken = null; private $authHeaders = []; + private ?string $feedIconUrl = null; public function detectParameters($url) { @@ -209,6 +210,16 @@ public function getURI() } } + private function getFullText($id) + { + $url = sprintf( + 'https://cdn.syndication.twimg.com/tweet-result?id=%s&lang=en', + $id + ); + + return json_decode(getContents($url), false); + } + public function collectData() { // $data will contain an array of all found tweets (unfiltered) @@ -219,18 +230,11 @@ public function collectData() $tweets = []; // Get authentication information - + $cache = RssBridge::getCache(); + $api = new TwitterClient($cache); // Try to get all tweets switch ($this->queriedContext) { case 'By username': - $cacheFactory = new CacheFactory(); - $cache = $cacheFactory->create(); - - $cache->setScope('twitter'); - $cache->setKey(['cache']); - $cache->purgeCache(60 * 60 * 3); // 3h - $api = new TwitterClient($cache); - $screenName = $this->getInput('u'); $screenName = trim($screenName); $screenName = ltrim($screenName, '@'); @@ -242,35 +246,45 @@ public function collectData() case 'By keyword or hashtag': // Does not work with the recent twitter changes $params = [ - 'q' => urlencode($this->getInput('q')), - 'tweet_mode' => 'extended', - 
'tweet_search_mode' => 'live', + 'q' => urlencode($this->getInput('q')), + 'tweet_mode' => 'extended', + 'tweet_search_mode' => 'live', ]; - $data = $this->makeApiCall('/1.1/search/tweets.json', $params)->statuses; + $tweets = $api->search($params)->statuses; + $data = (object) [ + 'tweets' => $tweets + ]; break; case 'By list': // Does not work with the recent twitter changes - $params = [ - 'slug' => strtolower($this->getInput('list')), - 'owner_screen_name' => strtolower($this->getInput('user')), - 'tweet_mode' => 'extended', + // $params = [ + // 'slug' => strtolower($this->getInput('list')), + // 'owner_screen_name' => strtolower($this->getInput('user')), + // 'tweet_mode' => 'extended', + // ]; + $query = [ + 'screenName' => strtolower($this->getInput('user')), + 'listSlug' => strtolower($this->getInput('list')) ]; - $data = $this->makeApiCall('/1.1/lists/statuses.json', $params); + $data = $api->fetchListTweets($query, $this->queriedContext); break; case 'By list ID': // Does not work with the recent twitter changes - $params = [ - 'list_id' => $this->getInput('listid'), - 'tweet_mode' => 'extended', + // $params = [ + // 'list_id' => $this->getInput('listid'), + // 'tweet_mode' => 'extended', + // ]; + + $query = [ + 'listId' => $this->getInput('listid') ]; - $data = $this->makeApiCall('/1.1/lists/statuses.json', $params); + $data = $api->fetchListTweets($query, $this->queriedContext); break; - default: returnServerError('Invalid query context !'); } @@ -314,6 +328,11 @@ public function collectData() } } + if ($this->queriedContext === 'By username') { + $this->feedIconUrl = $data->user_info->legacy->profile_image_url_https ?? null; + } + + $i = 0; foreach ($tweets as $tweet) { // Skip own Retweets... 
if (isset($tweet->retweeted_status) && $tweet->retweeted_status->user->id_str === $tweet->user->id_str) { @@ -325,14 +344,6 @@ public function collectData() continue; } - switch ($this->queriedContext) { - case 'By username': - if ($this->getInput('norep') && isset($tweet->in_reply_to_status_id)) { - continue 2; - } - break; - } - $item = []; $realtweet = $tweet; @@ -341,11 +352,40 @@ public function collectData() $realtweet = $tweet->retweeted_status; } - $item['username'] = $data->user_info->legacy->screen_name; - $item['fullname'] = $data->user_info->legacy->name; - $item['avatar'] = $data->user_info->legacy->profile_image_url_https; + if (isset($realtweet->truncated) && $realtweet->truncated) { + try { + $realtweet = $this->getFullText($realtweet->id_str); + } catch (HttpException $e) { + $realtweet = $tweet; + } + } + + switch ($this->queriedContext) { + case 'By username': + if ($this->getInput('norep') && isset($tweet->in_reply_to_status_id)) { + continue 2; + } + $item['username'] = $data->user_info->legacy->screen_name; + $item['fullname'] = $data->user_info->legacy->name; + $item['avatar'] = $data->user_info->legacy->profile_image_url_https; + $item['id'] = $realtweet->id_str; + break; + case 'By list': + case 'By list ID': + $item['username'] = $data->userIds[$i]->legacy->screen_name; + $item['fullname'] = $data->userIds[$i]->legacy->name; + $item['avatar'] = $data->userIds[$i]->legacy->profile_image_url_https; + $item['id'] = $realtweet->conversation_id_str; + break; + case 'By keyword or hashtag': + $item['username'] = $realtweet->user->screen_name; + $item['fullname'] = $realtweet->user->name; + $item['avatar'] = $realtweet->user->profile_image_url_https; + $item['id'] = $realtweet->id_str; + break; + } + $item['timestamp'] = $realtweet->created_at; - $item['id'] = $realtweet->id_str; $item['uri'] = self::URI . $item['username'] . '/status/' . $item['id']; $item['author'] = (isset($tweet->retweeted_status) ? 'RT: ' : '') . 
$item['fullname'] @@ -353,7 +393,11 @@ public function collectData() . $item['username'] . ')'; // Convert plain text URLs into HTML hyperlinks - $fulltext = $realtweet->full_text; + if (isset($realtweet->full_text)) { + $fulltext = $realtweet->full_text; + } else { + $fulltext = $realtweet->text; + } $cleanedTweet = $fulltext; $foundUrls = false; @@ -385,7 +429,7 @@ public function collectData() if ($foundUrls === false) { // fallback to regex'es $reg_ex = '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/'; - if (preg_match($reg_ex, $realtweet->full_text, $url)) { + if (preg_match($reg_ex, $fulltext, $url)) { $cleanedTweet = preg_replace( $reg_ex, "{$url[0]} ", @@ -410,10 +454,17 @@ public function collectData() EOD; } + $medias = []; + if (isset($realtweet->extended_entities->media)) { + $medias = $realtweet->extended_entities->media; + } else if (isset($realtweet->mediaDetails)) { + $medias = $realtweet->mediaDetails; + } + // Get images $media_html = ''; - if (isset($realtweet->extended_entities->media) && !$this->getInput('noimg')) { - foreach ($realtweet->extended_entities->media as $media) { + if (!$this->getInput('noimg')) { + foreach ($medias as $media) { switch ($media->type) { case 'photo': $image = $media->media_url_https . '?name=orig'; @@ -496,12 +547,18 @@ public function collectData() EOD; // put out + $i++; $this->items[] = $item; } usort($this->items, ['TwitterBridge', 'compareTweetId']); } + public function getIcon() + { + return $this->feedIconUrl ?? parent::getIcon(); + } + private static function compareTweetId($tweet1, $tweet2) { return (intval($tweet1['id']) < intval($tweet2['id']) ? 
1 : -1); @@ -511,9 +568,7 @@ private static function compareTweetId($tweet1, $tweet2) //This function takes 2 requests, and therefore is cached private function getApiKey($forceNew = 0) { - $cacheFactory = new CacheFactory(); - - $r_cache = $cacheFactory->create(); + $r_cache = RssBridge::getCache(); $scope = 'TwitterBridge'; $r_cache->setScope($scope); $r_cache->setKey(['refresh']); @@ -529,7 +584,7 @@ private function getApiKey($forceNew = 0) $cacheFactory = new CacheFactory(); - $cache = $cacheFactory->create(); + $cache = RssBridge::getCache(); $cache->setScope($scope); $cache->setKey(['api_key']); $data = $cache->loadData(); @@ -564,9 +619,7 @@ private function getApiKey($forceNew = 0) $apiKey = $data; } - $cacheFac2 = new CacheFactory(); - - $gt_cache = $cacheFactory->create(); + $gt_cache = RssBridge::getCache(); $gt_cache->setScope($scope); $gt_cache->setKey(['guest_token']); $guestTokenUses = $gt_cache->loadData(); diff --git a/bridges/UsesTechBridge.php b/bridges/UsesTechBridge.php new file mode 100644 index 00000000000..653d83dc71f --- /dev/null +++ b/bridges/UsesTechBridge.php @@ -0,0 +1,30 @@ +find('div[class=PersonInner]') as $index => $a) { + $item = []; // Create an empty item + $articlePath = $a->find('a[class=displayLink]', 0)->href; + $item['title'] = $a->find('img', 0)->getAttribute('alt'); + $item['author'] = $a->find('img', 0)->getAttribute('alt'); + $item['uri'] = $articlePath; + $item['content'] = $a->find('p', 0)->innertext; + + $this->items[] = $item; // Add item to the list + if (count($this->items) >= self::MAX_ITEM) { + break; + } + } + } +} diff --git a/bridges/VideoCardzBridge.php b/bridges/VideoCardzBridge.php new file mode 100644 index 00000000000..84796c203fa --- /dev/null +++ b/bridges/VideoCardzBridge.php @@ -0,0 +1,76 @@ + [ + 'name' => 'News Feed', + 'type' => 'list', + 'title' => 'Feeds from VideoCardz.com', + 'values' => [ + 'News' => 'sections/news', + 'Featured' => 'sections/featured', + 'Leaks' => 'sections/leaks', + 
'Press Releases' => 'sections/press-releases', + 'Preview Roundup' => 'sections/review-roundup', + 'Rumour' => 'sections/rumor', + ] + ] + ] + ]; + + public function getIcon() + { + return 'https://videocardz.com/favicon-32x32.png?x66580'; + } + + public function getName() + { + return !is_null($this->getKey('feed')) ? self::NAME . ' | ' . $this->getKey('feed') : self::NAME; + } + + public function getURI() + { + return self::URI . $this->getInput('feed'); + } + + public function collectData() + { + $url = sprintf('https://videocardz.com/%s', $this->getInput('feed')); + $dom = getSimpleHTMLDOM($url); + $dom = $dom->find('.subcategory-news', 0); + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + + foreach ($dom->find('article') as $article) { + $title = preg_replace('/\(PR\) /i', '', $article->find('h2', 0)->plaintext); + //Get thumbnail + $image = $article->style; + $image = preg_replace('/background-image:url\(/i', '', $image); + $image = substr_replace($image, '', -3); + //Get date and time of publishing + $datetime = date_parse($article->find('.main-index-article-datetitle-date > a', 0)->plaintext); + $year = $datetime['year']; + $month = $datetime['month']; + $day = $datetime['day']; + $hour = $datetime['hour']; + $minute = $datetime['minute']; + $timestamp = mktime($hour, $minute, 0, $month, $day, $year); + $content = '' . $article->find('h2', 0)->plaintext . ' thumbnail'; + $this->items[] = [ + 'title' => $title, + 'uri' => $article->find('p.main-index-article-datetitle-date > a', 0)->href, + 'content' => $content, + 'timestamp' => $timestamp, + ]; + } + } +} diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 0b00fca7bf9..967734ef7bd 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -311,6 +311,44 @@ public function collectData() $copy_quote->outertext = "
Reposted ($copy_quote_author):
$copy_quote_content"; } + foreach ($post->find('.SecondaryAttachment') as $sa) { + $sa_href = $sa->getAttribute('href'); + if (!$sa_href) { + $sa_href = ''; + } + $sa_task_click = $sa->getAttribute('data-task-click'); + + if (str_starts_with($sa_href, 'https://vk.com/doc')) { + // document + $doc_title = $sa->find('.SecondaryAttachment__childrenText', 0)->innertext; + $doc_size = $sa->find('.SecondaryAttachmentSubhead', 0)->innertext; + $doc_link = $sa_href; + $content_suffix .= "
Doc: $doc_title ($doc_size)"; + $sa->outertext = ''; + } else if (str_starts_with($sa_href, 'https://vk.com/@')) { + // article + $article_title = $sa->find('.SecondaryAttachment__childrenText', 0)->innertext; + $article_author = explode('Article · from ', $sa->find('.SecondaryAttachmentSubhead', 0)->innertext)[1]; + $article_link = $sa_href; + $content_suffix .= "
Article: $article_title ($article_author)"; + $sa->outertext = ''; + } else if ($sa_task_click == 'SecondaryAttachment/playAudio') { + // audio + $audio_json = json_decode(html_entity_decode($sa->getAttribute('data-audio'))); + $audio_link = $audio_json->url; + $audio_title = $sa->find('.SecondaryAttachment__childrenText', 0)->innertext; + $audio_author = $sa->find('.SecondaryAttachmentSubhead', 0)->innertext; + $content_suffix .= "
Audio: $audio_title ($audio_author)"; + $sa->outertext = ''; + } else if ($sa_task_click == 'SecondaryAttachment/playPlaylist') { + // playlist link + $playlist_title = $sa->find('.SecondaryAttachment__childrenText', 0)->innertext; + $playlist_link = $sa->find('.SecondaryAttachment__link', 0)->getAttribute('href'); + $content_suffix .= "
Playlist: $playlist_title"; + $sa->outertext = ''; + } + } + $item = []; $content = strip_tags(backgroundToImg($post->find('div.wall_text', 0)->innertext), '
'); $content .= $content_suffix; @@ -410,7 +448,9 @@ private function getPhoto($a) private function getTitle($content) { - preg_match('/^["\w\ \p{L}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result); + $content = explode('
', $content)[0]; + $content = strip_tags($content); + preg_match('/^[:,"\w\ \p{L}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result); if (count($result) == 0) { return 'untitled'; } @@ -450,18 +490,22 @@ private function getTime($post) private function getContents() { - $header = ['Accept-language: en', 'Cookie: remixlang=3']; + $httpHeaders = [ + 'Accept-language: en', + 'Cookie: remixlang=3', + ]; $redirects = 0; $uri = $this->getURI(); while ($redirects < 2) { - $response = getContents($uri, $header, [CURLOPT_FOLLOWLOCATION => false], true); + $response = getContents($uri, $httpHeaders, [CURLOPT_FOLLOWLOCATION => false], true); if (in_array($response['code'], [200, 304])) { return $response['content']; } - $uri = urljoin(self::URI, $response['header']['location'][0]); + $headers = $response['headers']; + $uri = urljoin(self::URI, $headers['location'][0]); if (str_contains($uri, '/429.html')) { returnServerError('VK responded "Too many requests"'); diff --git a/bridges/WYMTNewsBridge.php b/bridges/WYMTNewsBridge.php new file mode 100644 index 00000000000..c19d6e25c72 --- /dev/null +++ b/bridges/WYMTNewsBridge.php @@ -0,0 +1,27 @@ +find('.card-body'); + + foreach ($articles as $article) { + $item = []; + $url = $article->find('.headline a', 0); + $item['uri'] = $url->href; + $item['title'] = trim($url->plaintext); + $item['author'] = $article->find('.author', 0)->plaintext; + $item['content'] = $article->find('.deck', 0)->plaintext; + $this->items[] = $item; + } + } +} diff --git a/bridges/YandexZenBridge.php b/bridges/YandexZenBridge.php index 4232f2d0a6d..ee1ff506f6c 100644 --- a/bridges/YandexZenBridge.php +++ b/bridges/YandexZenBridge.php @@ -39,8 +39,13 @@ public function collectData() $item['uri'] = $post->share_link; $item['title'] = $post->title; - $item['timestamp'] = date(DateTimeInterface::ATOM, $post->publication_date); - $item['content'] = $post->text; + + $publicationDateUnixTimestamp = $post->publication_date ?? 
null; + if ($publicationDateUnixTimestamp) { + $item['timestamp'] = date(DateTimeInterface::ATOM, $publicationDateUnixTimestamp); + } + + $item['content'] = $post->text . "
"; $item['enclosures'] = [ $post->image, ]; diff --git a/bridges/YouTubeCommunityTabBridge.php b/bridges/YouTubeCommunityTabBridge.php index c44e9557311..20822828b0d 100644 --- a/bridges/YouTubeCommunityTabBridge.php +++ b/bridges/YouTubeCommunityTabBridge.php @@ -78,21 +78,29 @@ public function collectData() returnServerError('Channel does not have a community tab'); } - foreach ($this->getCommunityPosts($json) as $post) { + $posts = $this->getCommunityPosts($json); + foreach ($posts as $key => $post) { $this->itemTitle = ''; if (!isset($post->backstagePostThreadRenderer)) { continue; } - $details = $post->backstagePostThreadRenderer->post->backstagePostRenderer; + if (isset($post->backstagePostThreadRenderer->post->backstagePostRenderer)) { + $details = $post->backstagePostThreadRenderer->post->backstagePostRenderer; + } elseif (isset($post->backstagePostThreadRenderer->post->sharedPostRenderer)) { + // todo: properly extract data from this shared post + $details = $post->backstagePostThreadRenderer->post->sharedPostRenderer; + } else { + continue; + } $item = []; $item['uri'] = self::URI . '/post/' . $details->postId; - $item['author'] = $details->authorText->runs[0]->text; - $item['content'] = ''; + $item['author'] = $details->authorText->runs[0]->text ?? 
null; + $item['content'] = $item['uri']; - if (isset($details->contentText)) { + if (isset($details->contentText->runs)) { $text = $this->getText($details->contentText->runs); $this->itemTitle = $this->ellipsisTitle($text); @@ -102,14 +110,20 @@ public function collectData() $item['content'] .= $this->getAttachments($details); $item['title'] = $this->itemTitle; + $date = strtotime(str_replace(' (edited)', '', $details->publishedTimeText->runs[0]->text)); + if (is_int($date)) { + // subtract an increasing multiple of 60 seconds to always preserve the original order + $item['timestamp'] = $date - $key * 60; + } + $this->items[] = $item; } } public function getURI() { - if (!empty($this->feedUri)) { - return $this->feedUri; + if (!empty($this->feedUrl)) { + return $this->feedUrl; } return parent::getURI(); diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 95f4f01791e..54a38d987cb 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -13,7 +13,6 @@ class YoutubeBridge extends BridgeAbstract const URI = 'https://www.youtube.com/'; const CACHE_TIMEOUT = 10800; // 3h const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search'; - const MAINTAINER = 'em92'; const PARAMETERS = [ 'By username' => [ @@ -234,7 +233,11 @@ private function ytGetSimpleHTMLDOM($url, $cached = false) private function getJSONData($html) { $scriptRegex = '/var ytInitialData = (.*?);<\/script>/'; - preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + $result = preg_match($scriptRegex, $html, $matches); + if (! 
$result) { + Logger::debug('Could not find ytInitialData'); + return null; + } return json_decode($matches[1]); } @@ -293,15 +296,17 @@ private function parseJSONListing($jsonData) } } - if (preg_match('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', $durationText)) { - $durationText = preg_replace('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); - } else { - $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); - } - sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); - $duration = $hours * 3600 + $minutes * 60 + $seconds; - if ($duration < $duration_min || $duration > $duration_max) { - continue; + if (is_string($durationText)) { + if (preg_match('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', $durationText)) { + $durationText = preg_replace('/([\d]{1,2})\:([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); + } else { + $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); + } + sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); + $duration = $hours * 3600 + $minutes * 60 + $seconds; + if ($duration < $duration_min || $duration > $duration_max) { + continue; + } } // $vid_list .= $vid . ','; diff --git a/caches/FileCache.php b/caches/FileCache.php index 8c99234da00..6e150cb495c 100644 --- a/caches/FileCache.php +++ b/caches/FileCache.php @@ -1,44 +1,66 @@ config = $config; - - if (!is_dir($this->config['path'])) { - throw new \Exception('The cache path does not exists. You probably want: mkdir cache && chown www-data:www-data cache'); - } - if (!is_writable($this->config['path'])) { - throw new \Exception('The cache path is not writeable. 
You probably want: chown www-data:www-data cache'); + $default = [ + 'path' => null, + 'enable_purge' => true, + ]; + $this->config = array_merge($default, $config); + if (!$this->config['path']) { + throw new \Exception('The FileCache needs a path value'); } + // Normalize with a single trailing slash + $this->config['path'] = rtrim($this->config['path'], '/') . '/'; } - public function loadData() + public function getConfig() { - if (file_exists($this->getCacheFile())) { - return unserialize(file_get_contents($this->getCacheFile())); + return $this->config; + } + + public function loadData(int $timeout = 86400) + { + clearstatcache(); + if (!file_exists($this->getCacheFile())) { + return null; + } + $modificationTime = filemtime($this->getCacheFile()); + if (time() - $timeout < $modificationTime) { + $data = unserialize(file_get_contents($this->getCacheFile())); + if ($data === false) { + Logger::warning(sprintf('Failed to unserialize: %s', $this->getCacheFile())); + // Intentionally not throwing an exception + return null; + } + return $data; } + // It's a good idea to delete the expired item here, but commented out atm + // unlink($this->getCacheFile()); return null; } - public function saveData($data) + public function saveData($data): void { - $writeStream = file_put_contents($this->getCacheFile(), serialize($data)); - if ($writeStream === false) { - throw new \Exception('The cache path is not writeable. 
You probably want: chown www-data:www-data cache'); + $bytes = file_put_contents($this->getCacheFile(), serialize($data), LOCK_EX); + if ($bytes === false) { + throw new \Exception(sprintf('Failed to write to: %s', $this->getCacheFile())); } - return $this; } - public function getTime() + public function getTime(): ?int { + clearstatcache(); $cacheFile = $this->getCacheFile(); - clearstatcache(false, $cacheFile); if (file_exists($cacheFile)) { $time = filemtime($cacheFile); if ($time !== false) { @@ -50,7 +72,7 @@ public function getTime() return null; } - public function purgeCache($seconds) + public function purgeCache(int $timeout = 86400): void { if (! $this->config['enable_purge']) { return; @@ -66,38 +88,32 @@ public function purgeCache($seconds) ); foreach ($cacheIterator as $cacheFile) { - if (in_array($cacheFile->getBasename(), ['.', '..', '.gitkeep'])) { + $basename = $cacheFile->getBasename(); + $excluded = [ + '.' => true, + '..' => true, + '.gitkeep' => true, + ]; + if (isset($excluded[$basename])) { continue; } elseif ($cacheFile->isFile()) { - if (filemtime($cacheFile->getPathname()) < time() - $seconds) { + $filepath = $cacheFile->getPathname(); + if (filemtime($filepath) < time() - $timeout) { // todo: sometimes this file doesn't exists - unlink($cacheFile->getPathname()); + unlink($filepath); } } } } - public function setScope($scope) + public function setScope(string $scope): void { - if (!is_string($scope)) { - throw new \Exception('The given scope is invalid!'); - } - $this->scope = $this->config['path'] . trim($scope, " \t\n\r\0\x0B\\\/") . 
'/'; - - return $this; } - public function setKey($key) + public function setKey(array $key): void { - $key = json_encode($key); - - if (!is_string($key)) { - throw new \Exception('The given key is invalid!'); - } - - $this->key = $key; - return $this; + $this->key = json_encode($key); } private function getScope() diff --git a/caches/MemcachedCache.php b/caches/MemcachedCache.php index 85681e890a5..dcb572c7f7a 100644 --- a/caches/MemcachedCache.php +++ b/caches/MemcachedCache.php @@ -2,12 +2,10 @@ class MemcachedCache implements CacheInterface { - private $scope; - private $key; + private string $scope; + private string $key; private $conn; private $expiration = 0; - private $time = false; - private $data = null; public function __construct() { @@ -43,73 +41,62 @@ public function __construct() $this->conn = $conn; } - public function loadData() + public function loadData(int $timeout = 86400) { - if ($this->data) { - return $this->data; - } - $result = $this->conn->get($this->getCacheKey()); - if ($result === false) { + $value = $this->conn->get($this->getCacheKey()); + if ($value === false) { return null; } - - $this->time = $result['time']; - $this->data = $result['data']; - return $result['data']; + if (time() - $timeout < $value['time']) { + return $value['data']; + } + return null; } - public function saveData($datas) + public function saveData($data): void { - $time = time(); - $object_to_save = [ - 'data' => $datas, - 'time' => $time, + $value = [ + 'data' => $data, + 'time' => time(), ]; - $result = $this->conn->set($this->getCacheKey(), $object_to_save, $this->expiration); - + $result = $this->conn->set($this->getCacheKey(), $value, $this->expiration); if ($result === false) { - throw new \Exception('Cannot write the cache to memcached server'); + Logger::warning('Failed to store an item in memcached', [ + 'scope' => $this->scope, + 'key' => $this->key, + 'expiration' => $this->expiration, + 'code' => $this->conn->getLastErrorCode(), + 'message' => 
$this->conn->getLastErrorMessage(), + 'number' => $this->conn->getLastErrorErrno(), + ]); + // Intentionally not throwing an exception } - - $this->time = $time; - - return $this; } - public function getTime() + public function getTime(): ?int { - if ($this->time === false) { - $this->loadData(); + $value = $this->conn->get($this->getCacheKey()); + if ($value === false) { + return null; } - return $this->time; + return $value['time']; } - public function purgeCache($duration) + public function purgeCache(int $timeout = 86400): void { // Note: does not purges cache right now // Just sets cache expiration and leave cache purging for memcached itself - $this->expiration = $duration; + $this->expiration = $timeout; } - public function setScope($scope) + public function setScope(string $scope): void { $this->scope = $scope; - return $this; } - public function setKey($key) + public function setKey(array $key): void { - if (!empty($key) && is_array($key)) { - $key = array_map('strtolower', $key); - } - $key = json_encode($key); - - if (!is_string($key)) { - throw new \Exception('The given key is invalid!'); - } - - $this->key = $key; - return $this; + $this->key = json_encode($key); } private function getCacheKey() diff --git a/caches/NullCache.php b/caches/NullCache.php index 8e5c9e35804..fe43fe06472 100644 --- a/caches/NullCache.php +++ b/caches/NullCache.php @@ -4,27 +4,28 @@ class NullCache implements CacheInterface { - public function setScope($scope) + public function setScope(string $scope): void { } - public function setKey($key) + public function setKey(array $key): void { } - public function loadData() + public function loadData(int $timeout = 86400) { } - public function saveData($data) + public function saveData($data): void { } - public function getTime() + public function getTime(): ?int { + return null; } - public function purgeCache($seconds) + public function purgeCache(int $timeout = 86400): void { } } diff --git a/caches/SQLiteCache.php 
b/caches/SQLiteCache.php index 1e44519ba09..92235862664 100644 --- a/caches/SQLiteCache.php +++ b/caches/SQLiteCache.php @@ -1,128 +1,115 @@ - */ class SQLiteCache implements CacheInterface { - protected $scope; - protected $key; + private \SQLite3 $db; + private string $scope; + private string $key; + private array $config; - private $db = null; - - public function __construct() + public function __construct(array $config) { - if (!extension_loaded('sqlite3')) { - throw new \Exception('"sqlite3" extension not loaded. Please check "php.ini"'); - } - - if (!is_writable(PATH_CACHE)) { - throw new \Exception('The cache folder is not writable'); + $default = [ + 'file' => null, + 'timeout' => 5000, + 'enable_purge' => true, + ]; + $config = array_merge($default, $config); + $this->config = $config; + + if (!$config['file']) { + throw new \Exception('sqlite cache needs a file'); } - $section = 'SQLiteCache'; - $file = Configuration::getConfig($section, 'file'); - if (!$file) { - throw new \Exception(sprintf('Configuration for %s missing.', $section)); - } - - if (dirname($file) == '.') { - $file = PATH_CACHE . 
$file; - } elseif (!is_dir(dirname($file))) { - throw new \Exception(sprintf('Invalid configuration for %s', $section)); - } - - if (!is_file($file)) { - // The instantiation creates the file - $this->db = new \SQLite3($file); + if (is_file($config['file'])) { + $this->db = new \SQLite3($config['file']); $this->db->enableExceptions(true); - $this->db->exec("CREATE TABLE storage ('key' BLOB PRIMARY KEY, 'value' BLOB, 'updated' INTEGER)"); } else { - $this->db = new \SQLite3($file); + // Create the file and create sql schema + $this->db = new \SQLite3($config['file']); $this->db->enableExceptions(true); + $this->db->exec("CREATE TABLE storage ('key' BLOB PRIMARY KEY, 'value' BLOB, 'updated' INTEGER)"); } - $this->db->busyTimeout(5000); + $this->db->busyTimeout($config['timeout']); } - public function loadData() + public function loadData(int $timeout = 86400) { - $Qselect = $this->db->prepare('SELECT value FROM storage WHERE key = :key'); - $Qselect->bindValue(':key', $this->getCacheKey()); - $result = $Qselect->execute(); - if ($result instanceof \SQLite3Result) { - $data = $result->fetchArray(\SQLITE3_ASSOC); - if (isset($data['value'])) { - return unserialize($data['value']); + $stmt = $this->db->prepare('SELECT value, updated FROM storage WHERE key = :key'); + $stmt->bindValue(':key', $this->getCacheKey()); + $result = $stmt->execute(); + if (!$result) { + return null; + } + $row = $result->fetchArray(\SQLITE3_ASSOC); + if ($row === false) { + return null; + } + $value = $row['value']; + $modificationTime = $row['updated']; + if (time() - $timeout < $modificationTime) { + $data = unserialize($value); + if ($data === false) { + Logger::error(sprintf("Failed to unserialize: '%s'", mb_substr($value, 0, 100))); + return null; } + return $data; } - + // It's a good idea to delete expired cache items. 
+ // However I'm seeing lots of SQLITE_BUSY errors so commented out for now + // $stmt = $this->db->prepare('DELETE FROM storage WHERE key = :key'); + // $stmt->bindValue(':key', $this->getCacheKey()); + // $stmt->execute(); return null; } - public function saveData($data) + public function saveData($data): void { - $Qupdate = $this->db->prepare('INSERT OR REPLACE INTO storage (key, value, updated) VALUES (:key, :value, :updated)'); - $Qupdate->bindValue(':key', $this->getCacheKey()); - $Qupdate->bindValue(':value', serialize($data)); - $Qupdate->bindValue(':updated', time()); - $Qupdate->execute(); + $blob = serialize($data); - return $this; + $stmt = $this->db->prepare('INSERT OR REPLACE INTO storage (key, value, updated) VALUES (:key, :value, :updated)'); + $stmt->bindValue(':key', $this->getCacheKey()); + $stmt->bindValue(':value', $blob, \SQLITE3_BLOB); + $stmt->bindValue(':updated', time()); + $stmt->execute(); } - public function getTime() + public function getTime(): ?int { - $Qselect = $this->db->prepare('SELECT updated FROM storage WHERE key = :key'); - $Qselect->bindValue(':key', $this->getCacheKey()); - $result = $Qselect->execute(); - if ($result instanceof \SQLite3Result) { - $data = $result->fetchArray(SQLITE3_ASSOC); - if (isset($data['updated'])) { - return $data['updated']; + $stmt = $this->db->prepare('SELECT updated FROM storage WHERE key = :key'); + $stmt->bindValue(':key', $this->getCacheKey()); + $result = $stmt->execute(); + if ($result) { + $row = $result->fetchArray(\SQLITE3_ASSOC); + if ($row !== false) { + return $row['updated']; } } - return null; } - public function purgeCache($seconds) + public function purgeCache(int $timeout = 86400): void { - $Qdelete = $this->db->prepare('DELETE FROM storage WHERE updated < :expired'); - $Qdelete->bindValue(':expired', time() - $seconds); - $Qdelete->execute(); + if (!$this->config['enable_purge']) { + return; + } + $stmt = $this->db->prepare('DELETE FROM storage WHERE updated < :expired'); + 
$stmt->bindValue(':expired', time() - $timeout); + $stmt->execute(); } - public function setScope($scope) + public function setScope(string $scope): void { - if (is_null($scope) || !is_string($scope)) { - throw new \Exception('The given scope is invalid!'); - } - $this->scope = $scope; - return $this; } - public function setKey($key) + public function setKey(array $key): void { - if (!empty($key) && is_array($key)) { - $key = array_map('strtolower', $key); - } - $key = json_encode($key); - - if (!is_string($key)) { - throw new \Exception('The given key is invalid!'); - } - - $this->key = $key; - return $this; + $this->key = json_encode($key); } private function getCacheKey() { - if (is_null($this->key)) { - throw new \Exception('Call "setKey" first!'); - } - return hash('sha1', $this->scope . $this->key, true); } } diff --git a/composer.lock b/composer.lock index 426a59b496e..39e9cfc46ff 100644 --- a/composer.lock +++ b/composer.lock @@ -4,35 +4,35 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "0d00dfa7c120bdd750ac0ac4a45f5452", + "content-hash": "24083060ddb8be9a95e75f6596e3bb83", "packages": [], "packages-dev": [ { "name": "doctrine/instantiator", - "version": "1.4.1", + "version": "1.5.0", "source": { "type": "git", "url": "https://github.com/doctrine/instantiator.git", - "reference": "10dcfce151b967d20fde1b34ae6640712c3891bc" + "reference": "0a0fa9780f5d4e507415a065172d26a98d02047b" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/doctrine/instantiator/zipball/10dcfce151b967d20fde1b34ae6640712c3891bc", - "reference": "10dcfce151b967d20fde1b34ae6640712c3891bc", + "url": "https://api.github.com/repos/doctrine/instantiator/zipball/0a0fa9780f5d4e507415a065172d26a98d02047b", + "reference": "0a0fa9780f5d4e507415a065172d26a98d02047b", "shasum": "" }, "require": { "php": "^7.1 || ^8.0" }, "require-dev": { - "doctrine/coding-standard": "^9", 
+ "doctrine/coding-standard": "^9 || ^11", "ext-pdo": "*", "ext-phar": "*", "phpbench/phpbench": "^0.16 || ^1", "phpstan/phpstan": "^1.4", "phpstan/phpstan-phpunit": "^1", "phpunit/phpunit": "^7.5 || ^8.5 || ^9.5", - "vimeo/psalm": "^4.22" + "vimeo/psalm": "^4.30 || ^5.4" }, "type": "library", "autoload": { @@ -59,7 +59,7 @@ ], "support": { "issues": "https://github.com/doctrine/instantiator/issues", - "source": "https://github.com/doctrine/instantiator/tree/1.4.1" + "source": "https://github.com/doctrine/instantiator/tree/1.5.0" }, "funding": [ { @@ -75,20 +75,20 @@ "type": "tidelift" } ], - "time": "2022-03-03T08:28:38+00:00" + "time": "2022-12-30T00:15:36+00:00" }, { "name": "myclabs/deep-copy", - "version": "1.11.0", + "version": "1.11.1", "source": { "type": "git", "url": "https://github.com/myclabs/DeepCopy.git", - "reference": "14daed4296fae74d9e3201d2c4925d1acb7aa614" + "reference": "7284c22080590fb39f2ffa3e9057f10a4ddd0e0c" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/14daed4296fae74d9e3201d2c4925d1acb7aa614", - "reference": "14daed4296fae74d9e3201d2c4925d1acb7aa614", + "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/7284c22080590fb39f2ffa3e9057f10a4ddd0e0c", + "reference": "7284c22080590fb39f2ffa3e9057f10a4ddd0e0c", "shasum": "" }, "require": { @@ -126,7 +126,7 @@ ], "support": { "issues": "https://github.com/myclabs/DeepCopy/issues", - "source": "https://github.com/myclabs/DeepCopy/tree/1.11.0" + "source": "https://github.com/myclabs/DeepCopy/tree/1.11.1" }, "funding": [ { @@ -134,20 +134,20 @@ "type": "tidelift" } ], - "time": "2022-03-03T13:19:32+00:00" + "time": "2023-03-08T13:26:56+00:00" }, { "name": "nikic/php-parser", - "version": "v4.13.2", + "version": "v4.16.0", "source": { "type": "git", "url": "https://github.com/nikic/PHP-Parser.git", - "reference": "210577fe3cf7badcc5814d99455df46564f3c077" + "reference": "19526a33fb561ef417e822e85f08a00db4059c17" }, "dist": { "type": "zip", - 
"url": "https://api.github.com/repos/nikic/PHP-Parser/zipball/210577fe3cf7badcc5814d99455df46564f3c077", - "reference": "210577fe3cf7badcc5814d99455df46564f3c077", + "url": "https://api.github.com/repos/nikic/PHP-Parser/zipball/19526a33fb561ef417e822e85f08a00db4059c17", + "reference": "19526a33fb561ef417e822e85f08a00db4059c17", "shasum": "" }, "require": { @@ -188,9 +188,9 @@ ], "support": { "issues": "https://github.com/nikic/PHP-Parser/issues", - "source": "https://github.com/nikic/PHP-Parser/tree/v4.13.2" + "source": "https://github.com/nikic/PHP-Parser/tree/v4.16.0" }, - "time": "2021-11-30T19:35:32+00:00" + "time": "2023-06-25T14:52:30+00:00" }, { "name": "phar-io/manifest", @@ -303,252 +303,25 @@ }, "time": "2022-02-21T01:04:05+00:00" }, - { - "name": "phpdocumentor/reflection-common", - "version": "2.2.0", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/ReflectionCommon.git", - "reference": "1d01c49d4ed62f25aa84a747ad35d5a16924662b" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionCommon/zipball/1d01c49d4ed62f25aa84a747ad35d5a16924662b", - "reference": "1d01c49d4ed62f25aa84a747ad35d5a16924662b", - "shasum": "" - }, - "require": { - "php": "^7.2 || ^8.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-2.x": "2.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Jaap van Otterdijk", - "email": "opensource@ijaap.nl" - } - ], - "description": "Common reflection classes used by phpdocumentor to reflect the code structure", - "homepage": "http://www.phpdoc.org", - "keywords": [ - "FQSEN", - "phpDocumentor", - "phpdoc", - "reflection", - "static analysis" - ], - "support": { - "issues": "https://github.com/phpDocumentor/ReflectionCommon/issues", - "source": "https://github.com/phpDocumentor/ReflectionCommon/tree/2.x" - 
}, - "time": "2020-06-27T09:03:43+00:00" - }, - { - "name": "phpdocumentor/reflection-docblock", - "version": "5.3.0", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/ReflectionDocBlock.git", - "reference": "622548b623e81ca6d78b721c5e029f4ce664f170" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/622548b623e81ca6d78b721c5e029f4ce664f170", - "reference": "622548b623e81ca6d78b721c5e029f4ce664f170", - "shasum": "" - }, - "require": { - "ext-filter": "*", - "php": "^7.2 || ^8.0", - "phpdocumentor/reflection-common": "^2.2", - "phpdocumentor/type-resolver": "^1.3", - "webmozart/assert": "^1.9.1" - }, - "require-dev": { - "mockery/mockery": "~1.3.2", - "psalm/phar": "^4.8" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "5.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": "src" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Mike van Riel", - "email": "me@mikevanriel.com" - }, - { - "name": "Jaap van Otterdijk", - "email": "account@ijaap.nl" - } - ], - "description": "With this component, a library can provide support for annotations via DocBlocks or otherwise retrieve information that is embedded in a DocBlock.", - "support": { - "issues": "https://github.com/phpDocumentor/ReflectionDocBlock/issues", - "source": "https://github.com/phpDocumentor/ReflectionDocBlock/tree/5.3.0" - }, - "time": "2021-10-19T17:43:47+00:00" - }, - { - "name": "phpdocumentor/type-resolver", - "version": "1.6.1", - "source": { - "type": "git", - "url": "https://github.com/phpDocumentor/TypeResolver.git", - "reference": "77a32518733312af16a44300404e945338981de3" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/TypeResolver/zipball/77a32518733312af16a44300404e945338981de3", - "reference": "77a32518733312af16a44300404e945338981de3", - 
"shasum": "" - }, - "require": { - "php": "^7.2 || ^8.0", - "phpdocumentor/reflection-common": "^2.0" - }, - "require-dev": { - "ext-tokenizer": "*", - "psalm/phar": "^4.8" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-1.x": "1.x-dev" - } - }, - "autoload": { - "psr-4": { - "phpDocumentor\\Reflection\\": "src" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Mike van Riel", - "email": "me@mikevanriel.com" - } - ], - "description": "A PSR-5 based resolver of Class names, Types and Structural Element Names", - "support": { - "issues": "https://github.com/phpDocumentor/TypeResolver/issues", - "source": "https://github.com/phpDocumentor/TypeResolver/tree/1.6.1" - }, - "time": "2022-03-15T21:29:03+00:00" - }, - { - "name": "phpspec/prophecy", - "version": "v1.15.0", - "source": { - "type": "git", - "url": "https://github.com/phpspec/prophecy.git", - "reference": "bbcd7380b0ebf3961ee21409db7b38bc31d69a13" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/phpspec/prophecy/zipball/bbcd7380b0ebf3961ee21409db7b38bc31d69a13", - "reference": "bbcd7380b0ebf3961ee21409db7b38bc31d69a13", - "shasum": "" - }, - "require": { - "doctrine/instantiator": "^1.2", - "php": "^7.2 || ~8.0, <8.2", - "phpdocumentor/reflection-docblock": "^5.2", - "sebastian/comparator": "^3.0 || ^4.0", - "sebastian/recursion-context": "^3.0 || ^4.0" - }, - "require-dev": { - "phpspec/phpspec": "^6.0 || ^7.0", - "phpunit/phpunit": "^8.0 || ^9.0" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.x-dev" - } - }, - "autoload": { - "psr-4": { - "Prophecy\\": "src/Prophecy" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Konstantin Kudryashov", - "email": "ever.zet@gmail.com", - "homepage": "http://everzet.com" - }, - { - "name": "Marcello Duarte", - "email": "marcello.duarte@gmail.com" - } - 
], - "description": "Highly opinionated mocking framework for PHP 5.3+", - "homepage": "https://github.com/phpspec/prophecy", - "keywords": [ - "Double", - "Dummy", - "fake", - "mock", - "spy", - "stub" - ], - "support": { - "issues": "https://github.com/phpspec/prophecy/issues", - "source": "https://github.com/phpspec/prophecy/tree/v1.15.0" - }, - "time": "2021-12-08T12:19:24+00:00" - }, { "name": "phpunit/php-code-coverage", - "version": "9.2.15", + "version": "9.2.26", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/php-code-coverage.git", - "reference": "2e9da11878c4202f97915c1cb4bb1ca318a63f5f" + "reference": "443bc6912c9bd5b409254a40f4b0f4ced7c80ea1" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/2e9da11878c4202f97915c1cb4bb1ca318a63f5f", - "reference": "2e9da11878c4202f97915c1cb4bb1ca318a63f5f", + "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/443bc6912c9bd5b409254a40f4b0f4ced7c80ea1", + "reference": "443bc6912c9bd5b409254a40f4b0f4ced7c80ea1", "shasum": "" }, "require": { "ext-dom": "*", "ext-libxml": "*", "ext-xmlwriter": "*", - "nikic/php-parser": "^4.13.0", + "nikic/php-parser": "^4.15", "php": ">=7.3", "phpunit/php-file-iterator": "^3.0.3", "phpunit/php-text-template": "^2.0.2", @@ -563,8 +336,8 @@ "phpunit/phpunit": "^9.3" }, "suggest": { - "ext-pcov": "*", - "ext-xdebug": "*" + "ext-pcov": "PHP extension that provides line coverage", + "ext-xdebug": "PHP extension that provides line coverage as well as branch and path coverage" }, "type": "library", "extra": { @@ -597,7 +370,7 @@ ], "support": { "issues": "https://github.com/sebastianbergmann/php-code-coverage/issues", - "source": "https://github.com/sebastianbergmann/php-code-coverage/tree/9.2.15" + "source": "https://github.com/sebastianbergmann/php-code-coverage/tree/9.2.26" }, "funding": [ { @@ -605,7 +378,7 @@ "type": "github" } ], - "time": "2022-03-07T09:28:20+00:00" + 
"time": "2023-03-06T12:58:08+00:00" }, { "name": "phpunit/php-file-iterator", @@ -850,20 +623,20 @@ }, { "name": "phpunit/phpunit", - "version": "9.5.20", + "version": "9.6.9", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "12bc8879fb65aef2138b26fc633cb1e3620cffba" + "reference": "a9aceaf20a682aeacf28d582654a1670d8826778" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/12bc8879fb65aef2138b26fc633cb1e3620cffba", - "reference": "12bc8879fb65aef2138b26fc633cb1e3620cffba", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/a9aceaf20a682aeacf28d582654a1670d8826778", + "reference": "a9aceaf20a682aeacf28d582654a1670d8826778", "shasum": "" }, "require": { - "doctrine/instantiator": "^1.3.1", + "doctrine/instantiator": "^1.3.1 || ^2", "ext-dom": "*", "ext-json": "*", "ext-libxml": "*", @@ -874,7 +647,6 @@ "phar-io/manifest": "^2.0.3", "phar-io/version": "^3.0.2", "php": ">=7.3", - "phpspec/prophecy": "^1.12.1", "phpunit/php-code-coverage": "^9.2.13", "phpunit/php-file-iterator": "^3.0.5", "phpunit/php-invoker": "^3.1.1", @@ -882,23 +654,19 @@ "phpunit/php-timer": "^5.0.2", "sebastian/cli-parser": "^1.0.1", "sebastian/code-unit": "^1.0.6", - "sebastian/comparator": "^4.0.5", + "sebastian/comparator": "^4.0.8", "sebastian/diff": "^4.0.3", "sebastian/environment": "^5.1.3", - "sebastian/exporter": "^4.0.3", + "sebastian/exporter": "^4.0.5", "sebastian/global-state": "^5.0.1", "sebastian/object-enumerator": "^4.0.3", "sebastian/resource-operations": "^3.0.3", - "sebastian/type": "^3.0", + "sebastian/type": "^3.2", "sebastian/version": "^3.0.2" }, - "require-dev": { - "ext-pdo": "*", - "phpspec/prophecy-phpunit": "^2.0.1" - }, "suggest": { - "ext-soap": "*", - "ext-xdebug": "*" + "ext-soap": "To be able to generate mocks based on WSDL files", + "ext-xdebug": "PHP extension that provides line coverage as well as branch and path coverage" }, "bin": [ 
"phpunit" @@ -906,7 +674,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "9.5-dev" + "dev-master": "9.6-dev" } }, "autoload": { @@ -937,7 +705,8 @@ ], "support": { "issues": "https://github.com/sebastianbergmann/phpunit/issues", - "source": "https://github.com/sebastianbergmann/phpunit/tree/9.5.20" + "security": "https://github.com/sebastianbergmann/phpunit/security/policy", + "source": "https://github.com/sebastianbergmann/phpunit/tree/9.6.9" }, "funding": [ { @@ -947,9 +716,13 @@ { "url": "https://github.com/sebastianbergmann", "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/phpunit/phpunit", + "type": "tidelift" } ], - "time": "2022-04-01T12:37:26+00:00" + "time": "2023-06-11T06:13:56+00:00" }, { "name": "sebastian/cli-parser", @@ -1120,16 +893,16 @@ }, { "name": "sebastian/comparator", - "version": "4.0.6", + "version": "4.0.8", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/comparator.git", - "reference": "55f4261989e546dc112258c7a75935a81a7ce382" + "reference": "fa0f136dd2334583309d32b62544682ee972b51a" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/55f4261989e546dc112258c7a75935a81a7ce382", - "reference": "55f4261989e546dc112258c7a75935a81a7ce382", + "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/fa0f136dd2334583309d32b62544682ee972b51a", + "reference": "fa0f136dd2334583309d32b62544682ee972b51a", "shasum": "" }, "require": { @@ -1182,7 +955,7 @@ ], "support": { "issues": "https://github.com/sebastianbergmann/comparator/issues", - "source": "https://github.com/sebastianbergmann/comparator/tree/4.0.6" + "source": "https://github.com/sebastianbergmann/comparator/tree/4.0.8" }, "funding": [ { @@ -1190,7 +963,7 @@ "type": "github" } ], - "time": "2020-10-26T15:49:45+00:00" + "time": "2022-09-14T12:41:17+00:00" }, { "name": "sebastian/complexity", @@ -1251,16 +1024,16 @@ }, { "name": "sebastian/diff", - 
"version": "4.0.4", + "version": "4.0.5", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/diff.git", - "reference": "3461e3fccc7cfdfc2720be910d3bd73c69be590d" + "reference": "74be17022044ebaaecfdf0c5cd504fc9cd5a7131" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/3461e3fccc7cfdfc2720be910d3bd73c69be590d", - "reference": "3461e3fccc7cfdfc2720be910d3bd73c69be590d", + "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/74be17022044ebaaecfdf0c5cd504fc9cd5a7131", + "reference": "74be17022044ebaaecfdf0c5cd504fc9cd5a7131", "shasum": "" }, "require": { @@ -1305,7 +1078,7 @@ ], "support": { "issues": "https://github.com/sebastianbergmann/diff/issues", - "source": "https://github.com/sebastianbergmann/diff/tree/4.0.4" + "source": "https://github.com/sebastianbergmann/diff/tree/4.0.5" }, "funding": [ { @@ -1313,20 +1086,20 @@ "type": "github" } ], - "time": "2020-10-26T13:10:38+00:00" + "time": "2023-05-07T05:35:17+00:00" }, { "name": "sebastian/environment", - "version": "5.1.4", + "version": "5.1.5", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/environment.git", - "reference": "1b5dff7bb151a4db11d49d90e5408e4e938270f7" + "reference": "830c43a844f1f8d5b7a1f6d6076b784454d8b7ed" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/1b5dff7bb151a4db11d49d90e5408e4e938270f7", - "reference": "1b5dff7bb151a4db11d49d90e5408e4e938270f7", + "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/830c43a844f1f8d5b7a1f6d6076b784454d8b7ed", + "reference": "830c43a844f1f8d5b7a1f6d6076b784454d8b7ed", "shasum": "" }, "require": { @@ -1368,7 +1141,7 @@ ], "support": { "issues": "https://github.com/sebastianbergmann/environment/issues", - "source": "https://github.com/sebastianbergmann/environment/tree/5.1.4" + "source": "https://github.com/sebastianbergmann/environment/tree/5.1.5" }, "funding": [ { @@ 
-1376,20 +1149,20 @@ "type": "github" } ], - "time": "2022-04-03T09:37:03+00:00" + "time": "2023-02-03T06:03:51+00:00" }, { "name": "sebastian/exporter", - "version": "4.0.4", + "version": "4.0.5", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/exporter.git", - "reference": "65e8b7db476c5dd267e65eea9cab77584d3cfff9" + "reference": "ac230ed27f0f98f597c8a2b6eb7ac563af5e5b9d" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/65e8b7db476c5dd267e65eea9cab77584d3cfff9", - "reference": "65e8b7db476c5dd267e65eea9cab77584d3cfff9", + "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/ac230ed27f0f98f597c8a2b6eb7ac563af5e5b9d", + "reference": "ac230ed27f0f98f597c8a2b6eb7ac563af5e5b9d", "shasum": "" }, "require": { @@ -1445,7 +1218,7 @@ ], "support": { "issues": "https://github.com/sebastianbergmann/exporter/issues", - "source": "https://github.com/sebastianbergmann/exporter/tree/4.0.4" + "source": "https://github.com/sebastianbergmann/exporter/tree/4.0.5" }, "funding": [ { @@ -1453,7 +1226,7 @@ "type": "github" } ], - "time": "2021-11-11T14:18:36+00:00" + "time": "2022-09-14T06:03:37+00:00" }, { "name": "sebastian/global-state", @@ -1690,16 +1463,16 @@ }, { "name": "sebastian/recursion-context", - "version": "4.0.4", + "version": "4.0.5", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/recursion-context.git", - "reference": "cd9d8cf3c5804de4341c283ed787f099f5506172" + "reference": "e75bd0f07204fec2a0af9b0f3cfe97d05f92efc1" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/cd9d8cf3c5804de4341c283ed787f099f5506172", - "reference": "cd9d8cf3c5804de4341c283ed787f099f5506172", + "url": "https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/e75bd0f07204fec2a0af9b0f3cfe97d05f92efc1", + "reference": "e75bd0f07204fec2a0af9b0f3cfe97d05f92efc1", "shasum": "" }, "require": { @@ -1738,10 
+1511,10 @@ } ], "description": "Provides functionality to recursively process PHP variables", - "homepage": "http://www.github.com/sebastianbergmann/recursion-context", + "homepage": "https://github.com/sebastianbergmann/recursion-context", "support": { "issues": "https://github.com/sebastianbergmann/recursion-context/issues", - "source": "https://github.com/sebastianbergmann/recursion-context/tree/4.0.4" + "source": "https://github.com/sebastianbergmann/recursion-context/tree/4.0.5" }, "funding": [ { @@ -1749,7 +1522,7 @@ "type": "github" } ], - "time": "2020-10-26T13:17:30+00:00" + "time": "2023-02-03T06:07:39+00:00" }, { "name": "sebastian/resource-operations", @@ -1808,16 +1581,16 @@ }, { "name": "sebastian/type", - "version": "3.0.0", + "version": "3.2.1", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/type.git", - "reference": "b233b84bc4465aff7b57cf1c4bc75c86d00d6dad" + "reference": "75e2c2a32f5e0b3aef905b9ed0b179b953b3d7c7" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/type/zipball/b233b84bc4465aff7b57cf1c4bc75c86d00d6dad", - "reference": "b233b84bc4465aff7b57cf1c4bc75c86d00d6dad", + "url": "https://api.github.com/repos/sebastianbergmann/type/zipball/75e2c2a32f5e0b3aef905b9ed0b179b953b3d7c7", + "reference": "75e2c2a32f5e0b3aef905b9ed0b179b953b3d7c7", "shasum": "" }, "require": { @@ -1829,7 +1602,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "3.0-dev" + "dev-master": "3.2-dev" } }, "autoload": { @@ -1852,7 +1625,7 @@ "homepage": "https://github.com/sebastianbergmann/type", "support": { "issues": "https://github.com/sebastianbergmann/type/issues", - "source": "https://github.com/sebastianbergmann/type/tree/3.0.0" + "source": "https://github.com/sebastianbergmann/type/tree/3.2.1" }, "funding": [ { @@ -1860,7 +1633,7 @@ "type": "github" } ], - "time": "2022-03-15T09:54:48+00:00" + "time": "2023-02-03T06:13:03+00:00" }, { "name": "sebastian/version", @@ -1917,16 +1690,16 @@ 
}, { "name": "squizlabs/php_codesniffer", - "version": "3.6.2", + "version": "3.7.2", "source": { "type": "git", "url": "https://github.com/squizlabs/PHP_CodeSniffer.git", - "reference": "5e4e71592f69da17871dba6e80dd51bce74a351a" + "reference": "ed8e00df0a83aa96acf703f8c2979ff33341f879" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/squizlabs/PHP_CodeSniffer/zipball/5e4e71592f69da17871dba6e80dd51bce74a351a", - "reference": "5e4e71592f69da17871dba6e80dd51bce74a351a", + "url": "https://api.github.com/repos/squizlabs/PHP_CodeSniffer/zipball/ed8e00df0a83aa96acf703f8c2979ff33341f879", + "reference": "ed8e00df0a83aa96acf703f8c2979ff33341f879", "shasum": "" }, "require": { @@ -1962,96 +1735,15 @@ "homepage": "https://github.com/squizlabs/PHP_CodeSniffer", "keywords": [ "phpcs", - "standards" + "standards", + "static analysis" ], "support": { "issues": "https://github.com/squizlabs/PHP_CodeSniffer/issues", "source": "https://github.com/squizlabs/PHP_CodeSniffer", "wiki": "https://github.com/squizlabs/PHP_CodeSniffer/wiki" }, - "time": "2021-12-12T21:44:58+00:00" - }, - { - "name": "symfony/polyfill-ctype", - "version": "v1.25.0", - "source": { - "type": "git", - "url": "https://github.com/symfony/polyfill-ctype.git", - "reference": "30885182c981ab175d4d034db0f6f469898070ab" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/30885182c981ab175d4d034db0f6f469898070ab", - "reference": "30885182c981ab175d4d034db0f6f469898070ab", - "shasum": "" - }, - "require": { - "php": ">=7.1" - }, - "provide": { - "ext-ctype": "*" - }, - "suggest": { - "ext-ctype": "For best performance" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-main": "1.23-dev" - }, - "thanks": { - "name": "symfony/polyfill", - "url": "https://github.com/symfony/polyfill" - } - }, - "autoload": { - "files": [ - "bootstrap.php" - ], - "psr-4": { - "Symfony\\Polyfill\\Ctype\\": "" - } - }, - "notification-url": 
"https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Gert de Pagter", - "email": "BackEndTea@gmail.com" - }, - { - "name": "Symfony Community", - "homepage": "https://symfony.com/contributors" - } - ], - "description": "Symfony polyfill for ctype functions", - "homepage": "https://symfony.com", - "keywords": [ - "compatibility", - "ctype", - "polyfill", - "portable" - ], - "support": { - "source": "https://github.com/symfony/polyfill-ctype/tree/v1.25.0" - }, - "funding": [ - { - "url": "https://symfony.com/sponsor", - "type": "custom" - }, - { - "url": "https://github.com/fabpot", - "type": "github" - }, - { - "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", - "type": "tidelift" - } - ], - "time": "2021-10-20T20:35:02+00:00" + "time": "2023-02-22T23:07:41+00:00" }, { "name": "theseer/tokenizer", @@ -2102,64 +1794,6 @@ } ], "time": "2021-07-28T10:34:58+00:00" - }, - { - "name": "webmozart/assert", - "version": "1.10.0", - "source": { - "type": "git", - "url": "https://github.com/webmozarts/assert.git", - "reference": "6964c76c7804814a842473e0c8fd15bab0f18e25" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/webmozarts/assert/zipball/6964c76c7804814a842473e0c8fd15bab0f18e25", - "reference": "6964c76c7804814a842473e0c8fd15bab0f18e25", - "shasum": "" - }, - "require": { - "php": "^7.2 || ^8.0", - "symfony/polyfill-ctype": "^1.8" - }, - "conflict": { - "phpstan/phpstan": "<0.12.20", - "vimeo/psalm": "<4.6.1 || 4.6.2" - }, - "require-dev": { - "phpunit/phpunit": "^8.5.13" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-master": "1.10-dev" - } - }, - "autoload": { - "psr-4": { - "Webmozart\\Assert\\": "src/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Bernhard Schussek", - "email": "bschussek@gmail.com" - } - ], - "description": "Assertions to validate method input/output with nice error 
messages.", - "keywords": [ - "assert", - "check", - "validate" - ], - "support": { - "issues": "https://github.com/webmozarts/assert/issues", - "source": "https://github.com/webmozarts/assert/tree/1.10.0" - }, - "time": "2021-03-09T10:59:23+00:00" } ], "aliases": [], @@ -2174,8 +1808,9 @@ "ext-openssl": "*", "ext-libxml": "*", "ext-simplexml": "*", - "ext-json": "*" + "ext-json": "*", + "ext-intl": "*" }, "platform-dev": [], - "plugin-api-version": "2.3.0" + "plugin-api-version": "2.0.0" } diff --git a/config.default.ini.php b/config.default.ini.php index 3a347036d60..d0c508f4481 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -6,6 +6,27 @@ [system] +; Only these bridges are available for feed production +; How to enable all bridges: enabled_bridges[] = * +enabled_bridges[] = CssSelectorBridge +enabled_bridges[] = FeedMerge +enabled_bridges[] = FeedReducerBridge +enabled_bridges[] = Filter +enabled_bridges[] = GettrBridge +enabled_bridges[] = MastodonBridge +enabled_bridges[] = Reddit +enabled_bridges[] = RumbleBridge +enabled_bridges[] = SoundcloudBridge +enabled_bridges[] = Telegram +enabled_bridges[] = ThePirateBay +enabled_bridges[] = TikTokBridge +enabled_bridges[] = Twitch +enabled_bridges[] = Twitter +enabled_bridges[] = Vk +enabled_bridges[] = XPathBridge +enabled_bridges[] = Youtube +enabled_bridges[] = YouTubeCommunityTabBridge + ; Defines the timezone used by RSS-Bridge ; Find a list of supported timezones at ; https://www.php.net/manual/en/timezones.php @@ -22,6 +43,9 @@ ; debug_mode_whitelist[] = 127.0.0.1 ; debug_mode_whitelist[] = 192.168.1.10 +; Whether to enable maintenance mode. 
If enabled, feed requests receive 503 Service Unavailable +enable_maintenance_mode = false + [http] timeout = 60 useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" @@ -110,7 +134,12 @@ enable_purge = true [SQLiteCache] +; Filepath of the sqlite db file file = "cache.sqlite" +; Whether to actually delete data when purging +enable_purge = true +; Busy wait in ms before timing out +timeout = 5000 [MemcachedCache] host = "localhost" diff --git a/contrib/prepare_release/fetch_contributors.php b/contrib/prepare_release/fetch_contributors.php index ad04458ad67..cfe2c5b29cb 100644 --- a/contrib/prepare_release/fetch_contributors.php +++ b/contrib/prepare_release/fetch_contributors.php @@ -14,7 +14,8 @@ 'Content-Type' => 'application/json', 'User-Agent' => 'RSS-Bridge', ]; - $result = _http_request($url, ['headers' => $headers]); + $httpClient = new CurlHttpClient(); + $result = $httpClient->request($url, ['headers' => $headers]); foreach (json_decode($result['body']) as $contributor) { $contributors[] = $contributor; diff --git a/docs/03_For_Hosts/03_Docker_Installation.md b/docs/03_For_Hosts/03_Docker_Installation.md index 12b75c482f9..d895e748bfb 100644 --- a/docs/03_For_Hosts/03_Docker_Installation.md +++ b/docs/03_For_Hosts/03_Docker_Installation.md @@ -45,5 +45,5 @@ services: If you want to add a bridge that is not part of [`/bridges`](https://github.com/RSS-Bridge/rss-bridge/tree/master/bridges), you can map a folder to the `/config` folder of the `rss-bridge` container. 1. Create a folder in the location of your docker-compose.yml or your general docker working area (in this example it will be `/home/docker/rssbridge/config` ). -2. Copy your [custom bridges](../05_Bridge_API/01_How_to_create_a_new_bridge.md) to the `/home/docker/rssbridge/config` folder. 
You can also add your custom [whitelist.txt](../03_For_Hosts/05_Whitelisting.md) file and your custom [config.ini.php](../03_For_Hosts/08_Custom_Configuration.md) to this folder. +2. Copy your [custom bridges](../05_Bridge_API/01_How_to_create_a_new_bridge.md) to the `/home/docker/rssbridge/config` folder. This also applies to [config.ini.php](../03_For_Hosts/08_Custom_Configuration.md). 3. Map the folder to `/config` inside the container. To do that, replace the `` from the previous examples with `/home/docker/rssbridge/config` \ No newline at end of file diff --git a/docs/03_For_Hosts/04_Heroku_Installation.md b/docs/03_For_Hosts/04_Heroku_Installation.md index ecda833d514..f79c2a969d4 100644 --- a/docs/03_For_Hosts/04_Heroku_Installation.md +++ b/docs/03_For_Hosts/04_Heroku_Installation.md @@ -14,7 +14,7 @@ You can simply press the button below to easily deploy RSS Bridge on Heroku and ![image](../images/fork_button.png) -2. To customise what bridges can be used if need, create a `whitelist.txt` file in your fork and follow the instructions given [here](../03_For_Hosts/05_Whitelisting.md). You don’t need to do this if you’re fine with the default bridges. +2. To customise what bridges can be used if needed, see [here](../03_For_Hosts/05_Whitelisting.md). You don’t need to do this if you’re fine with the default bridges. 3. [Log in to Heroku](https://dashboard.heroku.com) and create a new app. The app name will be the URL of the RSS Bridge (appname.herokuapp.com) diff --git a/docs/03_For_Hosts/05_Whitelisting.md b/docs/03_For_Hosts/05_Whitelisting.md index 46e029edd1b..113c4e3d43b 100644 --- a/docs/03_For_Hosts/05_Whitelisting.md +++ b/docs/03_For_Hosts/05_Whitelisting.md @@ -1,38 +1,26 @@ -RSS-Bridge supports whitelists in order to limit the available bridges on your web server. +Modify `config.ini.php` to limit available bridges. -A default whitelist file (`whitelist.default.txt`) is shipped with RSS-Bridge.
Please do not edit this file, as it gets replaced when upgrading RSS-Bridge! +## Enable all bridges -You should, however, use this file as template to create your own whitelist (or leave it as is, to keep the default bridges). In order to create your own whitelist perform following actions: - -* Copy the file `whitelist.default.txt` in the RSS-Bridge root folder -* Rename the new file to `whitelist.txt` -* Change the lines to satisfy your requirements - -RSS-Bridge will automatically detect the `whitelist.txt` and use it. If the file doesn't exist it will default to `whitelist.default.txt` automatically. - -# Specific whitelisting +``` +enabled_bridges[] = * +``` -In order to specifically whitelist bridges, open `whitelist.txt` and add one line for each bridge you want to show. Make sure you use normal [line-feeds](https://en.wikipedia.org/wiki/Newline "Line-feed") at the end of a line (LF not [CRLF](https://en.wikipedia.org/wiki/Carriage_return "Carriage-return line-feed")). The bridge name must match the filename of the bridge in the bridges folder (see [folder structure](../04_For_Developers/03_Folder_structure.md)). The name may or may not include the 'Bridge' part. +## Enable some bridges -**Examples**: -```TEXT -FacebookBridge -WikipediaBridge -TwitterBridge +``` +enabled_bridges[] = TwitchBridge +enabled_bridges[] = GettrBridge ``` -or +## Enable all bridges (legacy shortcut) -```TEXT -Facebook -Wikipedia -Twitter +``` +echo '*' > whitelist.txt ``` -# Global whitelisting - -In order to globally whitelist all bridges, open the `whitelist.txt` file, remove all contents and just write an asterisk `*` into the file (only this one character). 
+## Enable some bridges (legacy shortcut) -```TEXT -* -``` \ No newline at end of file +``` +echo -e "TwitchBridge\nTwitterBridge" > whitelist.txt +``` diff --git a/docs/07_Cache_API/01_How_to_create_a_new_cache.md b/docs/07_Cache_API/01_How_to_create_a_new_cache.md index fe171718d91..cfc30308af3 100644 --- a/docs/07_Cache_API/01_How_to_create_a_new_cache.md +++ b/docs/07_Cache_API/01_How_to_create_a_new_cache.md @@ -1,24 +1,3 @@ Create a new file in the `caches/` folder (see [Folder structure](../04_For_Developers/03_Folder_structure.md)). -The file must be named according to following specification: - -* It starts with the type -* The file name must end with 'Cache' -* The file type must be PHP, written in small letters (seriously!) ".php" - -**Examples:** - -Type | Filename ------|--------- -File | FileCache.php -MySQL | MySQLCache.php - -The file must start with the PHP tags and end with an empty line. The closing tag `?>` is [omitted](http://php.net/basic-syntax.instruction-separation). 
- -Example: - -```PHP -queriedContext; } + $needle = $this->inputs[$this->queriedContext][$input]['value']; foreach (static::PARAMETERS[$context][$input]['values'] as $first_level_key => $first_level_value) { - if ($needle === (string)$first_level_value) { + if (!is_array($first_level_value) && $needle === (string)$first_level_value) { return $first_level_key; } elseif (is_array($first_level_value)) { foreach ($first_level_value as $second_level_key => $second_level_value) { @@ -408,39 +409,27 @@ public function detectParameters($url) /** * Loads a cached value for the specified key * - * @param string $key Key name - * @param int $duration Cache duration (optional) + * @param int $timeout Cache duration (optional) * @return mixed Cached value or null if the key doesn't exist or has expired */ - protected function loadCacheValue($key, $duration = null) + protected function loadCacheValue(string $key, int $timeout = 86400) { - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); - // Create class name without the namespace part - $scope = $this->getShortName(); - $cache->setScope($scope); - $cache->setKey($key); - if ($duration && $cache->getTime() < time() - $duration) { - return null; - } - return $cache->loadData(); + $cache = RssBridge::getCache(); + $cache->setScope($this->getShortName()); + $cache->setKey([$key]); + return $cache->loadData($timeout); } /** * Stores a value to cache with the specified key * - * @param string $key Key name * @param mixed $value Value to cache */ - protected function saveCacheValue($key, $value) + protected function saveCacheValue(string $key, $value) { - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); - $scope = $this->getShortName(); - $cache->setScope($scope); - $cache->setKey($key); + $cache = RssBridge::getCache(); + $cache->setScope($this->getShortName()); + $cache->setKey([$key]); $cache->saveData($value); } diff --git a/lib/BridgeFactory.php b/lib/BridgeFactory.php index 
6bd832bf9ee..db2c394a2c8 100644 --- a/lib/BridgeFactory.php +++ b/lib/BridgeFactory.php @@ -2,101 +2,69 @@ final class BridgeFactory { - /** @var array> */ private $bridgeClassNames = []; - - /** @var array> */ - private $whitelist = []; + private $enabledBridges = []; public function __construct() { - // create names + // Create all possible bridge class names from fs foreach (scandir(__DIR__ . '/../bridges/') as $file) { if (preg_match('/^([^.]+Bridge)\.php$/U', $file, $m)) { $this->bridgeClassNames[] = $m[1]; } } - // create whitelist - if (file_exists(WHITELIST)) { - $contents = trim(file_get_contents(WHITELIST)); - } elseif (file_exists(WHITELIST_DEFAULT)) { - $contents = trim(file_get_contents(WHITELIST_DEFAULT)); - } else { - $contents = ''; + $enabledBridges = Configuration::getConfig('system', 'enabled_bridges'); + if ($enabledBridges === null) { + throw new \Exception('No bridges are enabled... wtf?'); } - - if ($contents === '*') { - // Whitelist all bridges - $this->whitelist = $this->getBridgeClassNames(); - } else { - foreach (explode("\n", $contents) as $bridgeName) { - $bridgeClassName = $this->sanitizeBridgeName($bridgeName); - if ($bridgeClassName !== null) { - $this->whitelist[] = $bridgeClassName; - } + foreach ($enabledBridges as $enabledBridge) { + if ($enabledBridge === '*') { + $this->enabledBridges = $this->bridgeClassNames; + break; } + $this->enabledBridges[] = $this->createBridgeClassName($enabledBridge); } } - /** - * @param class-string $name - */ public function create(string $name): BridgeInterface { return new $name(); } - /** - * @return array> - */ - public function getBridgeClassNames(): array + public function isEnabled(string $bridgeName): bool { - return $this->bridgeClassNames; + return in_array($bridgeName, $this->enabledBridges); } - /** - * @param class-string|null $name - */ - public function isWhitelisted(string $name): bool + public function createBridgeClassName(string $bridgeName): ?string { - return in_array($name, 
$this->whitelist); - } + $name = self::normalizeBridgeName($bridgeName); + $namesLoweredCase = array_map('strtolower', $this->bridgeClassNames); + $nameLoweredCase = strtolower($name); - /** - * Tries to turn a potentially human produced bridge name into a class name. - * - * @param mixed $name - * @return class-string|null - */ - public function sanitizeBridgeName($name): ?string - { - if (!is_string($name)) { - return null; + if (! in_array($nameLoweredCase, $namesLoweredCase)) { + throw new \Exception(sprintf('Bridge name invalid: %s', $bridgeName)); } - // Trim trailing '.php' if exists + $index = array_search($nameLoweredCase, $namesLoweredCase); + + return $this->bridgeClassNames[$index]; + } + + public static function normalizeBridgeName(string $name) + { if (preg_match('/(.+)(?:\.php)/', $name, $matches)) { $name = $matches[1]; } - - // Append 'Bridge' suffix if not present. if (!preg_match('/(Bridge)$/i', $name)) { $name = sprintf('%sBridge', $name); } + return $name; + } - // Improve performance for correctly written bridge names - if (in_array($name, $this->getBridgeClassNames())) { - $index = array_search($name, $this->getBridgeClassNames()); - return $this->getBridgeClassNames()[$index]; - } - - // The name is valid if a corresponding bridge file is found on disk - if (in_array(strtolower($name), array_map('strtolower', $this->getBridgeClassNames()))) { - $index = array_search(strtolower($name), array_map('strtolower', $this->getBridgeClassNames())); - return $this->getBridgeClassNames()[$index]; - } - - return null; + public function getBridgeClassNames(): array + { + return $this->bridgeClassNames; } } diff --git a/lib/CacheFactory.php b/lib/CacheFactory.php index 4bf6342cf7d..78a0e83e488 100644 --- a/lib/CacheFactory.php +++ b/lib/CacheFactory.php @@ -38,13 +38,39 @@ public function create(string $name = null): CacheInterface case NullCache::class: return new NullCache(); case FileCache::class: - return new FileCache([ - // Intentionally checking 
for "truthy" value + $fileCacheConfig = [ + // Intentionally checking for truthy value because the historic default value is the empty string 'path' => Configuration::getConfig('FileCache', 'path') ?: PATH_CACHE, 'enable_purge' => Configuration::getConfig('FileCache', 'enable_purge'), - ]); + ]; + if (!is_dir($fileCacheConfig['path'])) { + throw new \Exception(sprintf('The FileCache path does not exists: %s', $fileCacheConfig['path'])); + } + if (!is_writable($fileCacheConfig['path'])) { + throw new \Exception(sprintf('The FileCache path is not writable: %s', $fileCacheConfig['path'])); + } + return new FileCache($fileCacheConfig); case SQLiteCache::class: - return new SQLiteCache(); + if (!extension_loaded('sqlite3')) { + throw new \Exception('"sqlite3" extension not loaded. Please check "php.ini"'); + } + if (!is_writable(PATH_CACHE)) { + throw new \Exception('The cache folder is not writable'); + } + $file = Configuration::getConfig('SQLiteCache', 'file'); + if (!$file) { + throw new \Exception(sprintf('Configuration for %s missing.', 'SQLiteCache')); + } + if (dirname($file) == '.') { + $file = PATH_CACHE . 
$file; + } elseif (!is_dir(dirname($file))) { + throw new \Exception(sprintf('Invalid configuration for %s', 'SQLiteCache')); + } + return new SQLiteCache([ + 'file' => $file, + 'timeout' => Configuration::getConfig('SQLiteCache', 'timeout'), + 'enable_purge' => Configuration::getConfig('SQLiteCache', 'enable_purge'), + ]); case MemcachedCache::class: return new MemcachedCache(); default: diff --git a/lib/CacheInterface.php b/lib/CacheInterface.php index 1c3b89cadc1..85aa830f1f0 100644 --- a/lib/CacheInterface.php +++ b/lib/CacheInterface.php @@ -1,68 +1,16 @@ $envValue) { $nameParts = explode('_', $envName); if ($nameParts[0] === 'RSSBRIDGE') { @@ -105,22 +124,30 @@ public static function loadConfiguration(array $customConfig = [], array $env = // Invalid env name continue; } + + // The variable is named $header but it's actually the section in config.ini.php $header = $nameParts[1]; - $key = $nameParts[2]; + + // Recombine the key if it had multiple underscores + $key = implode('_', array_slice($nameParts, 2)); + $key = strtolower($key); + + // Handle this specifically because it's an array + if ($key === 'enabled_bridges') { + $envValue = explode(',', $envValue); + $envValue = array_map('trim', $envValue); + } + if ($envValue === 'true' || $envValue === 'false') { $envValue = filter_var($envValue, FILTER_VALIDATE_BOOLEAN); } + self::setConfig($header, $key, $envValue); } } - if (file_exists(__DIR__ . '/../DEBUG')) { - // The debug mode has been moved to config. Preserve existing installs which has this DEBUG file. - self::setConfig('system', 'enable_debug_mode', true); - $debug = trim(file_get_contents(__DIR__ . 
'/../DEBUG')); - if ($debug) { - self::setConfig('system', 'debug_mode_whitelist', explode("\n", str_replace("\r", '', $debug))); - } + if (!is_array(self::getConfig('system', 'enabled_bridges'))) { + self::throwConfigError('system', 'enabled_bridges', 'Is not an array'); } if ( @@ -193,9 +220,9 @@ public static function loadConfiguration(array $customConfig = [], array $env = } } - public static function getConfig(string $section, string $key) + public static function getConfig(string $section, string $key, $default = null) { - return self::$config[strtolower($section)][strtolower($key)] ?? null; + return self::$config[strtolower($section)][strtolower($key)] ?? $default; } private static function setConfig(string $section, string $key, $value): void diff --git a/lib/Debug.php b/lib/Debug.php index f6a8d1052b4..48dbb31a9dd 100644 --- a/lib/Debug.php +++ b/lib/Debug.php @@ -7,7 +7,7 @@ class Debug */ public static function isEnabled(): bool { - $ip = $_SERVER['REMOTE_ADDR']; + $ip = $_SERVER['REMOTE_ADDR'] ?? 'x.y.z.1'; $enableDebugMode = Configuration::getConfig('system', 'enable_debug_mode'); $debugModeWhitelist = Configuration::getConfig('system', 'debug_mode_whitelist') ?: []; if ($enableDebugMode && ($debugModeWhitelist === [] || in_array($ip, $debugModeWhitelist))) { diff --git a/lib/Logger.php b/lib/Logger.php index e15035fe503..5423f62c4d8 100644 --- a/lib/Logger.php +++ b/lib/Logger.php @@ -43,6 +43,7 @@ private static function log(string $level, string $message, array $context = []) $context['url'] = get_current_url(); $context['trace'] = trace_to_call_points(trace_from_exception($e)); // Don't log these exceptions + // todo: this logic belongs in log handler $ignoredExceptions = [ 'You must specify a format', 'Format name invalid', @@ -54,6 +55,10 @@ private static function log(string $level, string $message, array $context = []) 'Unable to find channel. The channel is non-existing or non-public', // fb 'This group is not public! 
RSS-Bridge only supports public groups!', + 'You must be logged in to view this page', + 'Unable to get the page id. You should consider getting the ID by hand', + // tiktok 404 + 'https://www.tiktok.com/@', ]; foreach ($ignoredExceptions as $ignoredException) { if (str_starts_with($e->getMessage(), $ignoredException)) { @@ -76,6 +81,6 @@ private static function log(string $level, string $message, array $context = []) // Log to file // todo: extract to log handler - //file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND); + // file_put_contents('/tmp/rss-bridge.log', $text, FILE_APPEND | LOCK_EX); } } diff --git a/lib/RssBridge.php b/lib/RssBridge.php index e6f1c9e4409..8969dc549af 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -2,6 +2,9 @@ final class RssBridge { + private static HttpClient $httpClient; + private static CacheInterface $cache; + public function main(array $argv = []) { if ($argv) { @@ -14,7 +17,7 @@ public function main(array $argv = []) try { $this->run($request); } catch (\Throwable $e) { - Logger::error('Exception in main', ['e' => $e]); + Logger::error(sprintf('Exception in RssBridge::main(): %s', create_sane_exception_message($e)), ['e' => $e]); http_response_code(500); print render(__DIR__ . 
'/../templates/error.html.php', ['e' => $e]); } @@ -52,7 +55,7 @@ private function run($request): void $error = error_get_last(); if ($error) { $message = sprintf( - 'Fatal Error %s: %s in %s line %s', + '(shutdown) %s: %s in %s line %s', $error['type'], sanitize_root($error['message']), sanitize_root($error['file']), @@ -69,6 +72,11 @@ private function run($request): void // Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED); date_default_timezone_set(Configuration::getConfig('system', 'timezone')); + $cacheFactory = new CacheFactory(); + + self::$httpClient = new CurlHttpClient(); + self::$cache = $cacheFactory->create(); + if (Configuration::getConfig('authentication', 'enable')) { $authenticationMiddleware = new AuthenticationMiddleware(); $authenticationMiddleware(); @@ -98,4 +106,14 @@ private function run($request): void $response->send(); } } + + public static function getHttpClient(): HttpClient + { + return self::$httpClient; + } + + public static function getCache(): CacheInterface + { + return self::$cache; + } } diff --git a/lib/TwitterClient.php b/lib/TwitterClient.php index fa8d765f837..0c6b95355eb 100644 --- a/lib/TwitterClient.php +++ b/lib/TwitterClient.php @@ -11,8 +11,95 @@ class TwitterClient public function __construct(CacheInterface $cache) { $this->cache = $cache; - $this->authorization = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'; - $this->data = $cache->loadData() ?? []; + + $cache->setScope('twitter'); + $cache->setKey(['cache']); + $cache->purgeCache(60 * 60 * 3); + + $this->data = $this->cache->loadData() ?? 
[]; + $this->authorization = 'AAAAAAAAAAAAAAAAAAAAAGHtAgAAAAAA%2Bx7ILXNILCqkSGIzy6faIHZ9s3Q%3DQy97w6SIrzE7lQwPJEYQBsArEE2fC25caFwRBvAGi456G09vGR'; + } + + private function extractTweetAndUsersFromGraphQL($timeline) + { + if (isset($timeline->data->user)) { + $result = $timeline->data->user->result; + $instructions = $result->timeline_v2->timeline->instructions; + } else { + $result = $timeline->data->list->timeline_response; + $instructions = $result->timeline->instructions; + } + if (isset($result->__typename) && $result->__typename === 'UserUnavailable') { + throw new \Exception('UserUnavailable'); + } + $instructionTypes = [ + 'TimelineAddEntries', + 'TimelineClearCache', + 'TimelinePinEntry', // unclear purpose, maybe pinned tweet? + ]; + if (!isset($instructions[1]) && isset($timeline->data->user)) { + throw new \Exception('The account exists but has not tweeted yet?'); + } + + $entries = null; + foreach ($instructions as $instruction) { + $instructionType = ''; + if (isset($instruction->type)) { + $instructionType = $instruction->type; + } else { + $instructionType = $instruction->__typename; + } + + if ($instructionType === 'TimelineAddEntries') { + $entries = $instruction->entries; + break; + } + } + if (!$entries) { + throw new \Exception(sprintf('Unable to find time line tweets in: %s', implode(',', array_column($instructions, 'type')))); + } + + $tweets = []; + $userIds = []; + foreach ($entries as $entry) { + $entryType = ''; + + if (isset($entry->content->entryType)) { + $entryType = $entry->content->entryType; + } else { + $entryType = $entry->content->__typename; + } + + if ($entryType !== 'TimelineTimelineItem') { + continue; + } + + if (isset($timeline->data->user)) { + if (!isset($entry->content->itemContent->tweet_results->result->legacy)) { + continue; + } + $tweets[] = $entry->content->itemContent->tweet_results->result->legacy; + + $userIds[] = $entry->content->itemContent->tweet_results->result->core->user_results->result; + } else { + if 
(!isset($entry->content->content->tweetResult->result->legacy)) { + continue; + } + $tweets[] = $entry->content->content->tweetResult->result->legacy; + + $userIds[] = $entry->content->content->tweetResult->result->core->user_result->result; + } + } + + return (object) [ + 'userIds' => $userIds, + 'tweets' => $tweets, + ]; + } + + private function extractTweetFromSearch($searchResult) + { + return $searchResult->statuses; } public function fetchUserTweets(string $screenName): \stdClass @@ -22,7 +109,6 @@ public function fetchUserTweets(string $screenName): \stdClass $userInfo = $this->fetchUserInfoByScreenName($screenName); } catch (HttpException $e) { if ($e->getCode() === 403) { - Logger::info('The guest token has expired'); $this->data['guest_token'] = null; $this->fetchGuestToken(); $userInfo = $this->fetchUserInfoByScreenName($screenName); @@ -32,59 +118,79 @@ public function fetchUserTweets(string $screenName): \stdClass } try { - $timeline = $this->fetchTimeline($userInfo->rest_id); + $timeline = $this->fetchTimelineUsingSearch($screenName); } catch (HttpException $e) { if ($e->getCode() === 403) { - Logger::info('The guest token has expired'); $this->data['guest_token'] = null; $this->fetchGuestToken(); - $timeline = $this->fetchTimeline($userInfo->rest_id); + $timeline = $this->fetchTimelineUsingSearch($screenName); } else { throw $e; } } - $result = $timeline->data->user->result; - if ($result->__typename === 'UserUnavailable') { - throw new \Exception('UserUnavailable'); - } - $instructionTypes = ['TimelineAddEntries', 'TimelineClearCache']; - $instructions = $result->timeline_v2->timeline->instructions; - if (!isset($instructions[1])) { - throw new \Exception('The account exists but has not tweeted yet?'); - } - $instruction = $instructions[1]; - if ($instruction->type !== 'TimelineAddEntries') { - throw new \Exception(sprintf('Unexpected instruction type: %s', $instruction->type)); - } - $tweets = []; - foreach ($instruction->entries as $entry) { - if 
($entry->content->entryType !== 'TimelineTimelineItem') { - continue; - } - if (!isset($entry->content->itemContent->tweet_results->result->legacy)) { - continue; - } - $tweets[] = $entry->content->itemContent->tweet_results->result->legacy; - } + $tweets = $this->extractTweetFromSearch($timeline); + return (object) [ 'user_info' => $userInfo, 'tweets' => $tweets, ]; } + public function fetchListTweets($query, $operation = '') + { + $id = ''; + $this->fetchGuestToken(); + if ($operation == 'By list') { + try { + $listInfo = $this->fetchListInfoBySlug($query['screenName'], $query['listSlug']); + $id = $listInfo->id_str; + } catch (HttpException $e) { + if ($e->getCode() === 403) { + $this->data['guest_token'] = null; + $this->fetchGuestToken(); + $listInfo = $this->fetchListInfoBySlug($query['screenName'], $query['listSlug']); + $id = $listInfo->id_str; + } else { + throw $e; + } + } + } elseif ($operation === 'By list ID') { + $id = $query['listId']; + } else { + throw new \Exception('Unknown operation to make list tweets'); + } + + try { + $timeline = $this->fetchListTimeline($id); + } catch (HttpException $e) { + if ($e->getCode() === 403) { + $this->data['guest_token'] = null; + $this->fetchGuestToken(); + $timeline = $this->fetchListTimeline($id); + } else { + throw $e; + } + } + + $data = $this->extractTweetAndUsersFromGraphQL($timeline); + + return $data; + } + private function fetchGuestToken(): void { if (isset($this->data['guest_token'])) { - Logger::info('Reusing cached guest token: ' . 
$this->data['guest_token']); return; } $url = 'https://api.twitter.com/1.1/guest/activate.json'; $response = getContents($url, $this->createHttpHeaders(), [CURLOPT_POST => true]); $guest_token = json_decode($response)->guest_token; $this->data['guest_token'] = $guest_token; + + $this->cache->setScope('twitter'); + $this->cache->setKey(['cache']); $this->cache->saveData($this->data); - Logger::info("Fetch new guest token: $guest_token"); } private function fetchUserInfoByScreenName(string $screenName) @@ -100,13 +206,16 @@ private function fetchUserInfoByScreenName(string $screenName) 'https://twitter.com/i/api/graphql/hc-pka9A7gyS3xODIafnrQ/UserByScreenName?variables=%s', urlencode(json_encode($variables)) ); - $response = json_decode(getContents($url, $this->createHttpHeaders())); + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); if (isset($response->errors)) { // Grab the first error message throw new \Exception(sprintf('From twitter api: "%s"', $response->errors[0]->message)); } $userInfo = $response->data->user; $this->data[$screenName] = $userInfo; + + $this->cache->setScope('twitter'); + $this->cache->setKey(['cache']); $this->cache->saveData($this->data); return $userInfo; } @@ -150,7 +259,164 @@ private function fetchTimeline($userId) urlencode(json_encode($variables)), urlencode(json_encode($features)) ); - $response = json_decode(getContents($url, $this->createHttpHeaders())); + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); + return $response; + } + + private function fetchTimelineUsingSearch($screenName) + { + $params = [ + 'q' => 'from:' . 
$screenName, + 'modules' => 'status', + 'result_type' => 'recent' + ]; + $response = $this->search($params); + return $response; + } + + public function search($queryParam) + { + $url = sprintf( + 'https://api.twitter.com/1.1/search/tweets.json?%s', + http_build_query($queryParam) + ); + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); + return $response; + } + + private function fetchListInfoBySlug($screenName, $listSlug) + { + if (isset($this->data[$screenName . '-' . $listSlug])) { + return $this->data[$screenName . '-' . $listSlug]; + } + + $features = [ + 'android_graphql_skip_api_media_color_palette' => false, + 'blue_business_profile_image_shape_enabled' => false, + 'creator_subscriptions_subscription_count_enabled' => false, + 'creator_subscriptions_tweet_preview_api_enabled' => true, + 'freedom_of_speech_not_reach_fetch_enabled' => false, + 'graphql_is_translatable_rweb_tweet_is_translatable_enabled' => false, + 'hidden_profile_likes_enabled' => false, + 'highlights_tweets_tab_ui_enabled' => false, + 'interactive_text_enabled' => false, + 'longform_notetweets_consumption_enabled' => true, + 'longform_notetweets_inline_media_enabled' => false, + 'longform_notetweets_richtext_consumption_enabled' => true, + 'longform_notetweets_rich_text_read_enabled' => false, + 'responsive_web_edit_tweet_api_enabled' => false, + 'responsive_web_enhance_cards_enabled' => false, + 'responsive_web_graphql_exclude_directive_enabled' => true, + 'responsive_web_graphql_skip_user_profile_image_extensions_enabled' => false, + 'responsive_web_graphql_timeline_navigation_enabled' => false, + 'responsive_web_media_download_video_enabled' => false, + 'responsive_web_text_conversations_enabled' => false, + 'responsive_web_twitter_article_tweet_consumption_enabled' => false, + 'responsive_web_twitter_blue_verified_badge_is_enabled' => true, + 'rweb_lists_timeline_redesign_enabled' => true, + 'spaces_2022_h2_clipping' => true, + 
'spaces_2022_h2_spaces_communities' => true, + 'standardized_nudges_misinfo' => false, + 'subscriptions_verification_info_enabled' => true, + 'subscriptions_verification_info_reason_enabled' => true, + 'subscriptions_verification_info_verified_since_enabled' => true, + 'super_follow_badge_privacy_enabled' => false, + 'super_follow_exclusive_tweet_notifications_enabled' => false, + 'super_follow_tweet_api_enabled' => false, + 'super_follow_user_api_enabled' => false, + 'tweet_awards_web_tipping_enabled' => false, + 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled' => false, + 'tweetypie_unmention_optimization_enabled' => false, + 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled' => false, + 'verified_phone_label_enabled' => false, + 'vibe_api_enabled' => false, + 'view_counts_everywhere_api_enabled' => false + ]; + $variables = [ + 'screenName' => $screenName, + 'listSlug' => $listSlug + ]; + + $url = sprintf( + 'https://twitter.com/i/api/graphql/-kmqNvm5Y-cVrfvBy6docg/ListBySlug?variables=%s&features=%s', + urlencode(json_encode($variables)), + urlencode(json_encode($features)) + ); + + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); + if (isset($response->errors)) { + // Grab the first error message + throw new \Exception(sprintf('From twitter api: "%s"', $response->errors[0]->message)); + } + if (!isset($response->data->user_by_screen_name->list)) { + throw new \Exception( + sprintf('Unable to find list in twitter response for %s, %s', $screenName, $listSlug) + ); + } + $listInfo = $response->data->user_by_screen_name->list; + $this->data[$screenName . '-' . 
$listSlug] = $listInfo; + + $this->cache->setScope('twitter'); + $this->cache->setKey(['cache']); + $this->cache->saveData($this->data); + return $listInfo; + } + + private function fetchListTimeline($listId) + { + $features = [ + 'android_graphql_skip_api_media_color_palette' => false, + 'blue_business_profile_image_shape_enabled' => false, + 'creator_subscriptions_subscription_count_enabled' => false, + 'creator_subscriptions_tweet_preview_api_enabled' => true, + 'freedom_of_speech_not_reach_fetch_enabled' => false, + 'graphql_is_translatable_rweb_tweet_is_translatable_enabled' => false, + 'hidden_profile_likes_enabled' => false, + 'highlights_tweets_tab_ui_enabled' => false, + 'interactive_text_enabled' => false, + 'longform_notetweets_consumption_enabled' => true, + 'longform_notetweets_inline_media_enabled' => false, + 'longform_notetweets_richtext_consumption_enabled' => true, + 'longform_notetweets_rich_text_read_enabled' => false, + 'responsive_web_edit_tweet_api_enabled' => false, + 'responsive_web_enhance_cards_enabled' => false, + 'responsive_web_graphql_exclude_directive_enabled' => true, + 'responsive_web_graphql_skip_user_profile_image_extensions_enabled' => false, + 'responsive_web_graphql_timeline_navigation_enabled' => false, + 'responsive_web_media_download_video_enabled' => false, + 'responsive_web_text_conversations_enabled' => false, + 'responsive_web_twitter_article_tweet_consumption_enabled' => false, + 'responsive_web_twitter_blue_verified_badge_is_enabled' => true, + 'rweb_lists_timeline_redesign_enabled' => true, + 'spaces_2022_h2_clipping' => true, + 'spaces_2022_h2_spaces_communities' => true, + 'standardized_nudges_misinfo' => false, + 'subscriptions_verification_info_enabled' => true, + 'subscriptions_verification_info_reason_enabled' => true, + 'subscriptions_verification_info_verified_since_enabled' => true, + 'super_follow_badge_privacy_enabled' => false, + 'super_follow_exclusive_tweet_notifications_enabled' => false, + 
'super_follow_tweet_api_enabled' => false, + 'super_follow_user_api_enabled' => false, + 'tweet_awards_web_tipping_enabled' => false, + 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled' => false, + 'tweetypie_unmention_optimization_enabled' => false, + 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled' => false, + 'verified_phone_label_enabled' => false, + 'vibe_api_enabled' => false, + 'view_counts_everywhere_api_enabled' => false + ]; + $variables = [ + 'rest_id' => $listId, + 'count' => 20 + ]; + + $url = sprintf( + 'https://twitter.com/i/api/graphql/BbGLL1ZfMibdFNWlk7a0Pw/ListTimeline?variables=%s&features=%s', + urlencode(json_encode($variables)), + urlencode(json_encode($features)) + ); + $response = Json::decode(getContents($url, $this->createHttpHeaders()), false); return $response; } diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index 27d6e1a1a05..059293228a4 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ -388,7 +388,7 @@ public function collectData() libxml_use_internal_errors(false); // fix relative links - defaultLinkTo($webPageHtml, $this->feedUri); + defaultLinkTo($webPageHtml, $webPageHtml->baseURI ?? $this->feedUri); $xpath = new \DOMXPath($webPageHtml); diff --git a/lib/bootstrap.php b/lib/bootstrap.php index 86182801a45..e05dd94a251 100644 --- a/lib/bootstrap.php +++ b/lib/bootstrap.php @@ -29,12 +29,6 @@ /** Path to the cache folder */ const PATH_CACHE = __DIR__ . '/../cache/'; -/** Path to the whitelist file */ -const WHITELIST = __DIR__ . '/../whitelist.txt'; - -/** Path to the default whitelist file */ -const WHITELIST_DEFAULT = __DIR__ . 
'/../whitelist.default.txt'; - /** URL to the RSS-Bridge repository */ const REPOSITORY = 'https://github.com/RSS-Bridge/rss-bridge/'; diff --git a/lib/contents.php b/lib/contents.php index a1630e3ca19..5587a98e8b8 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -63,6 +63,11 @@ public function getBody() return $this->body; } + public function getCode() + { + return $this->code; + } + public function getHeaders() { return $this->headers; @@ -99,12 +104,7 @@ function getContents( array $curlOptions = [], bool $returnFull = false ) { - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); - $cache->setScope('server'); - $cache->purgeCache(86400); // 24 hours (forced) - $cache->setKey([$url]); + $httpClient = RssBridge::getHttpClient(); // Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102 $defaultHttpHeaders = [ @@ -140,24 +140,23 @@ function getContents( if (Configuration::getConfig('proxy', 'url') && !defined('NOPROXY')) { $config['proxy'] = Configuration::getConfig('proxy', 'url'); } - if (!Debug::isEnabled() && $cache->getTime()) { + + $cache = RssBridge::getCache(); + $cache->setScope('server'); + $cache->setKey([$url]); + + if (!Debug::isEnabled() && $cache->getTime() && $cache->loadData(86400 * 7)) { $config['if_not_modified_since'] = $cache->getTime(); } - $result = _http_request($url, $config); - $response = [ - 'code' => $result['code'], - 'status_lines' => $result['status_lines'], - 'header' => $result['headers'], - 'content' => $result['body'], - ]; + $response = $httpClient->request($url, $config); - switch ($result['code']) { + switch ($response['code']) { case 200: case 201: case 202: - if (isset($result['headers']['cache-control'])) { - $cachecontrol = $result['headers']['cache-control']; + if (isset($response['headers']['cache-control'])) { + $cachecontrol = $response['headers']['cache-control']; $lastValue = array_pop($cachecontrol); $directives = explode(',', $lastValue); $directives = 
array_map('trim', $directives); @@ -166,7 +165,7 @@ function getContents( break; } } - $cache->saveData($result['body']); + $cache->saveData($response['body']); break; case 301: case 302: @@ -175,16 +174,16 @@ function getContents( break; case 304: // Not Modified - $response['content'] = $cache->loadData(); + $response['body'] = $cache->loadData(86400 * 7); break; default: $exceptionMessage = sprintf( '%s resulted in %s %s %s', $url, - $result['code'], - Response::STATUS_CODES[$result['code']] ?? '', + $response['code'], + Response::STATUS_CODES[$response['code']] ?? '', // If debug, include a part of the response body in the exception message - Debug::isEnabled() ? mb_substr($result['body'], 0, 500) : '', + Debug::isEnabled() ? mb_substr($response['body'], 0, 500) : '', ); // The following code must be extracted if it grows too much @@ -195,134 +194,141 @@ function getContents( 'Security | Glassdoor', ]; foreach ($cloudflareTitles as $cloudflareTitle) { - if (str_contains($result['body'], $cloudflareTitle)) { - throw new CloudFlareException($exceptionMessage, $result['code']); + if (str_contains($response['body'], $cloudflareTitle)) { + throw new CloudFlareException($exceptionMessage, $response['code']); } } - - throw new HttpException($exceptionMessage, $result['code']); + throw new HttpException(trim($exceptionMessage), $response['code']); } if ($returnFull === true) { + // For legacy reasons, use content instead of body + $response['content'] = $response['body']; + unset($response['body']); return $response; } - return $response['content']; + return $response['body']; } -/** - * Fetch content from url - * - * @internal Private function used internally - * @throws HttpException - */ -function _http_request(string $url, array $config = []): array +interface HttpClient { - $defaults = [ - 'useragent' => null, - 'timeout' => 5, - 'headers' => [], - 'proxy' => null, - 'curl_options' => [], - 'if_not_modified_since' => null, - 'retries' => 3, - 'max_filesize' => 
null, - ]; - $config = array_merge($defaults, $config); - - $ch = curl_init($url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_MAXREDIRS, 5); - curl_setopt($ch, CURLOPT_HEADER, false); - $httpHeaders = []; - foreach ($config['headers'] as $name => $value) { - $httpHeaders[] = sprintf('%s: %s', $name, $value); - } - curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders); - if ($config['useragent']) { - curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); - } - curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); - curl_setopt($ch, CURLOPT_ENCODING, ''); - curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); - - if ($config['max_filesize']) { - // This option inspects the Content-Length header - curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']); - curl_setopt($ch, CURLOPT_NOPROGRESS, false); - // This progress function will monitor responses who omit the Content-Length header - curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) { - if ($downloaded > $config['max_filesize']) { - // Return a non-zero value to abort the transfer - return -1; - } - return 0; - }); - } - - if ($config['proxy']) { - curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); - } - if (curl_setopt_array($ch, $config['curl_options']) === false) { - throw new \Exception('Tried to set an illegal curl option'); - } + public function request(string $url, array $config = []): array; +} - if ($config['if_not_modified_since']) { - curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); - curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); - } +final class CurlHttpClient implements HttpClient +{ + public function request(string $url, array $config = []): array + { + $defaults = [ + 'useragent' => null, + 'timeout' => 5, + 'headers' => [], + 'proxy' => null, + 'curl_options' => [], + 
'if_not_modified_since' => null, + 'retries' => 3, + 'max_filesize' => null, + 'max_redirections' => 5, + ]; + $config = array_merge($defaults, $config); - $responseStatusLines = []; - $responseHeaders = []; - curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { - $len = strlen($rawHeader); - if ($rawHeader === "\r\n") { - return $len; + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_MAXREDIRS, $config['max_redirections']); + curl_setopt($ch, CURLOPT_HEADER, false); + $httpHeaders = []; + foreach ($config['headers'] as $name => $value) { + $httpHeaders[] = sprintf('%s: %s', $name, $value); } - if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { - $responseStatusLines[] = $rawHeader; - return $len; + curl_setopt($ch, CURLOPT_HTTPHEADER, $httpHeaders); + if ($config['useragent']) { + curl_setopt($ch, CURLOPT_USERAGENT, $config['useragent']); } - $header = explode(':', $rawHeader); - if (count($header) === 1) { - return $len; + curl_setopt($ch, CURLOPT_TIMEOUT, $config['timeout']); + curl_setopt($ch, CURLOPT_ENCODING, ''); + curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS); + + if ($config['max_filesize']) { + // This option inspects the Content-Length header + curl_setopt($ch, CURLOPT_MAXFILESIZE, $config['max_filesize']); + curl_setopt($ch, CURLOPT_NOPROGRESS, false); + // This progress function will monitor responses who omit the Content-Length header + curl_setopt($ch, CURLOPT_PROGRESSFUNCTION, function ($ch, $downloadSize, $downloaded, $uploadSize, $uploaded) use ($config) { + if ($downloaded > $config['max_filesize']) { + // Return a non-zero value to abort the transfer + return -1; + } + return 0; + }); + } + + if ($config['proxy']) { + curl_setopt($ch, CURLOPT_PROXY, $config['proxy']); } - $name = mb_strtolower(trim($header[0])); - $value = trim(implode(':', 
array_slice($header, 1))); - if (!isset($responseHeaders[$name])) { - $responseHeaders[$name] = []; + if (curl_setopt_array($ch, $config['curl_options']) === false) { + throw new \Exception('Tried to set an illegal curl option'); } - $responseHeaders[$name][] = $value; - return $len; - }); - $attempts = 0; - while (true) { - $attempts++; - $data = curl_exec($ch); - if ($data !== false) { - // The network call was successful, so break out of the loop - break; + if ($config['if_not_modified_since']) { + curl_setopt($ch, CURLOPT_TIMEVALUE, $config['if_not_modified_since']); + curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); } - if ($attempts > $config['retries']) { - // Finally give up - throw new HttpException(sprintf( - 'cURL error %s: %s (%s) for %s', - curl_error($ch), - curl_errno($ch), - 'https://curl.haxx.se/libcurl/c/libcurl-errors.html', - $url - )); + + $responseStatusLines = []; + $responseHeaders = []; + curl_setopt($ch, CURLOPT_HEADERFUNCTION, function ($ch, $rawHeader) use (&$responseHeaders, &$responseStatusLines) { + $len = strlen($rawHeader); + if ($rawHeader === "\r\n") { + return $len; + } + if (preg_match('#^HTTP/(2|1.1|1.0)#', $rawHeader)) { + $responseStatusLines[] = $rawHeader; + return $len; + } + $header = explode(':', $rawHeader); + if (count($header) === 1) { + return $len; + } + $name = mb_strtolower(trim($header[0])); + $value = trim(implode(':', array_slice($header, 1))); + if (!isset($responseHeaders[$name])) { + $responseHeaders[$name] = []; + } + $responseHeaders[$name][] = $value; + return $len; + }); + + $attempts = 0; + while (true) { + $attempts++; + $data = curl_exec($ch); + if ($data !== false) { + // The network call was successful, so break out of the loop + break; + } + if ($attempts > $config['retries']) { + // Finally give up + $curl_error = curl_error($ch); + $curl_errno = curl_errno($ch); + throw new HttpException(sprintf( + 'cURL error %s: %s (%s) for %s', + $curl_error, + $curl_errno, + 
'https://curl.haxx.se/libcurl/c/libcurl-errors.html', + $url + )); + } } - } - $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - return [ - 'code' => $statusCode, - 'status_lines' => $responseStatusLines, - 'headers' => $responseHeaders, - 'body' => $data, - ]; + $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + return [ + 'code' => $statusCode, + 'status_lines' => $responseStatusLines, + 'headers' => $responseHeaders, + 'body' => $data, + ]; + } } /** @@ -385,7 +391,7 @@ function getSimpleHTMLDOM( * _Notice_: Cached contents are forcefully removed after 24 hours (86400 seconds). * * @param string $url The URL. - * @param int $duration Cache duration in seconds. + * @param int $timeout Cache duration in seconds. * @param array $header (optional) A list of cURL header. * For more information follow the links below. * * https://php.net/manual/en/function.curl-setopt.php @@ -410,7 +416,7 @@ function getSimpleHTMLDOM( */ function getSimpleHTMLDOMCached( $url, - $duration = 86400, + $timeout = 86400, $header = [], $opts = [], $lowercase = true, @@ -420,40 +426,18 @@ function getSimpleHTMLDOMCached( $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT ) { - Logger::debug(sprintf('Caching url %s, duration %d', $url, $duration)); - - // Initialize cache - $cacheFactory = new CacheFactory(); - - $cache = $cacheFactory->create(); + $cache = RssBridge::getCache(); $cache->setScope('pages'); - $cache->purgeCache(86400); // 24 hours (forced) - - $params = [$url]; - $cache->setKey($params); - - // Determine if cached file is within duration - $time = $cache->getTime(); - if ( - $time !== false - && (time() - $duration < $time) - && !Debug::isEnabled() - ) { - // Contents within duration and debug mode is disabled - $content = $cache->loadData(); - } else { - // Contents not within duration, or debug mode is enabled - $content = getContents( - $url, - $header ?? [], - $opts ?? 
[] - ); - // todo: fix bad if statement - if ($content !== false) { - $cache->saveData($content); - } + $cache->setKey([$url]); + $content = $cache->loadData($timeout); + if (!$content || Debug::isEnabled()) { + $content = getContents($url, $header ?? [], $opts ?? []); + } + if ($content) { + $cache->setScope('pages'); + $cache->setKey([$url]); + $cache->saveData($content); } - return str_get_html( $content, $lowercase, diff --git a/lib/utils.php b/lib/utils.php index ea329f8dec4..94f928cd7ad 100644 --- a/lib/utils.php +++ b/lib/utils.php @@ -51,11 +51,13 @@ function get_current_url(): string function create_sane_exception_message(\Throwable $e): string { + $sanitizedMessage = sanitize_root($e->getMessage()); + $sanitizedFilepath = sanitize_root($e->getFile()); return sprintf( '%s: %s in %s line %s', get_class($e), - sanitize_root($e->getMessage()), - sanitize_root($e->getFile()), + $sanitizedMessage, + $sanitizedFilepath, $e->getLine() ); } diff --git a/tests/Actions/ActionImplementationTest.php b/tests/Actions/ActionImplementationTest.php index bf5dc4f9c78..e70dd7e2fab 100644 --- a/tests/Actions/ActionImplementationTest.php +++ b/tests/Actions/ActionImplementationTest.php @@ -10,6 +10,11 @@ class ActionImplementationTest extends TestCase private $class; private $obj; + public function setUp(): void + { + \Configuration::loadConfiguration(); + } + /** * @dataProvider dataActionsProvider */ diff --git a/tests/Actions/ListActionTest.php b/tests/Actions/ListActionTest.php index 4373be7616b..e0625fb38ce 100644 --- a/tests/Actions/ListActionTest.php +++ b/tests/Actions/ListActionTest.php @@ -7,6 +7,11 @@ class ListActionTest extends TestCase { + public function setUp(): void + { + \Configuration::loadConfiguration(); + } + public function testHeaders() { $action = new \ListAction(); diff --git a/tests/BridgeFactoryTest.php b/tests/BridgeFactoryTest.php new file mode 100644 index 00000000000..a97711ef25b --- /dev/null +++ b/tests/BridgeFactoryTest.php @@ -0,0 +1,30 @@ 
+<?php + +namespace RssBridge\Tests; + +use PHPUnit\Framework\TestCase; + +class BridgeFactoryTest extends TestCase +{ + public function setUp(): void + { + \Configuration::loadConfiguration(); + } + + public function testNormalizeBridgeName() + { + $this->assertSame('TwitterBridge', \BridgeFactory::normalizeBridgeName('TwitterBridge')); + $this->assertSame('TwitterBridge', \BridgeFactory::normalizeBridgeName('TwitterBridge.php')); + $this->assertSame('TwitterBridge', \BridgeFactory::normalizeBridgeName('Twitter')); + } + + public function testSanitizeBridgeName() + { + $sut = new \BridgeFactory(); + + $this->assertSame('TwitterBridge', $sut->createBridgeClassName('twitterbridge')); + $this->assertSame('TwitterBridge', $sut->createBridgeClassName('twitter')); + $this->assertSame('TwitterBridge', $sut->createBridgeClassName('tWitTer')); + $this->assertSame('TwitterBridge', $sut->createBridgeClassName('TWITTERBRIDGE')); + } +} diff --git a/tests/CacheTest.php b/tests/CacheTest.php index 042fc7a1302..9a8ada142a3 100644 --- a/tests/CacheTest.php +++ b/tests/CacheTest.php @@ -6,6 +6,18 @@ class CacheTest extends TestCase { + public function testConfig() + { + $sut = new \FileCache(['path' => '/tmp/']); + $this->assertSame(['path' => '/tmp/', 'enable_purge' => true], $sut->getConfig()); + + $sut = new \FileCache(['path' => '/', 'enable_purge' => false]); + $this->assertSame(['path' => '/', 'enable_purge' => false], $sut->getConfig()); + + $sut = new \FileCache(['path' => '/tmp', 'enable_purge' => true]); + $this->assertSame(['path' => '/tmp/', 'enable_purge' => true], $sut->getConfig()); + } + public function testFileCache() { $temporaryFolder = sprintf('%s/rss_bridge_%s/', sys_get_temp_dir(), create_random_string()); @@ -19,6 +31,7 @@ public function testFileCache() $sut->purgeCache(-1); $sut->setKey(['key']); + $this->assertNull($sut->getTime()); $this->assertNull($sut->loadData()); $sut->saveData('data'); diff --git a/tests/ConfigurationTest.php 
b/tests/ConfigurationTest.php index e913e463c1c..c2eca95cea3 100644 --- a/tests/ConfigurationTest.php +++ b/tests/ConfigurationTest.php @@ -14,6 +14,7 @@ public function testValueFromDefaultConfig() Configuration::loadConfiguration(); $this->assertSame(null, Configuration::getConfig('foobar', '')); $this->assertSame(null, Configuration::getConfig('foo', 'bar')); + $this->assertSame('baz', Configuration::getConfig('foo', 'bar', 'baz')); $this->assertSame(null, Configuration::getConfig('cache', '')); $this->assertSame('UTC', Configuration::getConfig('system', 'timezone')); } @@ -26,13 +27,18 @@ public function testValueFromCustomConfig() public function testValueFromEnv() { - putenv('RSSBRIDGE_system_timezone=Europe/Berlin'); - putenv('RSSBRIDGE_TwitterV2Bridge_twitterv2apitoken=aaa'); - putenv('RSSBRIDGE_SQLiteCache_file=bbb'); - Configuration::loadConfiguration([], getenv()); + $env = [ + 'RSSBRIDGE_system_timezone' => 'Europe/Berlin', + 'RSSBRIDGE_SYSTEM_MESSAGE' => 'hello', + 'RSSBRIDGE_system_enabled_bridges' => 'TwitterBridge,GettrBridge', + 'RSSBRIDGE_system_enable_debug_mode' => 'true', + 'RSSBRIDGE_fileCache_path' => '/tmp/kek', + ]; + Configuration::loadConfiguration([], $env); $this->assertSame('Europe/Berlin', Configuration::getConfig('system', 'timezone')); - $this->assertSame('aaa', Configuration::getConfig('TwitterV2Bridge', 'twitterv2apitoken')); - $this->assertSame('bbb', Configuration::getConfig('SQLiteCache', 'file')); - $this->assertSame('bbb', Configuration::getConfig('sqlitecache', 'file')); + $this->assertSame('hello', Configuration::getConfig('system', 'message')); + $this->assertSame(true, Configuration::getConfig('system', 'enable_debug_mode')); + $this->assertSame('/tmp/kek', Configuration::getConfig('FileCache', 'path')); + $this->assertSame(['TwitterBridge', 'GettrBridge'], Configuration::getConfig('system', 'enabled_bridges')); } } diff --git a/whitelist.default.txt b/whitelist.default.txt deleted file mode 100644 index 
17df47ee548..00000000000 --- a/whitelist.default.txt +++ /dev/null @@ -1,9 +0,0 @@ -Youtube -Twitter -Telegram -Reddit -Filter -Vk -FeedMerge -Twitch -ThePirateBay