diff --git a/.github/prtester.py b/.github/prtester.py index 30a9f43b639..3d7dae99bd3 100644 --- a/.github/prtester.py +++ b/.github/prtester.py @@ -5,6 +5,7 @@ from datetime import datetime from typing import Iterable import os.path +import urllib # This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge # @@ -45,15 +46,14 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w bridgeid = bridge_card.get('id') bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata print(f'{bridgeid}{instance_suffix}') - bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html' bridge_name = bridgeid.replace('Bridge', '') context_forms = bridge_card.find_all("form") form_number = 1 for context_form in context_forms: # a bridge can have multiple contexts, named 'forms' in html - # this code will produce a fully working formstring that should create a working feed when called + # this code will produce a fully working url that should create a working feed when called # this will create an example feed for every single context, to test them all - formstring = '' + context_parameters = {} error_messages = [] context_name = '*untitled*' context_name_element = context_form.find_previous_sibling('h5') @@ -62,27 +62,27 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w parameters = context_form.find_all("input") lists = context_form.find_all("select") # this for/if mess cycles through all available input parameters, checks if it required, then pulls - # the default or examplevalue and then combines it all together into the formstring + # the default or examplevalue and then combines it all together into the url parameters # if an example or default value is missing for a required attribute, it will throw an error # any non-required fields are not tested!!! 
for parameter in parameters: - if parameter.get('type') == 'hidden' and parameter.get('name') == 'context': - cleanvalue = parameter.get('value').replace(" ","+") - formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue - if parameter.get('type') == 'number' or parameter.get('type') == 'text': + parameter_type = parameter.get('type') + parameter_name = parameter.get('name') + if parameter_type == 'hidden': + context_parameters[parameter_name] = parameter.get('value') + if parameter_type == 'number' or parameter_type == 'text': if parameter.has_attr('required'): if parameter.get('placeholder') == '': if parameter.get('value') == '': - name_value = parameter.get('name') - error_messages.append(f'Missing example or default value for parameter "{name_value}"') + error_messages.append(f'Missing example or default value for parameter "{parameter_name}"') else: - formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value') + context_parameters[parameter_name] = parameter.get('value') else: - formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder') - # same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring - if parameter.get('type') == 'checkbox': + context_parameters[parameter_name] = parameter.get('placeholder') + # same thing, just for checkboxes. 
If a checkbox is checked per default, it gets added to the url parameters + if parameter_type == 'checkbox': if parameter.has_attr('checked'): - formstring = formstring + '&' + parameter.get('name') + '=on' + context_parameters[parameter_name] = 'on' for listing in lists: selectionvalue = '' listname = listing.get('name') @@ -102,15 +102,21 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w if 'selected' in selectionentry.attrs: selectionvalue = selectionentry.get('value') break - formstring = formstring + '&' + listname + '=' + selectionvalue + context_parameters[listname] = selectionvalue termpad_url = 'about:blank' if error_messages: status = '
'.join(map(lambda m: f'❌ `{m}`', error_messages)) else: - # if all example/default values are present, form the full request string, run the request, add a tag with + # if all example/default values are present, form the full request url, run the request, add a tag with # the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and # then upload it to termpad.com, a pastebin-like-site. - response = requests.get(instance.url + bridgestring + formstring) + context_parameters.update({ + 'action': 'display', + 'bridge': bridgeid, + 'format': 'Html', + }) + request_url = f'{instance.url}/?{urllib.parse.urlencode(context_parameters)}' + response = requests.get(request_url) page_text = response.text.replace('','') page_text = page_text.encode("utf_8") soup = BeautifulSoup(page_text, "html.parser") @@ -163,8 +169,8 @@ def getFirstLine(value: str) -> str: for instance_arg in args.instances: instance_arg_parts = instance_arg.split('::') instance = Instance() - instance.name = instance_arg_parts[1] if len(instance_arg_parts) >= 2 else '' - instance.url = instance_arg_parts[0] + instance.name = instance_arg_parts[1].strip() if len(instance_arg_parts) >= 2 else '' + instance.url = instance_arg_parts[0].strip().rstrip("/") instances.append(instance) else: instance = Instance() diff --git a/Dockerfile b/Dockerfile index 2f1f4f3d93a..1326dba0dbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,3 @@ -FROM lwthiker/curl-impersonate:0.5-ff-slim-buster AS curlimpersonate - FROM debian:12-slim AS rssbridge LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one." 
@@ -7,7 +5,8 @@ LABEL repository="https://github.com/RSS-Bridge/rss-bridge" LABEL website="https://github.com/RSS-Bridge/rss-bridge" ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get update && \ +RUN set -xe && \ + apt-get update && \ apt-get install --yes --no-install-recommends \ ca-certificates \ nginx \ @@ -24,18 +23,44 @@ RUN apt-get update && \ php-xml \ php-zip \ # php-zlib is enabled by default with PHP 8.2 in Debian 12 + # for downloading libcurl-impersonate + curl \ && \ + # install curl-impersonate library + curlimpersonate_version=0.6.0 && \ + { \ + { \ + [ $(arch) = 'aarch64' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \ + sha512sum="d04b1eabe71f3af06aa1ce99b39a49c5e1d33b636acedcd9fad163bc58156af5c3eb3f75aa706f335515791f7b9c7a6c40ffdfa47430796483ecef929abd905d" \ + ; } \ + || { \ + [ $(arch) = 'armv7l' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \ + sha512sum="05906b4efa1a6ed8f3b716fd83d476b6eea6bfc68e3dbc5212d65a2962dcaa7bd1f938c9096a7535252b11d1d08fb93adccc633585ff8cb8cec5e58bfe969bc9" \ + ; } \ + || { \ + [ $(arch) = 'x86_64' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \ + sha512sum="480bbe9452cd9aff2c0daaaf91f1057b3a96385f79011628a9237223757a9b0d090c59cb5982dc54ea0d07191657299ea91ca170a25ced3d7d410fcdff130ace" \ + ; } \ + } && \ + curl -LO "https://github.com/lwthiker/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \ + echo "$sha512sum $archive" | sha512sum -c - && \ + mkdir -p /usr/local/lib/curl-impersonate && \ + tar xaf "$archive" -C /usr/local/lib/curl-impersonate --wildcards 'libcurl-impersonate-ff.so*' && \ + rm "$archive" && \ + apt-get purge --assume-yes curl && \ rm -rf /var/lib/apt/lists/* +ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so +ENV CURL_IMPERSONATE ff91esr + # logs should go to stdout / stderr RUN ln -sfT /dev/stderr 
/var/log/nginx/error.log; \ ln -sfT /dev/stdout /var/log/nginx/access.log; \ chown -R --no-dereference www-data:adm /var/log/nginx/ -COPY --from=curlimpersonate /usr/local/lib/libcurl-impersonate-ff.so /usr/local/lib/curl-impersonate/ -ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so -ENV CURL_IMPERSONATE ff91esr - COPY ./config/nginx.conf /etc/nginx/sites-available/default COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini diff --git a/README.md b/README.md index f8d08058ddd..6124a4eac96 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ These instructions have been tested on a fresh Debian 12 VM from Digital Ocean ( ```shell timedatectl set-timezone Europe/Oslo -apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl +apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl # Create a new user account useradd --shell /bin/bash --create-home rss-bridge @@ -167,12 +167,10 @@ Restart fpm and nginx: ```shell # Lint and restart php-fpm -php-fpm8.2 -t -systemctl restart php8.2-fpm +php-fpm8.2 -t && systemctl restart php8.2-fpm # Lint and restart nginx -nginx -t -systemctl restart nginx +nginx -t && systemctl restart nginx ``` ### How to install from Composer diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index ed063825a87..93813004f22 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -32,7 +32,7 @@ public function execute(Request $request) return new Response('', 304, ['last-modified' => $modificationTimeGMT . 'GMT']); } } - return $cachedResponse; + return $cachedResponse->withHeader('rss-bridge', 'This is a cached response'); } if (!$bridgeName) { @@ -51,7 +51,6 @@ public function execute(Request $request) return new Response(render(__DIR__ . 
'/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400); } - if ( Configuration::getConfig('proxy', 'url') && Configuration::getConfig('proxy', 'by_bridge') @@ -62,8 +61,6 @@ public function execute(Request $request) } $bridge = $bridgeFactory->create($bridgeClassName); - $formatFactory = new FormatFactory(); - $format = $formatFactory->create($format); $response = $this->createResponse($request, $bridge, $format); @@ -93,7 +90,7 @@ public function execute(Request $request) return $response; } - private function createResponse(Request $request, BridgeAbstract $bridge, FormatAbstract $format) + private function createResponse(Request $request, BridgeAbstract $bridge, string $format) { $items = []; $feed = []; @@ -157,6 +154,9 @@ private function createResponse(Request $request, BridgeAbstract $bridge, Format } } + $formatFactory = new FormatFactory(); + $format = $formatFactory->create($format); + $format->setItems($items); $format->setFeed($feed); $now = time(); diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index e30c6b70c96..4c09c28c048 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -12,8 +12,20 @@ class AO3Bridge extends BridgeAbstract 'url' => [ 'name' => 'url', 'required' => true, - // Example: F/F tag, complete works only - 'exampleValue' => 'https://archiveofourown.org/works?work_search[complete]=T&tag_id=F*s*F', + // Example: F/F tag + 'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works', + ], + 'range' => [ + 'name' => 'Chapter Content', + 'title' => 'Chapter(s) to include in each work\'s feed entry', + 'defaultValue' => null, + 'type' => 'list', + 'values' => [ + 'None' => null, + 'First' => 'first', + 'Latest' => 'last', + 'Entire work' => 'all', + ], ], ], 'Bookmarks' => [ @@ -39,18 +51,13 @@ public function collectData() { switch ($this->queriedContext) { case 'Bookmarks': - $user = $this->getInput('user'); - $this->title = $user; - $url = self::URI - . '/users/' . $user - . 
'/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; - $this->collectList($url); + $this->collectList($this->getURI()); break; case 'List': - $this->collectList($this->getInput('url')); + $this->collectList($this->getURI()); break; case 'Work': - $this->collectWork($this->getInput('id')); + $this->collectWork($this->getURI()); break; } } @@ -61,9 +68,21 @@ public function collectData() */ private function collectList($url) { - $html = getSimpleHTMLDOM($url); + $httpClient = RssBridge::getHttpClient(); + $version = 'v0.0.1'; + $agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"]; + + $response = $httpClient->request($url, $agent); + $html = \str_get_html($response->getBody()); $html = defaultLinkTo($html, self::URI); + // Get list title. Will include page range + count in some cases + $heading = ($html->find('#main h2', 0)); + if ($heading->find('a.tag')) { + $heading = $heading->find('a.tag', 0); + } + $this->title = $heading->plaintext; + foreach ($html->find('.index.group > li') as $element) { $item = []; @@ -72,17 +91,65 @@ private function collectList($url) continue; // discard deleted works } $item['title'] = $title->plaintext; - $item['content'] = $element; $item['uri'] = $title->href; $strdate = $element->find('div p.datetime', 0)->plaintext; $item['timestamp'] = strtotime($strdate); + // detach from rest of page because remove() is buggy + $element = str_get_html($element->outertext()); + $tags = $element->find('ul.required-tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + $tags = $element->find('ul.tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + + $item['content'] = implode('', $element->childNodes()); + $chapters = $element->find('dl dd.chapters', 0); // bookmarked series and external works do not have a chapters count $chapters 
= (isset($chapters) ? $chapters->plaintext : 0); $item['uid'] = $item['uri'] . "/$strdate/$chapters"; + // Fetch workskin of desired chapter(s) in list + if ($this->getInput('range')) { + $url = $item['uri']; + switch ($this->getInput('range')) { + case ('all'): + $url .= '?view_full_work=true'; + break; + case ('first'): + break; + case ('last'): + // only way to get this is using the navigate page unfortunately + $url .= '/navigate'; + $response = $httpClient->request($url, $agent); + $html = \str_get_html($response->getBody()); + $html = defaultLinkTo($html, self::URI); + $url = $html->find('ol.index.group > li > a', -1)->href; + break; + } + $response = $httpClient->request($url, $agent); + $html = \str_get_html($response->getBody()); + $html = defaultLinkTo($html, self::URI); + // remove duplicate fic summary + if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) { + $ficsum->remove(); + } + $item['content'] .= $html->find('#workskin', 0); + } + + // Use predictability of download links to generate enclosures + $wid = explode('/', $item['uri'])[4]; + foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) { + $item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext; + } + $this->items[] = $item; } } @@ -90,26 +157,29 @@ private function collectList($url) /** * Feed for recent chapters of a specific work. */ - private function collectWork($id) + private function collectWork($url) { - $url = self::URI . "/works/$id/navigate"; $httpClient = RssBridge::getHttpClient(); - $version = 'v0.0.1'; - $response = $httpClient->request($url, [ - 'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)", - ]); + $agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"]; + $response = $httpClient->request($url . '/navigate', $agent); $html = \str_get_html($response->getBody()); $html = defaultLinkTo($html, self::URI); + $response = $httpClient->request($url . 
'?view_full_work=true', $agent); + $workhtml = \str_get_html($response->getBody()); + $workhtml = defaultLinkTo($workhtml, self::URI); + $this->title = $html->find('h2 a', 0)->plaintext; - foreach ($html->find('ol.index.group > li') as $element) { + $nav = $html->find('ol.index.group > li'); + for ($i = 0; $i < count($nav); $i++) { $item = []; + $element = $nav[$i]; $item['title'] = $element->find('a', 0)->plaintext; - $item['content'] = $element; + $item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0); $item['uri'] = $element->find('a', 0)->href; $strdate = $element->find('span.datetime', 0)->plaintext; @@ -138,4 +208,24 @@ public function getIcon() { return self::URI . '/favicon.ico'; } + + public function getURI() + { + $url = parent::getURI(); + switch ($this->queriedContext) { + case 'Bookmarks': + $user = $this->getInput('user'); + $url = self::URI + . '/users/' . $user + . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; + break; + case 'List': + $url = $this->getInput('url'); + break; + case 'Work': + $url = self::URI . '/works/' . 
$this->getInput('id'); + break; + } + return $url; + } } diff --git a/bridges/AllegroBridge.php b/bridges/AllegroBridge.php index 7cad11f1709..55e9f116040 100644 --- a/bridges/AllegroBridge.php +++ b/bridges/AllegroBridge.php @@ -13,12 +13,9 @@ class AllegroBridge extends BridgeAbstract 'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660', 'required' => true, ], - 'sessioncookie' => [ - 'name' => 'The \'wdctx\' session cookie', - 'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits', - 'pattern' => '^.{70,};?$', - // phpcs:ignore - 'exampleValue' => 'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd', + 'cookie' => [ + 'name' => 'The complete cookie value', + 'title' => 'Paste the value of the cookie value from your browser if you want to prevent Allegro imposing rate limits', 'required' => false, ], 'includeSponsoredOffers' => [ @@ -70,9 +67,9 @@ public function collectData() $opts = []; - // If a session cookie is provided - if ($sessioncookie = $this->getInput('sessioncookie')) { - $opts[CURLOPT_COOKIE] = 'wdctx=' . 
$sessioncookie; + // If a cookie is provided + if ($cookie = $this->getInput('cookie')) { + $opts[CURLOPT_COOKIE] = $cookie; } $html = getSimpleHTMLDOM($url, [], $opts); @@ -84,11 +81,11 @@ public function collectData() $results = $html->find('article[data-analytics-view-custom-context="REGULAR"]'); - if (!$this->getInput('includeSponsoredOffers')) { + if ($this->getInput('includeSponsoredOffers')) { $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]')); } - if (!$this->getInput('includePromotedOffers')) { + if ($this->getInput('includePromotedOffers')) { $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]')); } diff --git a/bridges/AnisearchBridge.php b/bridges/AnisearchBridge.php new file mode 100644 index 00000000000..d5aad1c9352 --- /dev/null +++ b/bridges/AnisearchBridge.php @@ -0,0 +1,86 @@ + [ + 'name' => 'Dub', + 'type' => 'list', + 'values' => [ + 'DE' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=de&sort=date&order=desc&view=4', + 'EN' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=en&sort=date&order=desc&view=4', + 'JP' + => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=ja&sort=date&order=desc&view=4' + ] + ], + 'trailers' => [ + 'name' => 'Trailers', + 'type' => 'checkbox', + 'title' => 'Will include trailes', + 'defaultValue' => false + ] + ]]; + + public function collectData() + { + $baseurl = 'https://www.anisearch.de/'; + $trailers = false; + $trailers = $this->getInput('trailers'); + $limit = 10; + if ($trailers) { + $limit = 5; + } + + $dom = getSimpleHTMLDOM($this->getInput('category')); + + foreach ($dom->find('li.btype0') as $key => $li) { + if ($key >= $limit) { + break; + } + + $a = $li->find('a', 0); + $title = $a->find('span.title', 0); + $url = $baseurl . 
$a->href; + + //get article + $domarticle = getSimpleHTMLDOM($url); + $content = $domarticle->find('div.details-text', 0); + + //get header-image and set absolute src + $headerimage = $domarticle->find('img#details-cover', 0); + $src = $headerimage->src; + + foreach ($content->find('.hidden') as $element) { + $element->remove(); + } + + //get trailer + $ytlink = ''; + if ($trailers) { + $trailerlink = $domarticle->find('section#trailers > div > div.swiper > ul.swiper-wrapper > li.swiper-slide > a', 0); + if (isset($trailerlink)) { + $trailersite = getSimpleHTMLDOM($baseurl . $trailerlink->href); + $trailer = $trailersite->find('div#player > iframe', 0); + $ytlink = <<'; + EOT; + } + } + + $this->items[] = [ + 'title' => $title->plaintext, + 'uri' => $url, + 'content' => $headerimage . '
' . $content . $ytlink + ]; + } + } +} diff --git a/bridges/AnnasArchiveBridge.php b/bridges/AnnasArchiveBridge.php index acb943b497b..b857fadfc80 100644 --- a/bridges/AnnasArchiveBridge.php +++ b/bridges/AnnasArchiveBridge.php @@ -126,30 +126,36 @@ public function collectData() return; } - $elements = $list->find('.w-full > .mb-4 > div > a'); + $elements = $list->find('.w-full > .mb-4 > div'); foreach ($elements as $element) { - $item = []; - $item['title'] = $element->find('h3', 0)->plaintext; - $item['author'] = $element->find('div.italic', 0)->plaintext; - $item['uri'] = $element->href; - $item['content'] = $element->plaintext; - $item['uid'] = $item['uri']; + // stop added entries once partial match list starts + if (str_contains($element->innertext, 'partial match')) { + break; + } + if ($element = $element->find('a', 0)) { + $item = []; + $item['title'] = $element->find('h3', 0)->plaintext; + $item['author'] = $element->find('div.italic', 0)->plaintext; + $item['uri'] = $element->href; + $item['content'] = $element->plaintext; + $item['uid'] = $item['uri']; - $item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20); - if ($item_html) { - $item_html = defaultLinkTo($item_html, self::URI); - $item['content'] .= $item_html->find('main img', 0); - $item['content'] .= $item_html->find('main .mt-4', 0); // Summary - if ($links = $item_html->find('main ul.mb-4', -1)) { - foreach ($links->find('li > a.js-download-link') as $file) { - $item['enclosures'][] = $file->href; + $item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20); + if ($item_html) { + $item_html = defaultLinkTo($item_html, self::URI); + $item['content'] .= $item_html->find('main img', 0); + $item['content'] .= $item_html->find('main .mt-4', 0); // Summary + foreach ($item_html->find('main ul.mb-4 > li > a.js-download-link') as $file) { + if (!str_contains($file->href, 'fast_download')) { + $item['enclosures'][] = $file->href; + } } // Remove bulk torrents from enclosures list 
$item['enclosures'] = array_diff($item['enclosures'], [self::URI . 'datasets']); } - } - $this->items[] = $item; + $this->items[] = $item; + } } } diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index 2c631871caf..fcb1bd4fb18 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -39,6 +39,10 @@ protected function parseItem(array $item) $item_html = defaultLinkTo($item_html, self::URI); $item['content'] = $item_html->find('.article-content', 0); + $parsely = $item_html->find('[name="parsely-page"]', 0); + $parsely_json = json_decode(html_entity_decode($parsely->content), true); + $item['categories'] = $parsely_json['tags']; + $pages = $item_html->find('nav.page-numbers > .numbers > a', -2); if (null !== $pages) { for ($i = 2; $i <= $pages->innertext; $i++) { diff --git a/bridges/BlizzardNewsBridge.php b/bridges/BlizzardNewsBridge.php index 3930e0a4d1a..993492d404b 100644 --- a/bridges/BlizzardNewsBridge.php +++ b/bridges/BlizzardNewsBridge.php @@ -37,7 +37,7 @@ class BlizzardNewsBridge extends XPathAbstract const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article'; const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2'; - const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]'; + const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]/text()'; const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href'; const XPATH_EXPRESSION_ITEM_AUTHOR = ''; const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp'; @@ -57,4 +57,11 @@ protected function getSourceUrl() } return 'https://news.blizzard.com/' . 
$locale; } + + public function getIcon() + { + return <<find('h1', 0)->plaintext); - $author = $article_dom->find('span.c-assetAuthor_name', 0)->plaintext; + $author = $article_dom->find('span.c-assetAuthor_name', 0); $headline = $article_dom->find('p.c-contentHeader_description', 0); $content = $article_dom->find('div.c-pageArticle_content, div.single-article__content, div.article-main-body', 0); $date = null; @@ -97,7 +97,11 @@ public function collectData() $item = []; $item['uri'] = $article_uri; $item['title'] = $title; - $item['author'] = $author; + + if ($author) { + $item['author'] = $author->plaintext; + } + $item['content'] = $content; if (!is_null($date)) { diff --git a/bridges/CaschyBridge.php b/bridges/CaschyBridge.php index 0e3a07bc723..c25cdb08d42 100644 --- a/bridges/CaschyBridge.php +++ b/bridges/CaschyBridge.php @@ -54,7 +54,7 @@ private function addArticleToItem($item, $article) { // remove unwanted stuff foreach ( - $article->find('div.video-container, div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content, + $article->find('div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content, div.wp-embed, p.wp-caption-text, script') as $element ) { $element->remove(); diff --git a/bridges/ComicsKingdomBridge.php b/bridges/ComicsKingdomBridge.php index 8baf75118af..227426c4edb 100644 --- a/bridges/ComicsKingdomBridge.php +++ b/bridges/ComicsKingdomBridge.php @@ -2,59 +2,65 @@ class ComicsKingdomBridge extends BridgeAbstract { - const MAINTAINER = 'stjohnjohnson'; + const MAINTAINER = 'TReKiE'; + // const MAINTAINER = 'stjohnjohnson'; const NAME = 'Comics Kingdom Unofficial RSS'; - const URI = 'https://comicskingdom.com/'; + const URI = 'https://wp.comicskingdom.com/wp-json/wp/v2/ck_comic'; const CACHE_TIMEOUT = 21600; // 6h const DESCRIPTION = 'Comics Kingdom Unofficial RSS'; const PARAMETERS = [ [ 'comicname' => [ - 'name' => 'comicname', + 'name' => 'Name of comic', 'type' => 'text', 'exampleValue' => 'mutts', 'title' => 'The name of the comic in the URL 
after https://comicskingdom.com/', 'required' => true + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'The number of recent comics to get', + 'defaultValue' => 10 ] ]]; + protected $comicName; + public function collectData() { - $html = getSimpleHTMLDOM($this->getURI(), [], [], true, false); + $json = getContents($this->getURI()); + $data = json_decode($json, false); - // Get author from first page - $author = $html->find('div.author p', 0); - ; + if (isset($data[0]->_embedded->{'wp:term'}[0][0])) { + $this->comicName = $data[0]->_embedded->{'wp:term'}[0][0]->name; + } - // Get current date/link - $link = $html->find('meta[property=og:url]', -1)->content; - for ($i = 0; $i < 3; $i++) { + foreach ($data as $comicitem) { $item = []; - $page = getSimpleHTMLDOM($link); - - $imagelink = $page->find('meta[property=og:image]', 0)->content; - - $date = explode('/', $link); - - $item['id'] = $imagelink; - $item['uri'] = $link; - $item['author'] = $author; - $item['title'] = 'Comics Kingdom ' . $this->getInput('comicname'); - $item['timestamp'] = DateTime::createFromFormat('Y-m-d', $date[count($date) - 1])->getTimestamp(); - $item['content'] = ''; - + $item['id'] = $comicitem->id; + $item['uri'] = $comicitem->yoast_head_json->og_url; + $item['author'] = str_ireplace('By ', '', $comicitem->ck_comic_byline); + $item['title'] = $comicitem->yoast_head_json->title; + $item['timestamp'] = $comicitem->date; + $item['content'] = ''; $this->items[] = $item; - $link = $page->find('div.comic-viewer-inline a', 0)->href; - if (empty($link)) { - break; // allow bridge to continue if there's less than 3 comics - } } } public function getURI() { if (!is_null($this->getInput('comicname'))) { - return self::URI . 
urlencode($this->getInput('comicname')); + $params = [ + 'ck_feature' => $this->getInput('comicname'), + 'per_page' => $this->getInput('limit'), + 'date_inclusive' => 'true', + 'order' => 'desc', + 'page' => '1', + '_embed' => 'true' + ]; + + return self::URI . '?' . http_build_query($params); } return parent::getURI(); @@ -62,8 +68,8 @@ public function getURI() public function getName() { - if (!is_null($this->getInput('comicname'))) { - return $this->getInput('comicname') . ' - Comics Kingdom'; + if ($this->comicName) { + return $this->comicName . ' - Comics Kingdom'; } return parent::getName(); diff --git a/bridges/CssSelectorComplexBridge.php b/bridges/CssSelectorComplexBridge.php index 632e6b6aa3c..a2e001b27ef 100644 --- a/bridges/CssSelectorComplexBridge.php +++ b/bridges/CssSelectorComplexBridge.php @@ -442,7 +442,7 @@ protected function parseEntryElement( if (!is_null($time_selector) && $time_selector != '') { $time_element = $entry_html->find($time_selector, 0); $time = $time_element->getAttribute('datetime'); - if (is_null($time)) { + if (empty($time)) { $time = $time_element->innertext; } diff --git a/bridges/DailythanthiBridge.php b/bridges/DailythanthiBridge.php new file mode 100644 index 00000000000..114f42d824a --- /dev/null +++ b/bridges/DailythanthiBridge.php @@ -0,0 +1,96 @@ + [ + 'name' => 'topic', + 'type' => 'list', + 'values' => [ + 'news' => [ + 'tamilnadu' => 'news/state', + 'india' => 'news/india', + 'world' => 'news/world', + 'sirappu-katturaigal' => 'news/sirappukatturaigal', + ], + 'cinema' => [ + 'news' => 'cinema/cinemanews', + ], + 'sports' => [ + 'sports' => 'sports', + 'cricket' => 'sports/cricket', + 'football' => 'sports/football', + 'tennis' => 'sports/tennis', + 'hockey' => 'sports/hockey', + 'other-sports' => 'sports/othersports', + ], + 'devotional' => [ + 'devotional' => 'others/devotional', + 'aalaya-varalaru' => 'aalaya-varalaru', + ], + ], + ], + ], + ]; + + public function getName() + { + $topic = $this->getKey('topic'); 
+ return self::NAME . ($topic ? ' - ' . ucfirst($topic) : ''); + } + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI . $this->getInput('topic')); + + foreach ($dom->find('div.ListingNewsWithMEDImage') as $element) { + $slug = $element->find('a', 1); + $title = $element->find('h3', 0); + if (!$slug || !$title) { + continue; + } + + $url = self::URI . $slug->href; + $date = $element->find('span', 1); + $date = $date ? $date->{'data-datestring'} : ''; + + $this->items[] = [ + 'content' => $this->constructContent($url), + 'timestamp' => $date ? $date . 'UTC' : '', + 'title' => $title->plaintext, + 'uid' => $slug->href, + 'uri' => $url, + ]; + } + } + + private function constructContent($url) + { + $dom = getSimpleHTMLDOMCached($url); + + $article = $dom->find('div.details-content-story', 0); + if (!$article) { + return 'Content Not Found'; + } + + // Remove ads + foreach ($article->find('div[id*="_ad"]') as $remove) { + $remove->outertext = ''; + } + + // Correct image tag in $article + foreach ($article->find('h-img') as $img) { + $img->parent->outertext = sprintf('

', $img->src); + } + + $image = $dom->find('div.main-image-caption-container img', 0); + $image = $image ? '

' . $image->outertext . '

' : ''; + + return $image . $article; + } +} diff --git a/bridges/DavesTrailerPageBridge.php b/bridges/DavesTrailerPageBridge.php deleted file mode 100644 index 965f7e59b0c..00000000000 --- a/bridges/DavesTrailerPageBridge.php +++ /dev/null @@ -1,40 +0,0 @@ -find('tr') as $tr) { - // If it's a date row, update the current date - if ($tr->align == 'center') { - $curr_date = $tr->plaintext; - continue; - } - - $item = []; - - // title - $item['title'] = $tr->find('td', 0)->find('b', 0)->plaintext; - - // content - $item['content'] = $tr->find('ul', 1); - - // uri - $item['uri'] = $tr->find('a', 3)->getAttribute('href'); - - // date: parsed by FeedItem using strtotime - $item['timestamp'] = $curr_date; - - $this->items[] = $item; - } - } -} diff --git a/bridges/DealabsBridge.php b/bridges/DealabsBridge.php index a5a3771b3f8..3ee1c6f5464 100644 --- a/bridges/DealabsBridge.php +++ b/bridges/DealabsBridge.php @@ -1913,11 +1913,7 @@ class DealabsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/bons-plans?merchant-id=', 'request-error' => 'Impossible de joindre Dealabs', 'thread-error' => 'Impossible de déterminer l\'ID de la discussion. 
Vérifiez l\'URL que vous avez entré', - 'no-results' => 'Il n'y a rien à afficher pour le moment :(', 'currency' => '€', - 'relative-date-indicator' => [ - 'il y a', - ], 'price' => 'Prix', 'shipping' => 'Livraison', 'origin' => 'Origine', @@ -1925,42 +1921,7 @@ class DealabsBridge extends PepperBridgeAbstract 'title-keyword' => 'Recherche', 'title-group' => 'Groupe', 'title-talk' => 'Surveillance Discussion', - 'local-months' => [ - 'janvier', - 'février', - 'mars', - 'avril', - 'mai', - 'juin', - 'juillet', - 'août', - 'septembre', - 'octobre', - 'novembre', - 'décembre' - ], - 'local-time-relative' => [ - 'il y a ', - 'min', - 'h', - 'jour', - 'jours', - 'mois', - 'ans', - 'et ' - ], - 'date-prefixes' => [ - 'Actualisé ', - ], - 'relative-date-alt-prefixes' => [ - 'Actualisé ', - ], - 'relative-date-ignore-suffix' => [ - ], - - 'localdeal' => [ - 'Local', - 'Pays d\'expédition' - ], + 'deal-type' => 'Type de deal', + 'localdeal' => 'Deal Local', ]; } diff --git a/bridges/DiarioDoAlentejoBridge.php b/bridges/DiarioDoAlentejoBridge.php index 9b82b49fc6d..0bd0f1d4e7b 100644 --- a/bridges/DiarioDoAlentejoBridge.php +++ b/bridges/DiarioDoAlentejoBridge.php @@ -47,7 +47,7 @@ public function collectData() }, self::PT_MONTH_NAMES), array_map(function ($num) { return sprintf('-%02d-', $num); - }, range(1, sizeof(self::PT_MONTH_NAMES))), + }, range(1, count(self::PT_MONTH_NAMES))), $element->find('span.date', 0)->innertext ); diff --git a/bridges/DribbbleBridge.php b/bridges/DribbbleBridge.php index 3957c9de828..539127b36ba 100644 --- a/bridges/DribbbleBridge.php +++ b/bridges/DribbbleBridge.php @@ -18,12 +18,12 @@ public function collectData() { $html = getSimpleHTMLDOM(self::URI); - $json = $this->loadEmbeddedJsonData($html); + $data = $this->fetchData($html); foreach ($html->find('li[id^="screenshot-"]') as $shot) { $item = []; - $additional_data = $this->findJsonForShot($shot, $json); + $additional_data = $this->findJsonForShot($shot, $data); if ($additional_data === 
null) { $item['uri'] = self::URI . $shot->find('a', 0)->href; $item['title'] = $shot->find('.shot-title', 0)->plaintext; @@ -46,9 +46,8 @@ public function collectData() } } - private function loadEmbeddedJsonData($html) + private function fetchData($html) { - $json = []; $scripts = $html->find('script'); foreach ($scripts as $script) { @@ -69,12 +68,17 @@ private function loadEmbeddedJsonData($html) $end = strpos($script->innertext, '];') + 1; // convert JSON to PHP array - $json = json_decode(substr($script->innertext, $start, $end - $start), true); - break; + $json = substr($script->innertext, $start, $end - $start); + + try { + // TODO: fix broken json + return Json::decode($json); + } catch (\JsonException $e) { + return []; + } } } - - return $json; + return []; } private function findJsonForShot($shot, $json) diff --git a/bridges/EBayBridge.php b/bridges/EBayBridge.php index 507930ea760..879581645cc 100644 --- a/bridges/EBayBridge.php +++ b/bridges/EBayBridge.php @@ -10,7 +10,7 @@ class EBayBridge extends BridgeAbstract 'url' => [ 'name' => 'Search URL', 'title' => 'Copy the URL from your browser\'s address bar after searching for your items and paste it here', - 'pattern' => '^(https:\/\/)?(www.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk).*$', + 'pattern' => '^(https:\/\/)?(www\.)?(befr\.|benl\.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk).*$', 'exampleValue' => 'https://www.ebay.com/sch/i.html?_nkw=atom+rss', 'required' => true, ] diff --git a/bridges/FDroidRepoBridge.php b/bridges/FDroidRepoBridge.php index 7ce41bafe5c..844f6abbb2c 100644 --- a/bridges/FDroidRepoBridge.php +++ b/bridges/FDroidRepoBridge.php @@ -14,7 +14,7 @@ class FDroidRepoBridge extends BridgeAbstract 'name' => 'Repository URL', 'title' => 'Usually ends with /repo/', 'required' => true, - 'exampleValue' => 'https://srv.tt-rss.org/fdroid/repo' + 'exampleValue' => 
'https://molly.im/fdroid/foss/fdroid/repo' ] ], 'Latest Updates' => [ @@ -35,7 +35,7 @@ class FDroidRepoBridge extends BridgeAbstract 'package' => [ 'name' => 'Package Identifier', 'required' => true, - 'exampleValue' => 'org.fox.ttrss' + 'exampleValue' => 'im.molly.app' ] ] ]; @@ -45,11 +45,7 @@ class FDroidRepoBridge extends BridgeAbstract public function collectData() { - if (!extension_loaded('zip')) { - throw new \Exception('FDroidRepoBridge requires the php-zip extension'); - } - - $this->repo = $this->getRepo(); + $this->repo = $this->fetchData(); switch ($this->queriedContext) { case 'Latest Updates': $this->getAllUpdates(); @@ -58,63 +54,15 @@ public function collectData() $this->getPackage($this->getInput('package')); break; default: - returnServerError('Unimplemented Context (collectData)'); + throw new \Exception('Unimplemented Context (collectData)'); } } - public function getURI() - { - if (empty($this->queriedContext)) { - return parent::getURI(); - } - - $url = rtrim($this->GetInput('url'), '/'); - return strstr($url, '?', true) ?: $url; - } - - public function getName() - { - if (empty($this->queriedContext)) { - return parent::getName(); - } - - $name = $this->repo['repo']['name']; - switch ($this->queriedContext) { - case 'Latest Updates': - return $name; - case 'Follow Package': - return $this->getInput('package') . ' - ' . $name; - default: - returnServerError('Unimplemented Context (getName)'); - } - } - - private function getRepo() + private function fetchData() { $url = $this->getURI(); - - // Get repo information (only available as JAR) - $jar = getContents($url . 
'/index-v1.jar'); - $jar_loc = tempnam(sys_get_temp_dir(), ''); - file_put_contents($jar_loc, $jar); - - // JAR files are specially formatted ZIP files - $jar = new \ZipArchive(); - if ($jar->open($jar_loc) !== true) { - unlink($jar_loc); - throw new \Exception('Failed to extract archive'); - } - - // Get file pointer to the relevant JSON inside - $fp = $jar->getStream('index-v1.json'); - if (!$fp) { - returnServerError('Failed to get file pointer'); - } - - $data = json_decode(stream_get_contents($fp), true); - fclose($fp); - $jar->close(); - unlink($jar_loc); + $json = getContents($url . '/index-v1.json'); + $data = Json::decode($json); return $data; } @@ -158,9 +106,9 @@ private function getAllUpdates() $summary = $lang['summary'] ?? $app['summary'] ?? ''; $description = markdownToHtml(trim($lang['description'] ?? $app['description'] ?? 'None')); $whatsNew = markdownToHtml(trim($lang['whatsNew'] ?? 'None')); - $website = $this->link($lang['webSite'] ?? $app['webSite'] ?? $app['authorWebSite'] ?? null); - $source = $this->link($app['sourceCode'] ?? null); - $issueTracker = $this->link($app['issueTracker'] ?? null); + $website = $this->createAnchor($lang['webSite'] ?? $app['webSite'] ?? $app['authorWebSite'] ?? null); + $source = $this->createAnchor($app['sourceCode'] ?? null); + $issueTracker = $this->createAnchor($app['issueTracker'] ?? null); $license = $app['license'] ?? 'None'; $item['content'] = <<repo['packages'][$package])) { - returnClientError('Invalid Package Name'); + throw new \Exception('Invalid Package Name'); } $package = $this->repo['packages'][$package]; @@ -192,7 +140,7 @@ private function getPackage($package) $item['uri'] = $this->getURI() . '/' . 
$version['apkName']; $item['title'] = $version['versionName']; $item['timestamp'] = date(DateTime::ISO8601, (int) ($version['added'] / 1000)); - $item['uid'] = $version['versionCode']; + $item['uid'] = (string) $version['versionCode']; $size = round($version['size'] / 1048576, 1); // Bytes -> MB $sdk_link = 'https://developer.android.com/studio/releases/platforms'; $item['content'] = <<queriedContext)) { + return parent::getURI(); + } + + $url = rtrim($this->getInput('url'), '/'); + if (strstr($url, '?', true)) { + return strstr($url, '?', true); + } else { + return $url; + } + } + + public function getName() + { + if (empty($this->queriedContext)) { + return parent::getName(); + } + + $name = $this->repo['repo']['name']; + switch ($this->queriedContext) { + case 'Latest Updates': + return $name; + case 'Follow Package': + return $this->getInput('package') . ' - ' . $name; + default: + throw new \Exception('Unimplemented Context (getName)'); + } + } + + private function createAnchor($url) { if (empty($url)) { return null; } - return '' . $url . ''; + return sprintf('%s', $url, $url); } } diff --git a/bridges/FeedMergeBridge.php b/bridges/FeedMergeBridge.php index f2c1d9d5a2c..37b574b6720 100644 --- a/bridges/FeedMergeBridge.php +++ b/bridges/FeedMergeBridge.php @@ -64,6 +64,7 @@ public function collectData() $this->collectExpandableDatas($feed); } catch (HttpException $e) { $this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e))); + // This feed item might be spammy. Considering dropping it. $this->items[] = [ 'title' => 'RSS-Bridge: ' . 
$e->getMessage(), // Give current time so it sorts to the top @@ -71,7 +72,7 @@ public function collectData() ]; continue; } catch (\Exception $e) { - if (str_starts_with($e->getMessage(), 'Unable to parse xml')) { + if (str_starts_with($e->getMessage(), 'Failed to parse xml')) { // Allow this particular exception from FeedExpander $this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e))); continue; @@ -83,6 +84,8 @@ public function collectData() } } + // If $this->items is empty we should consider throw exception here + // Sort by timestamp descending usort($this->items, function ($a, $b) { $t1 = $a['timestamp'] ?? $a['uri'] ?? $a['title']; diff --git a/bridges/FilterBridge.php b/bridges/FilterBridge.php index 1add47f49cf..a1066fb5cfb 100644 --- a/bridges/FilterBridge.php +++ b/bridges/FilterBridge.php @@ -15,6 +15,12 @@ class FilterBridge extends FeedExpander 'exampleValue' => 'https://lorem-rss.herokuapp.com/feed?unit=day', 'required' => true, ], + 'name' => [ + 'name' => 'Feed name (optional)', + 'type' => 'text', + 'exampleValue' => 'My feed', + 'required' => false, + ], 'filter' => [ 'name' => 'Filter (regular expression!!!)', 'required' => false, @@ -77,7 +83,7 @@ public function collectData() { $url = $this->getInput('url'); if (!Url::validate($url)) { - returnClientError('The url parameter must either refer to http or https protocol.'); + throw new \Exception('The url parameter must either refer to http or https protocol.'); } $this->collectExpandableDatas($this->getURI()); } @@ -158,11 +164,18 @@ protected function parseItem(array $item) public function getURI() { $url = $this->getInput('url'); - - if (empty($url)) { - $url = parent::getURI(); + if ($url) { + return $url; } + return parent::getURI(); + } - return $url; + public function getName() + { + $name = $this->getInput('name'); + if ($name) { + return $name; + } + return parent::getName(); } } diff --git a/bridges/GameBananaBridge.php 
b/bridges/GameBananaBridge.php index 591ac0e9647..0f04f56b8e4 100644 --- a/bridges/GameBananaBridge.php +++ b/bridges/GameBananaBridge.php @@ -28,6 +28,8 @@ public function getIcon() return 'https://images.gamebanana.com/static/img/favicon/favicon.ico'; } + private $title; + public function collectData() { $url = 'https://api.gamebanana.com/Core/List/New?itemtype=Mod&page=1&gameid=' . $this->getInput('gid'); @@ -38,7 +40,7 @@ public function collectData() $json_list = json_decode($api_response, true); // Get first page mod list $url = 'https://api.gamebanana.com/Core/Item/Data?itemtype[]=Game&fields[]=name&itemid[]=' . $this->getInput('gid'); - $fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate'; + $fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate,Updates().aLatestUpdates(),Category().name,RootCategory().name'; foreach ($json_list as $element) { // Build api request to minimize API calls $mid = $element[1]; $url .= '&itemtype[]=Mod&fields[]=' . $fields . '&itemid[]=' . $mid; @@ -50,11 +52,18 @@ public function collectData() array_shift($json_list); // Take title from API request and remove from json foreach ($json_list as $element) { + // Trashed mod IDs are still picked up and return null; skip + if ($element[0] == null) { + continue; + } + $item = []; $item['uri'] = $element[6]; $item['comments'] = $item['uri'] . '#PostsListModule'; $item['title'] = $element[0]; $item['author'] = $element[1]; + $item['categories'][] = $element[9]; + $item['categories'][] = $element[10]; $item['timestamp'] = $element[5]; if ($this->getInput('updates')) { @@ -72,6 +81,22 @@ public function collectData() foreach ($img_list as $img_element) { $item['content'] .= ''; } + + // Get updates from element[8], if applicable + if ($this->getInput('updates') && count($element[8]) > 0) { + $update = $element[8][0]; + $item['content'] .= '
Update: ' . $update['_sTitle']; + if ($update['_sText'] != '') { + $item['content'] .= '
' . $update['_sText']; + } + foreach ($update['_aChangeLog'] as $change) { + if ($change['cat'] == '') { + $change['cat'] = 'Change'; + } + $item['content'] .= '
' . $change['cat'] . ': ' . $change['text']; + } + $item['content'] .= '

'; + } $item['content'] .= '
' . $element[2]; $item['uid'] = $item['uri'] . $item['title'] . $item['timestamp']; diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index 3381e096aca..b46b3ce663a 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -21,6 +21,10 @@ public function collectData() $rawContent = getContents($apiUrl); $cleanedContent = trim($rawContent, '"'); + $cleanedContent = str_replace([ + '', + '' + ], '', $cleanedContent); $cleanedContent = str_replace('\r\n', "\n", $cleanedContent); $cleanedContent = stripslashes($cleanedContent); diff --git a/bridges/GenshinImpactBridge.php b/bridges/GenshinImpactBridge.php index 24bc39d860a..924155d9d55 100644 --- a/bridges/GenshinImpactBridge.php +++ b/bridges/GenshinImpactBridge.php @@ -2,11 +2,11 @@ class GenshinImpactBridge extends BridgeAbstract { - const MAINTAINER = 'corenting'; const NAME = 'Genshin Impact'; - const URI = 'https://genshin.mihoyo.com/en/news'; - const CACHE_TIMEOUT = 7200; // 2h - const DESCRIPTION = 'News from the Genshin Impact website'; + const URI = 'https://genshin.hoyoverse.com/en/news'; + const CACHE_TIMEOUT = 18000; // 5h + const DESCRIPTION = 'Latest news from the Genshin Impact website'; + const MAINTAINER = 'Miicat_47'; const PARAMETERS = [ [ 'category' => [ @@ -25,37 +25,31 @@ class GenshinImpactBridge extends BridgeAbstract public function collectData() { - $category = $this->getInput('category'); - - $url = 'https://genshin.mihoyo.com/content/yuanshen/getContentList'; - $url = $url . '?pageSize=5&pageNum=1&channelId=' . 
$category; + $url = 'https://api-os-takumi-static.hoyoverse.com/content_v2_user/app/a1b1f9d3315447cc/getContentList?iAppId=32&iChanId=395&iPageSize=5&iPage=1&sLangKey=en-us'; $api_response = getContents($url); - $json_list = json_decode($api_response, true); + $json_list = Json::decode($api_response); foreach ($json_list['data']['list'] as $json_item) { - $article_url = 'https://genshin.mihoyo.com/content/yuanshen/getContent'; - $article_url = $article_url . '?contentId=' . $json_item['contentId']; - $article_res = getContents($article_url); - $article_json = json_decode($article_res, true); - $article_time = $article_json['data']['start_time']; - $timezone = 'Asia/Shanghai'; - $article_timestamp = new DateTime($article_time, new DateTimeZone($timezone)); - + $article_html = str_get_html($json_item['sContent']); + + // Check if article contains a embed YouTube video + $exp_youtube = '#https://[w\.]+youtube\.com/embed/([\w]+)#m'; + if (preg_match($exp_youtube, $article_html, $matches)) { + // Replace the YouTube embed with a YouTube link + $yt_embed = $article_html->find('div[class="ttr-video-frame"]', 0); + $yt_link = sprintf('https://youtube.com/watch?v=%1$s', $matches[1]); + $article_html = str_replace($yt_embed, $yt_link, $article_html); + } $item = []; - - $item['title'] = $article_json['data']['title']; - $item['timestamp'] = $article_timestamp->format('U'); - $item['content'] = $article_json['data']['content']; - $item['uri'] = $this->getArticleUri($json_item); - $item['id'] = $json_item['contentId']; + $item['title'] = $json_item['sTitle']; + $item['timestamp'] = $json_item['dtStartTime']; + $item['content'] = $article_html; + $item['uri'] = 'https://genshin.hoyoverse.com/en/news/detail/' . 
$json_item['iInfoId']; + $item['id'] = $json_item['iInfoId']; // Picture - foreach ($article_json['data']['ext'] as $ext) { - if ($ext['arrtName'] == 'banner' && count($ext['value']) == 1) { - $item['enclosures'] = [$ext['value'][0]['url']]; - break; - } - } + $json_ext = Json::decode($json_item['sExt']); + $item['enclosures'] = [$json_ext['banner'][0]['url']]; $this->items[] = $item; } @@ -63,11 +57,6 @@ public function collectData() public function getIcon() { - return 'https://genshin.mihoyo.com/favicon.ico'; - } - - private function getArticleUri($json_item) - { - return 'https://genshin.mihoyo.com/en/news/detail/' . $json_item['contentId']; + return 'https://genshin.hoyoverse.com/favicon.ico'; } } diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index 599d713a0ee..b52d3c2f469 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -106,10 +106,33 @@ private function extractContent($page) $article = $page->find('article', 0); + //built youtube iframes + foreach ($article->find('.embedcontent') as &$embedcontent) { + $ytscript = $embedcontent->find('script', 0); + if (preg_match('/www.youtube.com.*?\"/', $ytscript->innertext, $link)) { + $link = 'https://' . 
str_replace('\\', '', $link[0]); + $embedcontent->innertext .= <<'; + EOT; + } + } + + //built golem videos + foreach ($article->find('.gvideofig') as &$embedcontent) { + if (preg_match('/gvideo_(.*)/', $embedcontent->id, $videoid)) { + $embedcontent->innertext .= << + EOT; + } + } + // delete known bad elements foreach ( $article->find('div[id*="adtile"], #job-market, #seminars, iframe, - div.gbox_affiliate, div.toc, .embedcontent, script') as $bad + div.gbox_affiliate, div.toc') as $bad ) { $bad->remove(); } @@ -129,7 +152,7 @@ private function extractContent($page) $img->src = $img->getAttribute('data-src-full'); } - foreach ($content->find('p, h1, h2, h3, img[src*="."]') as $element) { + foreach ($content->find('p, h1, h2, h3, img[src*="."], iframe, video') as $element) { $item .= $element; } diff --git a/bridges/HarvardHealthBlogBridge.php b/bridges/HarvardHealthBlogBridge.php new file mode 100644 index 00000000000..bb6a5ede419 --- /dev/null +++ b/bridges/HarvardHealthBlogBridge.php @@ -0,0 +1,71 @@ + [ + 'name' => 'Article Image', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ], + ], + ]; + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI); + $count = 0; + + foreach ($dom->find('div[class="mb-16 md:flex"]') as $element) { + if ($count >= self::MAX_ARTICLES) { + break; + } + + $data = $element->find('a[class="hover:text-red transition-colors duration-200"]', 0); + if (!$data) { + continue; + } + + $url = $data->href; + + $this->items[] = [ + 'content' => $this->constructContent($url), + 'timestamp' => $element->find('time', 0)->datetime, + 'title' => $data->plaintext, + 'uid' => $url, + 'uri' => $url, + ]; + + $count++; + } + } + + private function constructContent($url) + { + $dom = getSimpleHTMLDOMCached($url); + + $article = $dom->find('div[class*="content-repository-content"]', 0); + if (!$article) { + return 'Content Not Found'; + } + + // remove article image + if (!$this->getInput('image')) { + $image = 
$article->find('p', 0); + $image->remove(); + } + + // remove ads + foreach ($article->find('.inline-ad') as $ad) { + $ad->outertext = ''; + } + + return $article->innertext; + } +} diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 504bcfb52e7..82c1f1aa4ff 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -160,9 +160,17 @@ private function addArticleToItem($item, $article) $article = defaultLinkTo($article, $item['uri']); // remove unwanted stuff - foreach ($article->find('figure.branding, a-ad, div.ho-text, a-img, .opt-in__content-container, .a-toc__list, a-collapse') as $element) { + foreach ( + $article->find('figure.branding, figure.a-inline-image, a-ad, div.ho-text, a-img, + .a-toc__list, a-collapse, .opt-in__description, .opt-in__footnote') as $element + ) { $element->remove(); } + foreach ($article->find('img') as $element) { + if (str_contains($element->alt, 'l+f')) { + $element->remove(); + } + } // reload html, as remove() is buggy $article = str_get_html($article->outertext); @@ -179,6 +187,30 @@ private function addArticleToItem($item, $article) } } + //fix for embbedded youtube-videos + $oldlink = ''; + foreach ($article->find('div.video__yt-container') as &$ytvideo) { + if (preg_match('/www.youtube.*?\"/', $ytvideo->innertext, $link) && $link[0] != $oldlink) { + //save link to prevent duplicates + $oldlink = $link[0]; + $ytiframe = << + EOT; + //check if video is in header or article for correct possitioning + if (strpos($header->innertext, $link[0])) { + $item['content'] .= $ytiframe; + } else { + $ytvideo->innertext .= $ytiframe; + $reloadneeded = 1; + } + } + } + if (isset($reloadneeded)) { + $article = str_get_html($article->outertext); + } + $categories = $article->find('.article-footer__topics ul.topics li.topics__item a-topic a'); foreach ($categories as $category) { $item['categories'][] = trim($category->plaintext); @@ -187,7 +219,7 @@ private function addArticleToItem($item, $article) $content = 
$article->find('.article-content', 0); if ($content) { $contentElements = $content->find( - 'p, h3, ul, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption' + 'p, h3, ul, ol, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption, noscript iframe' ); $item['content'] .= implode('', $contentElements); } diff --git a/bridges/HinduTamilBridge.php b/bridges/HinduTamilBridge.php new file mode 100644 index 00000000000..1b556ed80b6 --- /dev/null +++ b/bridges/HinduTamilBridge.php @@ -0,0 +1,103 @@ + [ + 'name' => 'topic', + 'type' => 'list', + 'defaultValue' => 'crime', + 'values' => [ + 'Astrology' => 'astrology', + 'Blogs' => 'blogs', + 'Business' => 'business', + 'Cartoon' => 'cartoon', + 'Cinema' => 'cinema', + 'Crime' => 'crime', + 'Discussion' => 'discussion', + 'Education' => 'education', + 'Environment' => 'environment', + 'India' => 'india', + 'Lifestyle' => 'life-style', + 'Literature' => 'literature', + 'Opinion' => 'opinion', + 'Reporters' => 'reporters-page', + 'Socialmedia' => 'social-media', + 'Spirituals' => 'spirituals', + 'Sports' => 'sports', + 'Supplements' => 'supplements', + 'Tamilnadu' => 'tamilnadu', + 'Technology' => 'technology', + 'Tourism' => 'tourism', + 'World' => 'world', + ], + ], + 'limit' => [ + 'name' => 'limit (max 100)', + 'type' => 'number', + 'defaultValue' => 10, + ], + ], + ]; + + const FEED_BASE_URL = 'https://feeds.feedburner.com/Hindu_Tamil_'; + + public function getName() + { + $topic = $this->getKey('topic'); + return self::NAME . ($topic ? ' - ' . $topic : ''); + } + + public function collectData() + { + $limit = min(100, $this->getInput('limit')); + $url = self::FEED_BASE_URL . 
$this->getInput('topic'); + $this->collectExpandableDatas($url, $limit); + } + + protected function parseItem($item) + { + $dom = getSimpleHTMLDOMCached($item['uri']); + + $date = $dom->find('p span.date', 1); + if ($date) { + $item['timestamp'] = $this->convertToRFC3339($date->plaintext); + } + + $content = $dom->find('#pgContentPrint', 0); + if (!$content) { + return $item; + } + + $image = $dom->find('#LoadArticle figure', 0); + $item['content'] = $image . $this->cleanContent($content); + + return $item; + } + + private function cleanContent($content) + { + foreach ($content->find('div[align="center"], script') as $remove) { + $remove->outertext = ''; + } + + return $content; + } + + private function convertToRFC3339($DateString) + { + $timestamp = strtotime(trim($DateString)); + + if ($timestamp !== false) { + $rfc3339DateTime = date('Y-m-d\TH:i:s', $timestamp) . '+05:30'; + return $rfc3339DateTime; + } else { + return null; + } + } +} diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index 44da417a1c8..6958220e645 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -3277,11 +3277,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/deals?merchant-id=', 'request-error' => 'Could not request HotUKDeals', 'thread-error' => 'Unable to determine the thread ID. 
Check the URL you entered', - 'no-results' => 'Ooops, looks like we could', 'currency' => '£', - 'relative-date-indicator' => [ - 'ago', - ], 'price' => 'Price', 'shipping' => 'Shipping', 'origin' => 'Origin', @@ -3289,51 +3285,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'title-keyword' => 'Search', 'title-group' => 'Group', 'title-talk' => 'Discussion Monitoring', - 'local-months' => [ - 'Jan', - 'Feb', - 'Mar', - 'Apr', - 'May', - 'Jun', - 'Jul', - 'Aug', - 'Sep', - 'Occ', - 'Nov', - 'Dec', - 'st', - 'nd', - 'rd', - 'th' - ], - 'local-time-relative' => [ - 'Posted ', - 'm', - 'h,', - 'day', - 'days', - 'month', - 'year', - 'and ' - ], - 'date-prefixes' => [ - 'Posted ', - 'Found ', - 'Refreshed ', - 'Made hot ' - ], - 'relative-date-alt-prefixes' => [ - 'Made hot ', - 'Refreshed ', - 'Last updated ' - ], - 'relative-date-ignore-suffix' => [ - '/by.*$/' - ], - 'localdeal' => [ - 'Local', - 'Expires' - ] + 'deal-type' => 'Deal Type', + 'localdeal' => 'Local deal', ]; } diff --git a/bridges/HumbleBundleBridge.php b/bridges/HumbleBundleBridge.php new file mode 100644 index 00000000000..42e025a58db --- /dev/null +++ b/bridges/HumbleBundleBridge.php @@ -0,0 +1,68 @@ + [ + 'name' => 'Bundle type', + 'type' => 'list', + 'defaultValue' => 'bundles', + 'values' => [ + 'All' => 'bundles', + 'Books' => 'books', + 'Games' => 'games', + 'Software' => 'software', + ] + ] + ]]; + + public function collectData() + { + $page = getSimpleHTMLDOMCached($this->getURI()); + $json_text = $page->find('#landingPage-json-data', 0)->innertext; + $json = json_decode(html_entity_decode($json_text), true)['data']; + + $products = []; + $types = ['books', 'games', 'software']; + $types = $this->getInput('type') === 'bundles' ? 
$types : [$this->getInput('type')]; + foreach ($types as $type) { + $products = array_merge($products, $json[$type]['mosaic'][0]['products']); + } + + foreach ($products as $element) { + $item = []; + $item['author'] = $element['author']; + $item['timestamp'] = $element['start_date|datetime']; + $item['title'] = $element['tile_short_name']; + $item['uid'] = $element['machine_name']; + $item['uri'] = parent::getURI() . $element['product_url']; + + $item['content'] = $element['marketing_blurb']; + $item['content'] .= '
' . $element['detailed_marketing_blurb']; + + $item['categories'] = $element['hover_highlights']; + array_unshift($item['categories'], explode(':', $element['tile_name'])[0]); + array_unshift($item['categories'], $element['tile_stamp']); + + $item['enclosures'] = [$element['tile_logo'], $element['high_res_tile_image']]; + $this->items[] = $item; + } + } + + public function getName() + { + $name = parent::getName(); + $name .= $this->getInput('type') ? ' - ' . $this->getInput('type') : ''; + return $name; + } + + public function getURI() + { + $uri = parent::getURI() . $this->getInput('type'); + return $uri; + } +} diff --git a/bridges/HytaleBridge.php b/bridges/HytaleBridge.php index 7ca11af62e1..01fc0f385c7 100644 --- a/bridges/HytaleBridge.php +++ b/bridges/HytaleBridge.php @@ -18,26 +18,27 @@ public function collectData() $blogPosts = json_decode(getContents(self::_API_URL_PUBLISHED)); $length = count($blogPosts); - for ($i = 1; $i < $length; $i += 3) { + for ($i = 0; $i < $length; $i += 3) { $slug = $blogPosts[$i]->slug; $blogPost = json_decode(getContents(self::_API_URL_BLOG_POST . $slug)); - if (property_exists($blogPost, 'previous')) { - $this->addBlogPost($blogPost->previous); + if (property_exists($blogPost, 'next')) { + $this->addBlogPost($blogPost->next); } $this->addBlogPost($blogPost); - if (property_exists($blogPost, 'next')) { - $this->addBlogPost($blogPost->next); + if (property_exists($blogPost, 'previous')) { + $this->addBlogPost($blogPost->previous); } } - if ($length % 3 == 1) { - $slug = $blogPosts[count($blogPosts) - 1]->slug; + if (($length >= 3) && ($length % 3 == 0)) { + $slug = $blogPosts[$length - 1]->slug; $blogPost = json_decode(getContents(self::_API_URL_BLOG_POST . 
$slug)); + $this->addBlogPost($blogPost); } } diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php index cef2b812165..f426a45c326 100644 --- a/bridges/IdealoBridge.php +++ b/bridges/IdealoBridge.php @@ -2,15 +2,15 @@ class IdealoBridge extends BridgeAbstract { - const NAME = 'Idealo.de Bridge'; + const NAME = 'idealo.de / idealo.fr / idealo.es Bridge'; const URI = 'https://www.idealo.de'; - const DESCRIPTION = 'Tracks the price for a product on idealo.de. Pricealarm if specific price is set'; + const DESCRIPTION = 'Tracks the price for a product on idealo.de / idealo.fr / idealo.es. Pricealarm if specific price is set'; const MAINTAINER = 'SebLaus'; const CACHE_TIMEOUT = 60 * 30; // 30 min const PARAMETERS = [ [ 'Link' => [ - 'name' => 'Idealo.de Link to productpage', + 'name' => 'idealo.de / idealo.fr / idealo.es Link to productpage', 'required' => true, 'exampleValue' => 'https://www.idealo.de/preisvergleich/OffersOfProduct/202007367_-s7-pro-ultra-roborock.html' ], @@ -40,6 +40,87 @@ public function getIcon() return 'https://cdn.idealo.com/storage/ids-assets/ico/favicon.ico'; } + /** + * Returns the RSS Feed title when a RSS feed is rendered + * @return string the RSS feed Title + */ + private function getFeedTitle() + { + $cacheDuration = 604800; + $link = $this->getInput('Link'); + $keyTITLE = $link . 
'TITLE'; + $product = $this->loadCacheValue($keyTITLE, $cacheDuration); + + // The cache does not contain the title of the bridge, we must get it and save it in the cache + if ($product === null) { + $header = [ + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15' + ]; + $html = getSimpleHTMLDOM($link, $header); + $product = $html->find('.oopStage-title', 0)->find('span', 0)->plaintext; + $this->saveCacheValue($keyTITLE, $product); + } + + $MaxPriceUsed = $this->getInput('MaxPriceUsed'); + $MaxPriceNew = $this->getInput('MaxPriceNew'); + $titleParts = []; + + $titleParts[] = $product; + + // Add Max Prices to the title + if ($MaxPriceUsed !== null) { + $titleParts[] = 'Max Price Used : ' . $MaxPriceUsed . '€'; + } + if ($MaxPriceNew !== null) { + $titleParts[] = 'Max Price New : ' . $MaxPriceNew . '€'; + } + + $title = implode(' ', $titleParts); + + + return $title . ' - ' . $this::NAME; + } + + /** + * Returns the Price as float + * @return float rhe price converted in float + */ + private function convertPriceToFloat($price) + { + // Every price is stored / displayed as "xxx,xx €", but PHP can't convert it as float + + if ($price !== null) { + // Convert comma as dot + $price = str_replace(',', '.', $price); + // Remove the '€' char + $price = str_replace('€', '', $price); + // Convert to float + return floatval($price); + } else { + return $price; + } + } + + /** + * Returns the Price Trend emoji + * @return string the Price Trend Emoji + */ + private function getPriceTrend($NewPrice, $OldPrice) + { + $NewPrice = $this->convertPriceToFloat($NewPrice); + $OldPrice = $this->convertPriceToFloat($OldPrice); + // In case there is no old Price, then show no trend + if ($OldPrice === null || $OldPrice == 0) { + $trend = ''; + } else if ($NewPrice > $OldPrice) { + $trend = '↗'; + } else if ($NewPrice == $OldPrice) { + $trend = '➡'; + } else if ($NewPrice < $OldPrice) { + $trend = '↘'; + } 
+ return $trend; + } public function collectData() { // Needs header with user-agent to function properly. @@ -65,16 +146,20 @@ public function collectData() $OldPriceNew = $this->loadCacheValue($KeyNEW); $OldPriceUsed = $this->loadCacheValue($KeyUSED); - // First button is new. Found at oopStage-conditionButton-wrapper-text class (.) + // First button contains the new price. Found at oopStage-conditionButton-wrapper-text class (.) $FirstButton = $html->find('.oopStage-conditionButton-wrapper-text', 0); if ($FirstButton) { $PriceNew = $FirstButton->find('strong', 0)->plaintext; + // Save current price + $this->saveCacheValue($KeyNEW, $PriceNew); } - // Second Button is used + // Second Button contains the used product price $SecondButton = $html->find('.oopStage-conditionButton-wrapper-text', 1); if ($SecondButton) { $PriceUsed = $SecondButton->find('strong', 0)->plaintext; + // Save current price + $this->saveCacheValue($KeyUSED, $PriceUsed); } // Only continue if a price has changed @@ -82,18 +167,20 @@ public function collectData() // Get Product Image $image = $html->find('.datasheet-cover-image', 0)->src; + $content = ''; + // Generate Content - if ($PriceNew > 1) { - $content = "

Price New:
$PriceNew

"; + if (isset($PriceNew) && $this->convertPriceToFloat($PriceNew) > 0) { + $content .= sprintf('

Price New:
%s %s

', $PriceNew, $this->getPriceTrend($PriceNew, $OldPriceNew)); $content .= "

Price New before:
$OldPriceNew

"; } if ($this->getInput('MaxPriceNew') != '') { - $content .= sprintf('

Max Price Used:
%s,00 €

', $this->getInput('MaxPriceNew')); + $content .= sprintf('

Max Price New:
%s,00 €

', $this->getInput('MaxPriceNew')); } - if ($PriceUsed > 1) { - $content .= "

Price Used:
$PriceUsed

"; + if (isset($PriceUsed) && $this->convertPriceToFloat($PriceUsed) > 0) { + $content .= sprintf('

Price Used:
%s %s

', $PriceUsed, $this->getPriceTrend($PriceUsed, $OldPriceUsed)); $content .= "

Price Used before:
$OldPriceUsed

"; } @@ -110,8 +197,8 @@ public function collectData() // Currently under Max new price if ($this->getInput('MaxPriceNew') != '') { - if ($PriceNew < $this->getInput('MaxPriceNew')) { - $title = sprintf($Pricealarm, 'Used', $PriceNew, $Productname, $now); + if (isset($PriceNew) && $this->convertPriceToFloat($PriceNew) < $this->getInput('MaxPriceNew')) { + $title = sprintf($Pricealarm, 'New', $PriceNew, $Productname, $now); $item = [ 'title' => $title, 'uri' => $link, @@ -124,7 +211,7 @@ public function collectData() // Currently under Max used price if ($this->getInput('MaxPriceUsed') != '') { - if ($PriceUsed < $this->getInput('MaxPriceUsed')) { + if (isset($PriceUsed) && $this->convertPriceToFloat($PriceUsed) < $this->getInput('MaxPriceUsed')) { $title = sprintf($Pricealarm, 'Used', $PriceUsed, $Productname, $now); $item = [ 'title' => $title, @@ -136,32 +223,21 @@ public function collectData() } } - // General Priceupdate + // General Priceupdate Without any Max Price for new and Used product if ($this->getInput('MaxPriceUsed') == '' && $this->getInput('MaxPriceNew') == '') { // check if a relevant pricechange happened if ( (!$this->getInput('ExcludeNew') && $PriceNew != $OldPriceNew ) || (!$this->getInput('ExcludeUsed') && $PriceUsed != $OldPriceUsed ) ) { - $title .= 'Priceupdate! '; - - if (!$this->getInput('ExcludeNew')) { - if ($PriceNew < $OldPriceNew) { - $title .= 'NEW:⬇ '; // Arrow Down Emoji - } - if ($PriceNew > $OldPriceNew) { - $title .= 'NEW:⬆ '; // Arrow Up Emoji - } - } + $title = 'Priceupdate! '; + if (!$this->getInput('ExcludeNew') && isset($PriceNew)) { + $title .= 'NEW' . $this->getPriceTrend($PriceNew, $OldPriceNew) . ' '; + } - if (!$this->getInput('ExcludeUsed')) { - if ($PriceUsed < $OldPriceUsed) { - $title .= 'USED:⬇ '; // Arrow Down Emoji - } - if ($PriceUsed > $OldPriceUsed) { - $title .= 'USED:⬆ '; // Arrow Up Emoji - } + if (!$this->getInput('ExcludeUsed') && isset($PriceUsed)) { + $title .= 'USED' . 
$this->getPriceTrend($PriceUsed, $OldPriceUsed) . ' '; } $title .= $Productname; $title .= ' '; @@ -177,9 +253,19 @@ public function collectData() } } } + } - // Save current price - $this->saveCacheValue($KeyNEW, $PriceNew); - $this->saveCacheValue($KeyUSED, $PriceUsed); + /** + * Returns the RSS Feed title according to the parameters + * @return string the RSS feed Tile + */ + public function getName() + { + switch ($this->queriedContext) { + case '0': + return $this->getFeedTitle(); + default: + return parent::getName(); + } } } diff --git a/bridges/ItakuBridge.php b/bridges/ItakuBridge.php index 0577752cc55..4f4145742a4 100644 --- a/bridges/ItakuBridge.php +++ b/bridges/ItakuBridge.php @@ -347,17 +347,17 @@ private function getImagesSearch(array $opt) $url = self::URI . "/api/galleries/images/?by_following=false&date_range={$opt['range']}&ordering={$opt['order']}&is_video={$opt['video_only']}"; $url .= "&text={$opt['text']}&visibility=PUBLIC&visibility=PROFILE_ONLY&page=1&page_size=30&format=json"; - if (sizeof($opt['optional_tags']) > 0) { + if (count($opt['optional_tags']) > 0) { foreach ($opt['optional_tags'] as $tag) { $url .= "&optional_tags=$tag"; } } - if (sizeof($opt['negative_tags']) > 0) { + if (count($opt['negative_tags']) > 0) { foreach ($opt['negative_tags'] as $tag) { $url .= "&negative_tags=$tag"; } } - if (sizeof($opt['required_tags']) > 0) { + if (count($opt['required_tags']) > 0) { foreach ($opt['required_tags'] as $tag) { $url .= "&required_tags=$tag"; } @@ -381,17 +381,17 @@ private function getPostsSearch(array $opt) $url = self::URI . 
"/api/posts/?by_following=false&date_range={$opt['range']}&ordering={$opt['order']}"; $url .= '&visibility=PUBLIC&visibility=PROFILE_ONLY&page=1&page_size=30&format=json'; - if (sizeof($opt['optional_tags']) > 0) { + if (count($opt['optional_tags']) > 0) { foreach ($opt['optional_tags'] as $tag) { $url .= "&optional_tags=$tag"; } } - if (sizeof($opt['negative_tags']) > 0) { + if (count($opt['negative_tags']) > 0) { foreach ($opt['negative_tags'] as $tag) { $url .= "&negative_tags=$tag"; } } - if (sizeof($opt['required_tags']) > 0) { + if (count($opt['required_tags']) > 0) { foreach ($opt['required_tags'] as $tag) { $url .= "&required_tags=$tag"; } @@ -446,7 +446,7 @@ private function getOwnerID($username) private function getPost($id, array $metadata = null) { - if (isset($metadata) && sizeof($metadata['gallery_images']) < $metadata['num_images']) { + if (isset($metadata) && count($metadata['gallery_images']) < $metadata['num_images']) { $metadata = null; //force re-fetch of metadata } $uri = self::URI . '/posts/' . $id; @@ -457,7 +457,7 @@ private function getPost($id, array $metadata = null) $content_str = nl2br($data['content']); $content = "

{$content_str}


"; //TODO: Add link and itaku user mention detection and convert into links. - if (array_key_exists('tags', $data) && sizeof($data['tags']) > 0) { + if (array_key_exists('tags', $data) && count($data['tags']) > 0) { $tag_types = [ 'ARTIST' => '', 'COPYRIGHT' => '', @@ -479,7 +479,7 @@ private function getPost($id, array $metadata = null) } } - if (sizeof($data['folders']) > 0) { + if (count($data['folders']) > 0) { $content .= '📁 In Folder(s): '; foreach ($data['folders'] as $folder) { $url = self::URI . '/profile/' . $data['owner_username'] . '/posts/' . $folder['id']; @@ -488,7 +488,7 @@ private function getPost($id, array $metadata = null) } $content .= '
'; - if (sizeof($data['gallery_images']) > 0) { + if (count($data['gallery_images']) > 0) { foreach ($data['gallery_images'] as $media) { $title = $media['title']; $url = self::URI . '/images/' . $media['id']; @@ -529,7 +529,7 @@ private function getCommission($id, array $metadata = null) $content_str = nl2br($data['description']); $content = "

{$content_str}


"; //TODO: Add link and itaku user mention detection and convert into links. - if (array_key_exists('tags', $data) && sizeof($data['tags']) > 0) { + if (array_key_exists('tags', $data) && count($data['tags']) > 0) { // $content .= "🏷 Tag(s): "; $tag_types = [ 'ARTIST' => '', @@ -552,7 +552,7 @@ private function getCommission($id, array $metadata = null) } } - if (array_key_exists('reference_gallery_sections', $data) && sizeof($data['reference_gallery_sections']) > 0) { + if (array_key_exists('reference_gallery_sections', $data) && count($data['reference_gallery_sections']) > 0) { $content .= '📁 Example folder(s): '; foreach ($data['folders'] as $folder) { $url = self::URI . '/profile/' . $data['owner_username'] . '/gallery/' . $folder['id']; @@ -601,7 +601,7 @@ private function getImage($id /* array $metadata = null */) //$metadata disabled $content_str = nl2br($data['description']); $content = "

{$content_str}


"; //TODO: Add link and itaku user mention detection and convert into links. - if (array_key_exists('tags', $data) && sizeof($data['tags']) > 0) { + if (array_key_exists('tags', $data) && count($data['tags']) > 0) { // $content .= "🏷 Tag(s): "; $tag_types = [ 'ARTIST' => '', @@ -624,7 +624,7 @@ private function getImage($id /* array $metadata = null */) //$metadata disabled } } - if (array_key_exists('sections', $data) && sizeof($data['sections']) > 0) { + if (array_key_exists('sections', $data) && count($data['sections']) > 0) { $content .= '📁 In Folder(s): '; foreach ($data['sections'] as $folder) { $url = self::URI . '/profile/' . $data['owner_username'] . '/gallery/' . $folder['id']; diff --git a/bridges/JohannesBlickBridge.php b/bridges/JohannesBlickBridge.php index 6c00fecaba1..80ca9a711a7 100644 --- a/bridges/JohannesBlickBridge.php +++ b/bridges/JohannesBlickBridge.php @@ -3,7 +3,7 @@ class JohannesBlickBridge extends BridgeAbstract { const NAME = 'Johannes Blick'; - const URI = 'https://www.st-johannes-baptist.de/index.php/unsere-medien/johannesblick-archiv'; + const URI = 'https://www.st-johannes-baptist.de/index.php/medien-und-downloads/archiv-johannesblick'; const DESCRIPTION = 'RSS feed for Johannes Blick'; const MAINTAINER = 'jummo4@yahoo.de'; @@ -13,7 +13,7 @@ public function collectData() or returnServerError('Could not request: ' . 
self::URI); $html = defaultLinkTo($html, self::URI); - foreach ($html->find('td > a') as $index => $a) { + foreach ($html->find('ul[class=easyfolderlisting] > li > a') as $index => $a) { $item = []; // Create an empty item $articlePath = $a->href; $item['title'] = $a->innertext; diff --git a/bridges/LuftfahrtBundesAmtBridge.php b/bridges/LuftfahrtBundesAmtBridge.php index 2b0384a21c2..406d2476450 100644 --- a/bridges/LuftfahrtBundesAmtBridge.php +++ b/bridges/LuftfahrtBundesAmtBridge.php @@ -26,8 +26,13 @@ protected function provideFeedIcon(\DOMXPath $xpath) protected function formatItemTimestamp($value) { $value = trim($value); - $dti = DateTimeImmutable::createFromFormat('d.m.Y', $value); - $dti = $dti->setTime(0, 0, 0); + if (strpos($value, 'Uhr') !== false) { + $value = str_replace(' Uhr', '', $value); + $dti = DateTimeImmutable::createFromFormat('d.m.Y G:i', $value); + } else { + $dti = DateTimeImmutable::createFromFormat('d.m.Y', $value); + $dti = $dti->setTime(0, 0); + } return $dti->getTimestamp(); } diff --git a/bridges/MaalaimalarBridge.php b/bridges/MaalaimalarBridge.php new file mode 100644 index 00000000000..87f85694cd0 --- /dev/null +++ b/bridges/MaalaimalarBridge.php @@ -0,0 +1,117 @@ + [ + 'name' => 'topic', + 'type' => 'list', + 'values' => [ + 'news' => [ + 'tamilnadu' => 'news/state', + 'puducherry' => 'puducherry', + 'india' => 'news/national', + 'world' => 'news/world', + ], + 'district' => [ + 'chennai' => 'chennai', + 'ariyalur' => 'ariyalur', + 'chengalpattu' => 'chengalpattu', + 'coimbatore' => 'coimbatore', + 'cuddalore' => 'cuddalore', + 'dharmapuri' => 'dharmapuri', + 'dindugal' => 'dindugal', + 'erode' => 'erode', + 'kaanchepuram' => 'kaanchepuram', + 'kallakurichi' => 'kallakurichi', + 'kanyakumari' => 'kanyakumari', + 'karur' => 'karur', + 'krishnagiri' => 'krishnagiri', + 'madurai' => 'madurai', + 'mayiladuthurai' => 'mayiladuthurai', + 'nagapattinam' => 'nagapattinam', + 'namakal' => 'namakal', + 'nilgiris' => 'nilgiris', + 
'perambalur' => 'perambalur', + 'pudukottai' => 'pudukottai', + 'ramanathapuram' => 'ramanathapuram', + 'ranipettai' => 'ranipettai', + 'salem' => 'salem', + 'sivagangai' => 'sivagangai', + 'tanjore' => 'tanjore', + 'theni' => 'theni', + 'thenkasi' => 'thenkasi', + 'thiruchirapalli' => 'thiruchirapalli', + 'thirunelveli' => 'thirunelveli', + 'thirupathur' => 'thirupathur', + 'thiruvarur' => 'thiruvarur', + 'thoothukudi' => 'thoothukudi', + 'tirupur' => 'tirupur', + 'tiruvallur' => 'tiruvallur', + 'tiruvannamalai' => 'tiruvannamalai', + 'vellore' => 'vellore', + 'villupuram' => 'villupuram', + 'virudhunagar' => 'virudhunagar', + ], + 'cinema' => [ + 'news' => 'cinema/cinemanews', + 'gossip' => 'cinema/gossip', + ], + ], + ], + ], + ]; + + public function getName() + { + $topic = $this->getKey('topic'); + return self::NAME . ($topic ? ' - ' . ucfirst($topic) : ''); + } + + public function collectData() + { + $dom = getSimpleHTMLDOM(self::URI . $this->getInput('topic')); + $articles = $dom->find('div.mb-20.infinite-card-wrapper.white-section'); + + foreach ($articles as $article) { + $titleElement = $article->find('h2.title a', 0); + if (!$titleElement) { + continue; + } + + $dateElement = $article->find('time.h-date span', 0); + $date = $dateElement ? $dateElement->{'data-datestring'} . 'UTC' : ''; + + $content = $this->constructContent($article); + + $this->items[] = [ + 'content' => $content, + 'timestamp' => $date, + 'title' => $titleElement->plaintext, + 'uid' => $titleElement->href, + 'uri' => self::URI . $titleElement->href, + ]; + } + } + + private function constructContent($article) + { + $content = ''; + $imageElement = $article->find('div.ignore-autoplay img', 0); + if ($imageElement) { + $content .= '

'; + } + + $storyElement = $article->find('div.story-content', 0); + if ($storyElement) { + $content .= $storyElement->innertext; + } + + return $content; + } +} diff --git a/bridges/MagellantvBridge.php b/bridges/MagellantvBridge.php index b1f0403e105..0a225160d8f 100644 --- a/bridges/MagellantvBridge.php +++ b/bridges/MagellantvBridge.php @@ -63,7 +63,7 @@ public function collectData() // Check whether items exists $article_list = $dom->find('div.articlePreview_preview-card__mLMOm'); - if (sizeof($article_list) == 0) { + if (count($article_list) == 0) { throw new Exception(sprintf('Unable to find css selector on `%s`', $url)); } diff --git a/bridges/MangaReaderBridge.php b/bridges/MangaReaderBridge.php index 1fa0c62dc54..1b8e765b134 100644 --- a/bridges/MangaReaderBridge.php +++ b/bridges/MangaReaderBridge.php @@ -26,11 +26,26 @@ class MangaReaderBridge extends BridgeAbstract ] ]; + protected $feedName = ''; + + + public function getName() + { + if (empty($this->feedName)) { + return parent::getName(); + } else { + return $this->feedName; + } + } + public function collectData() { $url = $this->getInput('url'); $lang = $this->getInput('lang'); $dom = getSimpleHTMLDOM($url); + $aniDetail = $dom->getElementById('ani_detail'); + $this->feedName = html_entity_decode($aniDetail->find('h2', 0)->plaintext); + $chapters = $dom->getElementById($lang . '-chapters'); foreach ($chapters->getElementsByTagName('li') as $chapter) { diff --git a/bridges/MediapartBlogsBridge.php b/bridges/MediapartBlogsBridge.php index fa8c3d5f1d3..d1e1c3c9867 100644 --- a/bridges/MediapartBlogsBridge.php +++ b/bridges/MediapartBlogsBridge.php @@ -35,7 +35,12 @@ public function collectData() $item['title'] = $item_title->innertext; $item['uri'] = self::BASE_URI . 
trim($item_title->href); - $item['author'] = $element->find('.author .subscriber', 0)->innertext; + + $author = $element->find('.author .subscriber', 0); + if ($author) { + $item['author'] = $author->innertext; + } + $item['content'] = $item_divs[count($item_divs) - 2] . $item_divs[count($item_divs) - 1]; $item['timestamp'] = strtotime($element->find('.author time', 0)->datetime); diff --git a/bridges/MydealsBridge.php b/bridges/MydealsBridge.php index dda3d2a9076..7b23f263936 100644 --- a/bridges/MydealsBridge.php +++ b/bridges/MydealsBridge.php @@ -2024,12 +2024,7 @@ class MydealsBridge extends PepperBridgeAbstract 'uri-merchant' => 'search/gutscheine?merchant-id=', 'request-error' => 'Could not request mydeals', 'thread-error' => 'Die ID der Diskussion kann nicht ermittelt werden. Überprüfen Sie die eingegebene URL', - 'no-results' => 'Ups, wir konnten nichts', 'currency' => '€', - 'relative-date-indicator' => [ - 'vor', - 'seit' - ], 'price' => 'Preis', 'shipping' => 'Versand', 'origin' => 'Ursprung', @@ -2037,49 +2032,7 @@ class MydealsBridge extends PepperBridgeAbstract 'title-keyword' => 'Suche', 'title-group' => 'Gruppe', 'title-talk' => 'Überwachung Diskussion', - 'local-months' => [ - 'Jan', - 'Feb', - 'Mär', - 'Apr', - 'Mai', - 'Jun', - 'Jul', - 'Aug', - 'Sep', - 'Okt', - 'Nov', - 'Dez', - '.' 
- ], - 'local-time-relative' => [ - 'eingestellt vor ', - 'm', - 'h,', - 'day', - 'days', - 'month', - 'year', - 'and ' - ], - 'date-prefixes' => [ - 'eingestellt am ', - 'lokal ', - 'aktualisiert ', - ], - 'relative-date-alt-prefixes' => [ - 'aktualisiert vor ', - 'kommentiert vor ', - 'eingestellt vor ', - 'heiß seit ', - 'vor ' - ], - 'relative-date-ignore-suffix' => [ - '/von.*$/' - ], - 'localdeal' => [ - 'Lokal ', - 'Läuft bis ' - ] + 'deal-type' => 'Angebotsart', + 'localdeal' => 'Lokales Angebot', ]; } diff --git a/bridges/NACSouthGermanyMediaLibraryBridge.php b/bridges/NACSouthGermanyMediaLibraryBridge.php index fff6c554e0b..70129b37cef 100644 --- a/bridges/NACSouthGermanyMediaLibraryBridge.php +++ b/bridges/NACSouthGermanyMediaLibraryBridge.php @@ -31,7 +31,7 @@ class NACSouthGermanyMediaLibraryBridge extends BridgeAbstract public function getIcon() { - return 'https://www.nak-stuttgart.de/static/themes/nak_sued/images/nak-logo.png'; + return 'https://nak-sued.de/static/themes/sued/images/logo.png'; } private static function parseTimestamp($title) @@ -66,9 +66,12 @@ private static function collectDataForSWR1($parent, $item) private static function collectDataForBayern2($parent, $item) { # Find link - $playerDom = getSimpleHTMLDOMCached(self::BASE_URI . $parent->find('a', 0)->href); - $sourceURI = $playerDom->find('source', 0)->src; - $item['enclosures'] = [self::BASE_URI . $sourceURI]; + $relativeURICode = $parent->find('a', 0)->onclick; + if (preg_match('/window\.open\(\'([^\']*)\'/', $relativeURICode, $matches)) { + $playerDom = getSimpleHTMLDOMCached(self::BASE_URI . $matches[1]); + $sourceURI = $playerDom->find('source', 0)->src; + $item['enclosures'] = [self::BASE_URI . $sourceURI]; + } # Add time to timestamp $item['timestamp'] .= ' 06:45'; @@ -78,16 +81,16 @@ private static function collectDataForBayern2($parent, $item) private function collectDataInList($pageURI, $customizeItemCall) { - $page = getSimpleHTMLDOM(self::BASE_URI . 
$pageURI); + $page = getSimpleHTMLDOM($pageURI); - foreach ($page->find('div.grids') as $parent) { + foreach ($page->find('div.flex-columns.entry') as $parent) { # Find title - $title = $parent->find('h2', 0)->plaintext; + $title = trim($parent->find('h2')[0]->innertext); # Find content - $contentBlock = $parent->find('ul.contentlist', 0); + $contentBlock = $parent->find('div')[2]; $content = ''; - foreach ($contentBlock->find('li') as $li) { + foreach ($contentBlock->find('li,p') as $li) { $content .= '

' . $li->plaintext . '

'; } @@ -103,7 +106,7 @@ private function collectDataInList($pageURI, $customizeItemCall) private function collectDataFromAllPages($rootURI, $customizeItemCall) { $rootPage = getSimpleHTMLDOM($rootURI); - $pages = $rootPage->find('div#tabmenu', 0); + $pages = $rootPage->find('div.flex-columns.inner_filter', 0); foreach ($pages->find('a') as $page) { self::collectDataInList($page->href, [$this, $customizeItemCall]); } diff --git a/bridges/NOSBridge.php b/bridges/NOSBridge.php index 33cad40b190..60a560aac56 100644 --- a/bridges/NOSBridge.php +++ b/bridges/NOSBridge.php @@ -14,7 +14,7 @@ class NOSBridge extends BridgeAbstract 'name' => 'Onderwerp', 'title' => 'Kies onderwerp', 'values' => [ - 'Laatste nieuws' => 'nieuws', + 'Laatste nieuws' => 'nieuws/laatste', 'Binnenland' => 'nieuws/binnenland', 'Buitenland' => 'nieuws/buitenland', 'Regionaal nieuws' => 'nieuws/regio', @@ -38,17 +38,16 @@ public function collectData() { $url = sprintf('https://www.nos.nl/%s', $this->getInput('topic')); $dom = getSimpleHTMLDOM($url); - $dom = $dom->find('ul.list-items', 0); + $dom = $dom->find('main#content > div > section > ul', 0); if (!$dom) { throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); } $dom = defaultLinkTo($dom, $this->getURI()); - foreach ($dom->find('li.list-items__item') as $article) { - $a = $article->find('a', 0); + foreach ($dom->find('li') as $article) { $this->items[] = [ - 'title' => $article->find('h3.list-items__title', 0)->plaintext, - 'uri' => $article->find('a.list-items__link', 0)->href, - 'content' => $article->find('p.list-items__description', 0)->plaintext, + 'title' => $article->find('h2', 0)->plaintext, + 'uri' => $article->find('a', 0)->href, + 'content' => $article->find('p', 0)->plaintext, 'timestamp' => strtotime($article->find('time', 0)->datetime), ]; } diff --git a/bridges/NationalGeographicBridge.php b/bridges/NationalGeographicBridge.php index f7572240adc..7f8f4fa243d 100644 --- a/bridges/NationalGeographicBridge.php 
+++ b/bridges/NationalGeographicBridge.php @@ -168,7 +168,7 @@ private function addStory($story) } $image = $story['img']; - $item['enclosures'][] = $image['src']; + $item['enclosures'][] = str_replace(' ', '%20', $image['src']); foreach ($story['tags'] as $tag) { $item['categories'][] = $tag['name'] ?? $tag; @@ -218,7 +218,10 @@ private function handleImages($image_module, $image_type) switch ($image_type) { case 'image': case 'imagegroup': - $image = $image_module['image']; + $image = $image_module['image'] ?? null; + if (!$image) { + return ''; + } $image_src = $image['src']; if (isset($image_module['alt'])) { $image_alt = $image_module['alt']; @@ -266,7 +269,11 @@ private function getFullArticle($uri) $json = json_decode($matches[1][0], true); - $unfiltered_data = $json['page']['content']['article']['frms']; + if (isset($json['page']['content']['article']['frms'])) { + $unfiltered_data = $json['page']['content']['article']['frms']; + } else { + $unfiltered_data = $json['page']['content']['prismarticle']['frms']; + } $filtered_data = $this->filterArticleData($unfiltered_data); $article = $filtered_data['edgs'][0]; @@ -288,7 +295,7 @@ private function getFullArticle($uri) } } - $published_date = $article['pbDt']; + $published_date = $article['pbDt'] ?? 
$article['dt']; $article_body = $article['bdy']; $content = ''; diff --git a/bridges/NintendoBridge.php b/bridges/NintendoBridge.php index 1c2ef71a843..1c4ecf2bf61 100644 --- a/bridges/NintendoBridge.php +++ b/bridges/NintendoBridge.php @@ -4,7 +4,6 @@ class NintendoBridge extends XPathAbstract { const NAME = 'Nintendo Software Updates'; const URI = 'https://www.nintendo.co.uk/Support/Welcome-to-Nintendo-Support-11593.html'; - const DONATION_URI = ''; const DESCRIPTION = self::NAME; const MAINTAINER = 'Niehztog'; const PARAMETERS = [ diff --git a/bridges/NiusBridge.php b/bridges/NiusBridge.php index c76b29f0809..0be6e89c2fd 100644 --- a/bridges/NiusBridge.php +++ b/bridges/NiusBridge.php @@ -23,6 +23,11 @@ class NiusBridge extends XPathAbstract const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="subtitle"]/text()'; const SETTING_FIX_ENCODING = false; + public function getIcon() + { + return 'https://www.nius.de/favicon.ico'; + } + protected function formatItemTitle($value) { return strip_tags($value); diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index fcf2b1975c6..36708411410 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -66,22 +66,20 @@ public function collectData() $feed = $feedParser->parseFeed(getContents($this->getURI())); foreach ($feed['items'] as $item) { - $item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']); - $item['uri'] = str_replace('/download/', '/view/', $item['uri']); + $item['enclosures'] = [$item['uri']]; $item['uri'] = str_replace('.torrent', '', $item['uri']); + $item['uri'] = str_replace('/download/', '/view/', $item['uri']); + $item['id'] = str_replace('https://nyaa.si/view/', '', $item['uri']); $dom = getSimpleHTMLDOMCached($item['uri']); if ($dom) { $description = $dom->find('#torrent-description', 0)->innertext ?? ''; - $itemDom = str_get_html(markdownToHtml(html_entity_decode($description))); - $item_image = $this->getURI() . 
'static/img/avatar/default.png'; - foreach ($itemDom->find('img') as $img) { - if (strpos($img->src, 'prez') === false) { - $item_image = $img->src; - break; - } - } - $item['enclosures'] = [$item_image]; - $item['content'] = (string) $itemDom; + $item['content'] = markdownToHtml(html_entity_decode($description)); + + $magnet = $dom->find('div.panel-footer.clearfix > a', 1)->href; + // can't put raw magnet link in enclosure, this gives information on + // magnet contents and works a way to sent magnet value + $magnet = 'https://torrent.parts/#' . html_entity_decode($magnet); + array_push($item['enclosures'], $magnet); } $this->items[] = $item; if (count($this->items) >= 10) { @@ -90,6 +88,15 @@ public function collectData() } } + public function getName() + { + $name = parent::getName(); + $name .= $this->getInput('u') ? ' - ' . $this->getInput('u') : ''; + $name .= $this->getInput('q') ? ' - ' . $this->getInput('q') : ''; + $name .= $this->getInput('c') ? ' (' . $this->getKey('c') . ')' : ''; + return $name; + } + public function getIcon() { return self::URI . 'static/favicon.png'; diff --git a/bridges/OpenCVEBridge.php b/bridges/OpenCVEBridge.php new file mode 100644 index 00000000000..594bb9ece3c --- /dev/null +++ b/bridges/OpenCVEBridge.php @@ -0,0 +1,427 @@ + [ + 'instance' => [ + 'name' => 'OpenCVE Instance', + 'required' => true, + 'defaultValue' => 'https://www.opencve.io', + 'exampleValue' => 'https://www.opencve.io' + ], + 'login' => [ + 'name' => 'Login', + 'type' => 'text', + 'required' => true + ], + 'password' => [ + 'name' => 'Password', + 'type' => 'text', + 'required' => true + ], + 'pages' => [ + 'name' => 'Number of pages', + 'type' => 'number', + 'required' => false, + 'exampleValue' => 1, + 'defaultValue' => 1 + ], + 'filter' => [ + 'name' => 'Filter', + 'type' => 'text', + 'required' => false, + 'exampleValue' => 'search:jenkins;product:gitlab,cvss:critical', + 'title' => 'Syntax: param1:value1,param2:value2;param1query2:param2query2. 
See https://docs.opencve.io/api/cve/ for parameters' + ], + 'upd_timestamp' => [ + 'name' => 'Use updated_at instead of created_at as timestamp', + 'type' => 'checkbox' + ], + 'trunc_summary' => [ + 'name' => 'Truncate summary for header', + 'type' => 'number', + 'defaultValue' => 100 + ], + 'fetch_contents' => [ + 'name' => 'Fetch detailed contents for CVEs', + 'defaultValue' => 'checked', + 'type' => 'checkbox' + ] + ] + ]; + + const CSS = ' + '; + + public function collectData() + { + $creds = $this->getInput('login') . ':' . $this->getInput('password'); + $authHeader = 'Authorization: Basic ' . base64_encode($creds); + $instance = $this->getInput('instance'); + + $queries = []; + $filter = $this->getInput('filter'); + $filterValues = []; + if ($filter && mb_strlen($filter) > 0) { + $filterValues = explode(';', $filter); + } else { + $queries[''] = []; + } + foreach ($filterValues as $filterValue) { + $params = explode(',', $filterValue); + $queryName = $filterValue; + $query = []; + foreach ($params as $param) { + [$key, $value] = explode(':', $param); + if ($key == 'title') { + $queryName = $value; + } else { + $query[$key] = $value; + } + } + $queries[$queryName] = $query; + } + + $fetchedIds = []; + + foreach ($queries as $queryName => $query) { + for ($i = 1; $i <= $this->getInput('pages'); $i++) { + $queryPaginated = array_merge($query, ['page' => $i]); + $url = $instance . '/api/cve?' . http_build_query($queryPaginated); + $response = getContents( + $url, + [$authHeader] + ); + $titlePrefix = ''; + if (count($queries) > 1) { + $titlePrefix = '[' . $queryName . '] '; + } + + foreach (json_decode($response) as $cveItem) { + if (array_key_exists($cveItem->id, $fetchedIds)) { + continue; + } + $fetchedIds[$cveItem->id] = true; + $item = [ + 'uri' => $instance . '/cve/' . 
$cveItem->id, + 'uid' => $cveItem->id, + ]; + if ($this->getInput('upd_timestamp') == 1) { + $item['timestamp'] = strtotime($cveItem->updated_at); + } else { + $item['timestamp'] = strtotime($cveItem->created_at); + } + if ($this->getInput('fetch_contents')) { + [$content, $title] = $this->fetchContents( + $cveItem, + $titlePrefix, + $instance, + $authHeader + ); + $item['content'] = $content; + $item['title'] = $title; + } else { + $item['content'] = $cveItem->summary . $this->getLinks($cveItem->id); + $item['title'] = $this->getTitle($titlePrefix, $cveItem); + } + $this->items[] = $item; + } + } + } + usort($this->items, function ($a, $b) { + return $b['timestamp'] - $a['timestamp']; + }); + } + + private function getTitle($titlePrefix, $cveItem) + { + $summary = $cveItem->summary; + $limit = $this->getInput('limit'); + if ($limit && mb_strlen($summary) > 100) { + $summary = mb_substr($summary, 0, $limit) + '...'; + } + return $titlePrefix . $cveItem->id . '. ' . $summary; + } + + private function fetchContents($cveItem, $titlePrefix, $instance, $authHeader) + { + $url = $instance . '/api/cve/' . $cveItem->id; + $response = getContents( + $url, + [$authHeader] + ); + $datum = json_decode($response); + + $title = $this->getTitleFromDatum($datum, $titlePrefix); + + $result = self::CSS; + $result .= '

' . $cveItem->id . '

'; + $result .= $this->getCVSSLabels($datum); + $result .= '

' . $datum->summary . '

'; + $result .= <<Information: +

+

    +
  • Publication date: {$datum->raw_nvd_data->published} +
  • Last modified: {$datum->raw_nvd_data->lastModified} +
  • Last modified: {$datum->raw_nvd_data->lastModified} +
+

+ EOD; + + $result .= $this->getV3Table($datum); + $result .= $this->getV2Table($datum); + + $result .= $this->getLinks($datum->id); + $result .= $this->getReferences($datum); + + $result .= $this->getVendors($datum); + + return [$result, $title]; + } + + private function getTitleFromDatum($datum, $titlePrefix) + { + $title = $titlePrefix; + if ($datum->cvss->v3) { + $title .= "[v3: {$datum->cvss->v3}] "; + } + if ($datum->cvss->v2) { + $title .= "[v2: {$datum->cvss->v2}] "; + } + $title .= $datum->id . '. '; + $titlePostfix = $datum->summary; + $limit = $this->getInput('limit'); + if ($limit && mb_strlen($titlePostfix) > 100) { + $titlePostfix = mb_substr($titlePostfix, 0, $limit) + '...'; + } + $title .= $titlePostfix; + return $title; + } + + private function getCVSSLabels($datum) + { + $CVSSv2Text = 'n/a'; + $CVSSv2Class = 'cvss-na-color'; + if ($datum->cvss->v2) { + $importance = ''; + if ($datum->cvss->v2 >= 7) { + $importance = 'HIGH'; + $CVSSv2Class = 'cvss-high-color'; + } else if ($datum->cvss->v2 >= 4) { + $importance = 'MEDIUM'; + $CVSSv2Class = 'cvss-medium-color'; + } else { + $importance = 'LOW'; + $CVSSv2Class = 'cvss-low-color'; + } + $CVSSv2Text = sprintf('[%s] %.1f', $importance, $datum->cvss->v2); + } + $CVSSv2Item = "
CVSS v2:
{$CVSSv2Text}
"; + + $CVSSv3Text = 'n/a'; + $CVSSv3Class = 'cvss-na-color'; + if ($datum->cvss->v3) { + $importance = ''; + if ($datum->cvss->v3 >= 9) { + $importance = 'CRITICAL'; + $CVSSv3Class = 'cvss-crit-color'; + } else if ($datum->cvss->v3 >= 7) { + $importance = 'HIGH'; + $CVSSv3Class = 'cvss-high-color'; + } else if ($datum->cvss->v3 >= 4) { + $importance = 'MEDIUM'; + $CVSSv3Class = 'cvss-medium-color'; + } else { + $importance = 'LOW'; + $CVSSv3Class = 'cvss-low-color'; + } + $CVSSv3Text = sprintf('[%s] %.1f', $importance, $datum->cvss->v3); + } + $CVSSv3Item = "
CVSS v3:
{$CVSSv3Text}
"; + return '
' . $CVSSv3Item . $CVSSv2Item . '
'; + } + + private function getReferences($datum) + { + if (count($datum->raw_nvd_data->references) == 0) { + return ''; + } + $res = '

References:

    '; + foreach ($datum->raw_nvd_data->references as $ref) { + $item = '
  • '; + if (isset($ref->tags) && count($ref->tags) > 0) { + $item .= '[' . implode(', ', $ref->tags) . '] '; + } + $item .= "url}\">{$ref->url}"; + $item .= '
  • '; + $res .= $item; + } + $res .= '

'; + return $res; + } + + private function getLinks($id) + { + return <<Links +

+

+

+ EOD; + } + + private function getV3Table($datum) + { + $metrics = $datum->raw_nvd_data->metrics; + if (!isset($metrics->cvssMetricV31) || count($metrics->cvssMetricV31) == 0) { + return ''; + } + $v3 = $metrics->cvssMetricV31[0]; + $data = $v3->cvssData; + return << +

CVSS v3 details

+ + + + + + + + + + + + + + + + + + + + + +
Impact score{$v3->impactScore}Exploitability score{$v3->exploitabilityScore}
Attack vector{$data->attackVector}Confidentiality Impact{$data->confidentialityImpact}
Attack complexity{$data->attackComplexity}Integrity Impact{$data->integrityImpact}
Privileges Required{$data->privilegesRequired}Availability Impact{$data->availabilityImpact}
User Interaction{$data->userInteraction}Scope{$data->scope}
+ + EOD; + } + + private function getV2Table($datum) + { + $metrics = $datum->raw_nvd_data->metrics; + if (!isset($metrics->cvssMetricV2) || count($metrics->cvssMetricV2) == 0) { + return ''; + } + $v2 = $metrics->cvssMetricV2[0]; + $data = $v2->cvssData; + return << +

CVSS v2 details

+ + + + + + + + + + + + + + + + + + +
Impact score{$v2->impactScore}Exploitability score{$v2->exploitabilityScore}
Access Vector{$data->accessVector}Confidentiality Impact{$data->confidentialityImpact}
Access Complexity{$data->accessComplexity}Integrity Impact{$data->integrityImpact}
Authentication{$data->authentication}Availability Impact{$data->availabilityImpact}
+ + EOD; + } + + private function getVendors($datum) + { + if (count((array)$datum->vendors) == 0) { + return ''; + } + $res = '

Affected products

    '; + foreach ($datum->vendors as $vendor => $products) { + $res .= "
  • {$vendor}"; + if (count($products) > 0) { + $res .= '
      '; + foreach ($products as $product) { + $res .= '
    • ' . $product . '
    • '; + } + $res .= '
    '; + } + $res .= '
  • '; + } + $res .= '

'; + } +} diff --git a/bridges/PatreonBridge.php b/bridges/PatreonBridge.php index a21624253c1..895a9306fa2 100644 --- a/bridges/PatreonBridge.php +++ b/bridges/PatreonBridge.php @@ -228,7 +228,7 @@ public function collectData() //post attachments if ( isset($post->relationships->attachments->data) && - sizeof($post->relationships->attachments->data) > 0 + count($post->relationships->attachments->data) > 0 ) { $item['enclosures'] = []; $item['content'] .= '

Attachments:

    '; diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 2516fc1ee33..6e41cf20745 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -44,36 +44,7 @@ protected function collectDataKeywords() protected function collectDeals($url) { $html = getSimpleHTMLDOM($url); - $list = $html->find('article[id]'); - - // Deal Image Link CSS Selector - $selectorImageLink = implode( - ' ', /* Notice this is a space! */ - [ - 'cept-thread-image-link', - 'imgFrame', - 'imgFrame--noBorder', - 'thread-listImgCell', - ] - ); - - // Deal Link CSS Selector - $selectorLink = implode( - ' ', /* Notice this is a space! */ - [ - 'cept-tt', - 'thread-link', - 'linkPlain', - ] - ); - - // Deal Hotness CSS Selector - $selectorHot = implode( - ' ', /* Notice this is a space! */ - [ - 'vote-box' - ] - ); + $list = $html->find('article[id][class*=thread--deal]]'); // Deal Description CSS Selector $selectorDescription = implode( @@ -83,65 +54,39 @@ protected function collectDeals($url) ] ); - // Deal Date CSS Selector - $selectorDate = implode( - ' ', /* Notice this is a space! 
*/ - [ - 'size--all-s', - 'flex', - 'boxAlign-jc--all-fe' - ] - ); - // If there is no results, we don't parse the content because it display some random deals - $noresult = $html->find('h3[class*=text--b]', 0); - if ($noresult != null && strpos($noresult->plaintext, $this->i8n('no-results')) !== false) { + $noresult = $html->find('div[id=content-list]', 0)->find('h2', 0); + if ($noresult !== null) { $this->items = []; } else { foreach ($list as $deal) { - $item = []; - $item['uri'] = $this->getDealURI($deal); - $item['title'] = $this->getTitle($deal); - $item['author'] = $deal->find('span.thread-username', 0)->plaintext; - // Get the JSON Data stored as vue $jsonDealData = $this->getDealJsonData($deal); + $dealMeta = Json::decode($deal->find('div[class=threadGrid-headerMeta]', 0)->find('div[class=js-vue2]', 1)->getAttribute('data-vue2')); + + $item = []; + $item['uri'] = $this->getDealURI($jsonDealData); + $item['title'] = $this->getTitle($jsonDealData); + $item['author'] = $this->getDealAuthor($jsonDealData); $item['content'] = '
    ' . $this->getImage($deal) - . '"/>' - . $this->getHTMLTitle($item) + . '' + . $this->getHTMLTitle($jsonDealData) . $this->getPrice($jsonDealData) . $this->getDiscount($jsonDealData) - . $this->getShipsFrom($deal) - . $this->getShippingCost($deal) + . $this->getShipsFrom($dealMeta) + . $this->getShippingCost($jsonDealData) . $this->getSource($jsonDealData) + . $this->getDealLocation($dealMeta) . $deal->find('div[class*=' . $selectorDescription . ']', 0)->innertext . '' . $this->getTemperature($jsonDealData) . '
    '; - // Check if a clock icon is displayed on the deal - $clocks = $deal->find('svg[class*=icon--clock]'); - if ($clocks !== null && count($clocks) > 0) { - // Get the last clock, corresponding to the deal posting date - $clock = end($clocks); - - // Find the text corresponding to the clock - $spanDateDiv = $clock->next_sibling(); - $itemDate = $spanDateDiv->plaintext; - // In some case of a Local deal, there is no date, but we can use - // this case for other reason (like date not in the last field) - if ($this->contains($itemDate, $this->i8n('localdeal'))) { - $item['timestamp'] = time(); - } elseif ($this->contains($itemDate, $this->i8n('relative-date-indicator'))) { - $item['timestamp'] = $this->relativeDateToTimestamp($itemDate); - } else { - $item['timestamp'] = $this->parseDate($itemDate); - } - } + $item['timestamp'] = $this->getPublishedDate($jsonDealData); $this->items[] = $item; } } @@ -229,13 +174,16 @@ protected function collectDataTalk() $item['uid'] = $comment->commentId; // Timestamp handling needs a new parsing function if ($onlyWithUrl == true) { - // Count Links and Quote Links - $content = str_get_html($item['content']); - $countLinks = count($content->find('a[href]')); - $countQuoteLinks = count($content->find('a[href][class=userHtml-quote-source]')); - // Only add element if there are Links ans more links tant Quote links - if ($countLinks > 0 && $countLinks > $countQuoteLinks) { - $this->items[] = $item; + // Only parse the comment if it is not empry + if ($item['content'] != '') { + // Count Links and Quote Links + $content = str_get_html($item['content']); + $countLinks = count($content->find('a[href]')); + $countQuoteLinks = count($content->find('a[href][class=userHtml-quote-source]')); + // Only add element if there are Links and more links tant Quote links + if ($countLinks > 0 && $countLinks > $countQuoteLinks) { + $this->items[] = $item; + } } } else { $this->items[] = $item; @@ -284,22 +232,31 @@ private function 
getPrice($jsonDealData) } } + /** + * Get the Publish Date from a Deal if it exists + * @return integer Timestamp of the published date of the deal + */ + private function getPublishedDate($jsonDealData) + { + return $jsonDealData['props']['thread']['publishedAt']; + } + + /** + * Get the Deal Author from a Deal if it exists + * @return String Author of the deal + */ + private function getDealAuthor($jsonDealData) + { + return $jsonDealData['props']['thread']['user']['username']; + } + /** * Get the Title from a Deal if it exists * @return string String of the deal title */ - private function getTitle($deal) + private function getTitle($jsonDealData) { - $titleRoot = $deal->find('div[class*=threadGrid-title]', 0); - $titleA = $titleRoot->find('a[class*=thread-link]', 0); - $titleFirstChild = $titleRoot->first_child(); - if ($titleA !== null) { - $title = $titleA->plaintext; - } else { - // In some case, expired deals have a different format - $title = $titleRoot->find('span', 0)->plaintext; - } - + $title = $jsonDealData['props']['thread']['title']; return $title; } @@ -310,7 +267,7 @@ private function getTitle($deal) private function getTalkTitle() { $html = getSimpleHTMLDOMCached($this->getInput('url')); - $title = $html->find('.thread-title', 0)->plaintext; + $title = $html->find('title', 0)->plaintext; return $title; } @@ -318,14 +275,10 @@ private function getTalkTitle() * Get the HTML Title code from an item * @return string String of the deal title */ - private function getHTMLTitle($item) + private function getHTMLTitle($jsonDealData) { - if ($item['uri'] == '') { - $html = '

    ' . $item['title'] . '

    '; - } else { - $html = '

    ' - . $item['title'] . '

    '; - } + $html = '

    ' + . $this->getTitle($jsonDealData) . '

    '; return $html; } @@ -334,10 +287,11 @@ private function getHTMLTitle($item) * Get the URI from a Deal if it exists * @return string String of the deal URI */ - private function getDealURI($deal) + private function getDealURI($jsonDealData) { - $dealId = $deal->attr['id']; - $uri = $this->i8n('bridge-uri') . $this->i8n('uri-deal') . str_replace('_', '-', $dealId); + $dealSlug = $jsonDealData['props']['thread']['titleSlug']; + $dealId = $jsonDealData['props']['thread']['threadId']; + $uri = $this->i8n('bridge-uri') . $this->i8n('uri-deal') . $dealSlug . '-' . $dealId; return $uri; } @@ -345,18 +299,14 @@ private function getDealURI($deal) * Get the Shipping costs from a Deal if it exists * @return string String of the deal shipping Cost */ - private function getShippingCost($deal) + private function getShippingCost($jsonDealData) { - if ($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0) != null) { - if ($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0)->children(1) != null) { - return '
    ' . $this->i8n('shipping') . ' : ' - . strip_tags($deal->find('span[class*=space--ml-2 size--all-s overflow--wrap-off]', 0)->children(1)->innertext) - . '
    '; - } else { + $isFree = $jsonDealData['props']['thread']['shipping']['isFree']; + $price = $jsonDealData['props']['thread']['shipping']['price']; + if ($isFree !== null) { return '
    ' . $this->i8n('shipping') . ' : ' - . strip_tags($deal->find('span[class*=text--color-greyShade flex--inline]', 0)->innertext) + . $price . ' ' . $this->i8n('currency') . '
    '; - } } else { return ''; } @@ -422,6 +372,25 @@ private function getDiscount($jsonDealData) } } + /** + * Get the Deal location if it exists + * @return string String of the deal location + */ + private function getDealLocation($dealMeta) + { + $ribbons = $dealMeta['props']['metaRibbons']; + $isLocal = false; + foreach ($ribbons as $ribbon) { + $isLocal |= ($ribbon['type'] == 'local'); + } + if ($isLocal) { + $content = '
    ' . $this->i8n('deal-type') . ' : ' . $this->i8n('localdeal') . '
    '; + } else { + $content = ''; + } + return $content; + } + /** * Get the Picture URL from a Deal if it exists * @return string String of the deal Picture URL @@ -430,16 +399,15 @@ private function getImage($deal) { // Get thread Image JSON content $content = Json::decode($deal->find('div[class*=threadGrid-image]', 0)->find('div[class=js-vue2]', 0)->getAttribute('data-vue2')); - return $content['props']['threadImageUrl']; + return ''; } /** * Get the originating country from a Deal if it exists * @return string String of the deal originating country */ - private function getShipsFrom($deal) + private function getShipsFrom($dealMeta) { - $dealMeta = Json::decode($deal->find('div[class=threadGrid-headerMeta]', 0)->find('div[class=js-vue2]', 1)->getAttribute('data-vue2')); $metas = $dealMeta['props']['metaRibbons']; $shipsFrom = null; foreach ($metas as $meta) { @@ -453,104 +421,6 @@ private function getShipsFrom($deal) return ''; } - /** - * Transforms a local date into a timestamp - * @return int timestamp of the input date - */ - private function parseDate($string) - { - $month_local = $this->i8n('local-months'); - $month_en = [ - 'January', - 'February', - 'March', - 'April', - 'May', - 'June', - 'July', - 'August', - 'September', - 'October', - 'November', - 'December' - ]; - - // A date can be prfixed with some words, we remove theme - $string = $this->removeDatePrefixes($string); - // We translate the local months name in the english one - $date_str = trim(str_replace($month_local, $month_en, $string)); - - // If the date does not contain any year, we add the current year - if (!preg_match('/[0-9]{4}/', $string)) { - $date_str .= ' ' . 
date('Y'); - } - - // Add the Hour and minutes - $date_str .= ' 00:00'; - $date = DateTime::createFromFormat('j F Y H:i', $date_str); - // In some case, the date is not recognized : as a workaround the actual date is taken - if ($date === false) { - $date = new DateTime(); - } - return $date->getTimestamp(); - } - - /** - * Remove the prefix of a date if it has one - * @return the date without prefiux - */ - private function removeDatePrefixes($string) - { - $string = str_replace($this->i8n('date-prefixes'), [], $string); - return $string; - } - - /** - * Remove the suffix of a relative date if it has one - * @return the relative date without suffixes - */ - private function removeRelativeDateSuffixes($string) - { - if (count($this->i8n('relative-date-ignore-suffix')) > 0) { - $string = preg_replace($this->i8n('relative-date-ignore-suffix'), '', $string); - } - return $string; - } - - /** - * Transforms a relative local date into a timestamp - * @return int timestamp of the input date - */ - private function relativeDateToTimestamp($str) - { - $date = new DateTime(); - - // The minimal amount of time substracted is a minute : the seconds in the resulting date would be related to the execution time of the script. - // This make no sense, so we set the seconds manually to "00". 
- $date->setTime($date->format('H'), $date->format('i'), 0); - - // In case of update date, replace it by the regular relative date first word - $str = str_replace($this->i8n('relative-date-alt-prefixes'), $this->i8n('local-time-relative')[0], $str); - - $str = $this->removeRelativeDateSuffixes($str); - - $search = $this->i8n('local-time-relative'); - - $replace = [ - '-', - 'minute', - 'hour', - 'day', - 'month', - 'year', - '' - ]; - $date->modify(str_replace($search, $replace, $str)); - - - return $date->getTimestamp(); - } - /** * Returns the RSS Feed title according to the parameters * @return string the RSS feed Tiyle @@ -605,7 +475,7 @@ private function getSearchURI() $priceFrom = $this->getInput('priceFrom'); $priceTo = $this->getInput('priceTo'); $url = $this->i8n('bridge-uri') - . 'search/advanced?q=' + . 'search?q=' . urlencode($q) . '&hide_expired=' . $hide_expired . '&hide_local=' . $hide_local diff --git a/bridges/PicukiBridge.php b/bridges/PicukiBridge.php index f1d45e2acd8..5f1096b8470 100644 --- a/bridges/PicukiBridge.php +++ b/bridges/PicukiBridge.php @@ -89,9 +89,6 @@ public function collectData() $imageUrlParts[count($imageUrlParts) - 1] = urlencode($imageUrlParts[count($imageUrlParts) - 1]); $imageUrl = implode('/', $imageUrlParts); - // add fake file extension for it to be recognized as image/jpeg instead of application/octet-stream - $imageUrl = $imageUrl . 
'#.jpg'; - $this->items[] = [ 'uri' => $url, 'author' => $author, diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index fc5b1c19795..8338fb25bf3 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -39,6 +39,9 @@ private function fixLowRes() $pattern = '/https\:\/\/i\.pinimg\.com\/[a-zA-Z0-9]*x\//'; foreach ($this->items as $item) { $item['content'] = preg_replace($pattern, 'https://i.pinimg.com/originals/', $item['content']); + $item['enclosures'] = [ + $item['uri'], + ]; $newitems[] = $item; } $this->items = $newitems; diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php index fc4443ed2d1..604b5d4bed3 100644 --- a/bridges/PixivBridge.php +++ b/bridges/PixivBridge.php @@ -160,7 +160,8 @@ private function getDataFromJSON($json, $json_key) $json = array_reduce($json, function ($acc, $i) { if ($i['illustType'] === 0) { $acc[] = $i; - }return $acc; + } + return $acc; }, []); break; case 'manga': @@ -235,8 +236,10 @@ public function collectData() $item = []; $item['uid'] = $result['id']; + $subpath = array_key_exists('illustType', $result) ? 'artworks/' : 'novel/show.php?id='; $item['uri'] = static::URI . $subpath . $result['id']; + $item['title'] = $result['title']; $item['author'] = $result['userName']; $item['timestamp'] = $result['updateDate']; @@ -253,8 +256,6 @@ public function collectData() } } else { $img_url = $result['url']; - // Temporarily disabling caching of the image - //$img_url = $this->cacheImage($result['url'], $result['id'], array_key_exists('illustType', $result)); } // Currently, this might result in broken image due to their strict referrer check @@ -271,46 +272,6 @@ public function collectData() } } - /** - * todo: remove manual file cache - * See bridge specific documentation for alternative option. - */ - private function cacheImage($url, $illustId, $isImage) - { - $illustId = preg_replace('/[^0-9]/', '', $illustId); - $thumbnailurl = $url; - - $path = PATH_CACHE . 
'pixiv_img/'; - if (!is_dir($path)) { - mkdir($path, 0755, true); - } - - $path .= $illustId; - if ($this->getInput('fullsize')) { - $path .= '_fullsize'; - } - $path .= '.jpg'; - - if (!is_file($path)) { - // Get fullsize URL - if ($isImage && $this->getInput('fullsize')) { - $ajax_uri = static::URI . 'ajax/illust/' . $illustId; - $imagejson = $this->getData($ajax_uri, true, true); - $url = $imagejson['body']['urls']['original']; - } - - $headers = ['Referer: ' . static::URI]; - try { - $illust = $this->getData($url, true, false, $headers); - } catch (Exception $e) { - $illust = $this->getData($thumbnailurl, true, false, $headers); // Original thumbnail - } - file_put_contents($path, $illust); - } - - return get_home_page_url() . 'cache/pixiv_img/' . preg_replace('/.*\//', '', $path); - } - private function checkOptions() { $proxy = $this->getOption('proxy_url'); diff --git a/bridges/PresidenciaPTBridge.php b/bridges/PresidenciaPTBridge.php index 052b2751d4a..247e8fce5ad 100644 --- a/bridges/PresidenciaPTBridge.php +++ b/bridges/PresidenciaPTBridge.php @@ -76,7 +76,7 @@ public function collectData() }, self::PT_MONTH_NAMES), array_map(function ($num) { return sprintf('-%02d-', $num); - }, range(1, sizeof(self::PT_MONTH_NAMES))), + }, range(1, count(self::PT_MONTH_NAMES))), $edt ); diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index 9c72f9963f8..7ece0e15b30 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -121,7 +121,7 @@ private function collectDataInternal(): void $comments = false; $frontend = $this->getInput('frontend'); if ($frontend == '') { - $frontend = 'https://old.reddit.com'; + $frontend = 'https://old.reddit.com'; } $section = $this->getInput('d'); @@ -139,36 +139,13 @@ private function collectDataInternal(): void break; } - if (!($this->getInput('search') === '')) { - $keywords = $this->getInput('search'); - $keywords = str_replace([',', ' '], '%20', $keywords); - $keywords = $keywords . 
'%20'; - } else { - $keywords = ''; - } - - if (!empty($this->getInput('f')) && $this->queriedContext == 'single') { - $flair = $this->getInput('f'); - $flair = str_replace(' ', '%20', $flair); - $flair = 'flair%3A%22' . $flair . '%22%20'; - } else { - $flair = ''; - } + $search = $this->getInput('search'); + $flareInput = $this->getInput('f'); foreach ($subreddits as $subreddit) { - $name = trim($subreddit); - $url = self::URI - . '/search.json?q=' - . $keywords - . $flair - . ($user ? 'author%3A' : 'subreddit%3A') - . $name - . '&sort=' - . $this->getInput('d') - . '&include_over_18=on'; - $version = 'v0.0.1'; $useragent = "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"; + $url = self::createUrl($search, $flareInput, $subreddit, $user, $section, $this->queriedContext); $json = getContents($url, ['User-Agent: ' . $useragent]); $parsedJson = Json::decode($json, false); @@ -212,7 +189,7 @@ private function collectDataInternal(): void // Comment $item['content'] = htmlspecialchars_decode($data->body_html); - } elseif ($data->is_self) { + } elseif ($data->is_self && isset($data->selftext_html)) { // Text post $item['content'] = htmlspecialchars_decode($data->selftext_html); @@ -278,6 +255,32 @@ private function collectDataInternal(): void }); } + public static function createUrl($search, $flareInput, $subreddit, bool $user, $section, $queriedContext): string + { + if ($search === '') { + $keywords = ''; + } else { + $keywords = $search; + $keywords = str_replace([',', ' '], ' ', $keywords); + $keywords = $keywords . ' '; + } + + if ($flareInput && $queriedContext == 'single') { + $flair = $flareInput; + $flair = str_replace([',', ' '], ' ', $flair); + $flair = 'flair:"' . $flair . '" '; + } else { + $flair = ''; + } + $name = trim($subreddit); + $query = [ + 'q' => $keywords . $flair . ($user ? 'author:' : 'subreddit:') . $name, + 'sort' => $section, + 'include_over_18' => 'on', + ]; + return 'https://old.reddit.com/search.json?' . 
http_build_query($query); + } + public function getIcon() { return 'https://www.redditstatic.com/desktop2x/img/favicon/favicon-96x96.png'; diff --git a/bridges/RoosterTeethBridge.php b/bridges/RoosterTeethBridge.php index 21bac4fec50..464c83a8c9b 100644 --- a/bridges/RoosterTeethBridge.php +++ b/bridges/RoosterTeethBridge.php @@ -17,6 +17,7 @@ class RoosterTeethBridge extends BridgeAbstract 'values' => [ 'All channels' => 'all', 'Achievement Hunter' => 'achievement-hunter', + 'Camp Camp' => 'camp-camp', 'Cow Chop' => 'cow-chop', 'Death Battle' => 'death-battle', 'Friends of RT' => 'friends-of-rt', diff --git a/bridges/ScientificAmericanBridge.php b/bridges/ScientificAmericanBridge.php index d575bf9488f..51cdc0d9f2e 100644 --- a/bridges/ScientificAmericanBridge.php +++ b/bridges/ScientificAmericanBridge.php @@ -25,7 +25,7 @@ class ScientificAmericanBridge extends FeedExpander ]; const FEED = 'http://rss.sciam.com/ScientificAmerican-Global'; - const ISSUES = 'https://www.scientificamerican.com/archive/issues/2020s/'; + const ISSUES = 'https://www.scientificamerican.com/archive/issues/'; public function collectData() { @@ -50,7 +50,7 @@ public function collectData() if ($this->getInput('addContents') == 1) { usort($this->items, function ($item1, $item2) { - return $item1['timestamp'] - $item2['timestamp']; + return $item2['timestamp'] - $item1['timestamp']; }); } } @@ -66,8 +66,12 @@ private function collectFeed() private function collectIssues() { $html = getSimpleHTMLDOMCached(self::ISSUES); - $content = $html->getElementById('content')->children(3); - $issues = $content->children(); + $content = $html->getElementById('app'); + $issues_list = $content->find('div[class^="issue__list"]', 0); + if ($issues_list == null) { + return []; + } + $issues = $issues_list->find('div[class^="list__item"]'); $issues_count = min( (int)$this->getInput('parseIssues'), count($issues) @@ -87,36 +91,19 @@ private function parseIssue($issue_link) $items = []; $html = 
getSimpleHTMLDOMCached($issue_link); - $features = $html->find('[class^=Detail_issue__article__previews__featured]', 0); - if ($features != null) { - $articles = $features->find('div', 0)->children(); + $blocks = $html->find('[class^="issueArchiveArticleListCompact"]'); + foreach ($blocks as $block) { + $articles = $block->find('article[class*="article"]'); foreach ($articles as $article) { - $h4 = $article->find('h4', 0); - $a = $h4->find('a', 0); + $a = $article->find('a[class^="articleLink"]', 0); $link = 'https://scientificamerican.com' . $a->getAttribute('href'); - $title = $a->plaintext; - $items[] = [ + $title = $a->find('h2[class^="articleTitle"]', 0); + array_push($items, [ 'uri' => $link, - 'title' => $title, + 'title' => $title->plaintext, 'uid' => $link, 'content' => '' - ]; - } - } - - $departments = $html->find('[class^=Detail_issue__article__previews__departments]', 0); - if ($departments != null) { - $headers = $departments->find('[class*=Listing_article__listing__title]'); - foreach ($headers as $header) { - $a = $header->find('a', 0); - $link = 'https://scientificamerican.com' . 
$a->getAttribute('href'); - $title = $a->plaintext; - $items[] = [ - 'uri' => $link, - 'title' => $title, - 'uid' => $link, - 'content' => '' - ]; + ]); } } @@ -126,63 +113,66 @@ private function parseIssue($issue_link) private function updateItem($item) { $html = getSimpleHTMLDOMCached($item['uri']); - $article = $html->find('#sa_body', 0)->find('article', 0); + $article = $html->find('#app', 0)->find('article', 0); - $time = $article->find('time[itemprop="datePublished"]', 0); - if ($time == null) { - $time = $html->find('span[itemprop="datePublished"]', 0); - } + $time = $article->find('p[class^="article_pub_date"]', 0); if ($time) { $datetime = DateTime::createFromFormat('F j, Y', $time->plaintext); + $datetime->setTime(0, 0, 0, 0); $item['timestamp'] = $datetime->format('U'); } - $main = $article->find('section.article-grid__main', 0); - if ($main == null) { - $main = $article->find('div.article-text', 0); + $authors = $article->find('a[class^="article_authors__link"]'); + if ($authors) { + $author = implode('; ', array_map(fn($a) => $a->plaintext, $authors)); + $item['author'] = $author; } - if ($main == null) { - return $item; + $res = ''; + $desc = $article->find('div[class^="article_dek"]', 0); + if ($desc) { + $res .= $desc->innertext; } - foreach ($main->find('img') as $img) { - $img->removeAttribute('width'); - $img->removeAttribute('height'); - $img->setAttribute('style', 'height: auto; width: auto; max-height: 768px'); + $lead_figure = $article->find('figure[class^="lead_image"]', 0); + if ($lead_figure) { + $res .= $lead_figure->outertext; } - $rights_link = $main->find('div.article-rightslink', 0); - if ($rights_link != null) { - $rights_link->parent->removeChild($rights_link); - } - $reprints_link = $main->find('div.article-reprintsLink', 0); - if ($reprints_link != null) { - $reprints_link->parent->removeChild($reprints_link); - } - $about_section = $main->find('section.article-author-container', 0); - if ($about_section != null) { - 
$about_section->parent->removeChild($about_section); - } - $read_next = $main->find('#read-next', 0); - if ($read_next != null) { - $read_next->parent->removeChild($read_next); - } - - foreach ($main->find('iframe') as $iframe) { - $a = $html->createElement('a'); - $a->href = $iframe->src; - $a->innertext = $iframe->src; - $iframe->parent->appendChild($a); - $iframe->parent->removeChild($iframe); + $content = $article->find('div[class^="article__content"]', 0); + if ($content) { + foreach ($content->children() as $block) { + if (str_contains($block->innertext, 'On supporting science journalism')) { + continue; + } + if ( + ($block->tag == 'p' && $block->getAttribute('data-block') == 'sciam/paragraph') + || ($block->tag == 'figure' && str_starts_with($block->class, 'article__image')) + ) { + $iframe = $block->find('iframe', 0); + if ($iframe) { + $res .= "src}\">{$iframe->src}"; + } else { + $res .= $block->outertext; + } + } else if ($block->tag == 'h2') { + $res .= '

    ' . $block->innertext . '

    '; + } else if ($block->tag == 'blockquote') { + $res .= $block->outertext; + } else if ($block->tag == 'hr' && $block->getAttribute('data-block') == 'sciam/raw_html') { + $res .= '
    '; + } + } } - $authors = $main->find('span[itemprop="author"]', 0); - if ($authors != null) { - $item['author'] = $authors->plaintext; + $footer = $article->find('footer[class*="footer"]', 0); + if ($footer) { + $bios = $footer->find('div[class^=bio]'); + $bio = implode('', array_map(fn($b) => $b->innertext, $bios)); + $res .= $bio; } - $item['content'] = $main->innertext; + $item['content'] = $res; return $item; } } diff --git a/bridges/ScribbleHubBridge.php b/bridges/ScribbleHubBridge.php index 0f7c7a6c7ff..b4f7beaa13c 100644 --- a/bridges/ScribbleHubBridge.php +++ b/bridges/ScribbleHubBridge.php @@ -23,6 +23,14 @@ class ScribbleHubBridge extends FeedExpander // Example: latest chapters from Uskweirs 'exampleValue' => '965299', ], + ], + 'List' => [ + 'url' => [ + 'name' => 'url', + 'required' => true, + // Example: latest stories with the 'Transgender' tag + 'exampleValue' => 'https://www.scribblehub.com/series-finder/?sf=1&gi=6&tgi=1088&sort=dateadded', + ], ] ]; @@ -34,6 +42,10 @@ public function getIcon() public function collectData() { $url = 'https://rssscribblehub.com/rssfeed.php?type='; + if ($this->queriedContext === 'List') { + $this->collectList($this->getURI()); + return; + } if ($this->queriedContext === 'Author') { $url = $url . 'author&uid=' . 
$this->getInput('uid'); } else { //All and Series use the same source feed @@ -42,6 +54,44 @@ public function collectData() $this->collectExpandableDatas($url); } + protected $author = ''; + + private function collectList($url) + { + $html = getSimpleHTMLDOMCached($url); + foreach ($html->find('.search_main_box') as $element) { + $item = []; + + $title = $element->find('.search_title a', 0); + $item['title'] = $title->plaintext; + $item['uri'] = $title->href; + + $strdate = $element->find('[title="Last Updated"]', 0)->plaintext; + $item['timestamp'] = strtotime($strdate); + $item['uid'] = $item['uri']; + + $details = getSimpleHTMLDOMCached($item['uri']); + $item['enclosures'][] = $details->find('.fic_image img', 0)->src; + $item['content'] = $details->find('.wi_fic_desc', 0); + + foreach ($details->find('.fic_genre') as $tag) { + $item['categories'][] = $tag->plaintext; + } + foreach ($details->find('.stag') as $tag) { + $item['categories'][] = $tag->plaintext; + } + + $read_url = $details->find('.read_buttons a', 0)->href; + $read_html = getSimpleHTMLDOMCached($read_url); + $item['content'] .= '

    '; + $item['content'] .= $read_html->find('.chapter-title', 0); + $item['content'] .= '

    '; + $item['content'] .= $read_html->find('#chp_raw', 0); + + $this->items[] = $item; + } + } + protected function parseItem(array $item) { //For series, filter out other series from 'All' feed @@ -57,12 +107,13 @@ protected function parseItem(array $item) } $item['comments'] = $item['uri'] . '#comments'; + $item['uid'] = $item['uri']; try { $dom = getSimpleHTMLDOMCached($item['uri']); } catch (HttpException $e) { // 403 Forbidden, This means we got anti-bot response - if ($e->getCode() === 403) { + if ($e->getCode() === 403 || $e->getCode() === 429) { return $item; } throw $e; @@ -84,7 +135,6 @@ protected function parseItem(array $item) //Generate UID $item_pid = $dom->find('#mypostid', 0)->value; - $item['uid'] = $item_sid . "/$item_pid"; return $item; } @@ -102,12 +152,17 @@ public function getName() } catch (HttpException $e) { // 403 Forbidden, This means we got anti-bot response if ($e->getCode() === 403) { - return $item; + return $name; } throw $e; } $title = html_entity_decode($page->find('.fic_title', 0)->plaintext); break; + case 'List': + $page = getSimpleHTMLDOMCached($this->getURI()); + $title = $page->find('head > title', 0)->plaintext; + $title = explode(' |', $title)[0]; + break; } if (isset($title)) { $name .= " - $title"; @@ -125,6 +180,9 @@ public function getURI() case 'Series': $uri = self::URI . 'series/' . $this->getInput('sid') . '/a'; break; + case 'List': + $uri = $this->getInput('url'); + break; } return $uri; } diff --git a/bridges/TarnkappeBridge.php b/bridges/TarnkappeBridge.php new file mode 100644 index 00000000000..c04c9546558 --- /dev/null +++ b/bridges/TarnkappeBridge.php @@ -0,0 +1,79 @@ + [ + 'name' => 'Category', + 'required' => false, + 'title' => <<<'TITLE' + If you only want to subscribe to a specific category + you can enter it here. + If not, leave it blank to subscribe to everything. 
+ TITLE, + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Specify number of full articles to return', + 'defaultValue' => 10 + ] + ]]; + const LIMIT = 10; + + public function collectData() + { + if (empty($this->getInput('category'))) { + $category = 'https://tarnkappe.info/feed'; + } else { + $category = 'https://tarnkappe.info/artikel/' . $this->getInput('category') . '/feed'; + } + + $this->collectExpandableDatas( + $category, + $this->getInput('limit') ?: static::LIMIT + ); + } + + protected function parseItem(array $item) + { + if (strpos($item['uri'], 'https://tarnkappe.info/') !== 0) { + return $item; + } + + $article = getSimpleHTMLDOMCached($item['uri']); + + if ($article) { + $article = defaultLinkTo($article, $item['uri']); + $item = $this->addArticleToItem($item, $article); + } + + return $item; + } + + private function addArticleToItem($item, $article) + { + $item['content'] = $article->find('a.image-header', 0); + + $article = $article->find('main#article article div.card-content div.content.entry-content', 0); + + // remove unwanted stuff + foreach ( + $article->find('em, section, div.menu') as $element + ) { + $element->remove(); + } + // reload html, as remove() is buggy + $article = str_get_html($article->outertext); + + $item['content'] .= $article; + + return $item; + } +} diff --git a/bridges/TldrTechBridge.php b/bridges/TldrTechBridge.php index 984117b2d9d..d29553479ba 100644 --- a/bridges/TldrTechBridge.php +++ b/bridges/TldrTechBridge.php @@ -22,11 +22,15 @@ class TldrTechBridge extends BridgeAbstract 'type' => 'list', 'values' => [ 'Tech' => 'tech', - 'Crypto' => 'crypto', + 'Web Dev' => 'webdev', 'AI' => 'ai', - 'Web Dev' => 'engineering', + 'Information Security' => 'infosec', + 'Product Management' => 'product', + 'DevOps' => 'devops', + 'Crypto' => 'crypto', + 'Design' => 'design', + 'Marketing' => 'marketing', 'Founders' => 'founders', - 'Cybersecurity' => 'cybersecurity' ], 
'defaultValue' => 'tech' ] @@ -48,12 +52,17 @@ public function collectData() // Convert //2023-01-01 to unix timestamp $date_items = explode('/', $child->href); $date = strtotime(end($date_items)); - $this->items[] = [ - 'uri' => self::URI . $child->href, - 'title' => $child->plaintext, - 'timestamp' => $date, - 'content' => $this->extractContent(self::URI . $child->href), - ]; + $item_url = self::URI . ltrim($child->href, '/'); + try { + $this->items[] = [ + 'uri' => self::URI . $child->href, + 'title' => $child->plaintext, + 'timestamp' => $date, + 'content' => $this->extractContent($item_url), + ]; + } catch (HttpException $e) { + continue; + } $added++; if ($added >= $limit) { break; @@ -66,7 +75,7 @@ private function extractContent($url) $html = getSimpleHTMLDOM($url); $content = $html->find('div.content-center.mt-5', 0); if (!$content) { - return ''; + throw new HttpException('Could not find content', 500); } $subscribe_form = $content->find('div.mt-5 > div > form', 0); if ($subscribe_form) { diff --git a/bridges/TrelloBridge.php b/bridges/TrelloBridge.php index cab2bde2880..42651fd13fc 100644 --- a/bridges/TrelloBridge.php +++ b/bridges/TrelloBridge.php @@ -553,10 +553,8 @@ class TrelloBridge extends BridgeAbstract private function queryAPI($path, $params = []) { - $data = json_decode(getContents('https://trello.com/1/' - . $path - . '?' - . http_build_query($params))); + $url = 'https://trello.com/1/' . $path . '?' . http_build_query($params); + $data = json_decode(getContents($url)); return $data; } @@ -576,33 +574,21 @@ private function renderAction($action, $textOnly = false) && !$textOnly && isset($entity->originalUrl) ) { - $string = '

    '; + $string = sprintf( + '

    ', + $entity->originalUrl, + $entity->previewUrl ?? '' + ); } elseif ($type === 'card' && !$textOnly) { - $string = '' - . $entity->text - . ''; + $string = sprintf('%s', $entity->shortLink, $entity->text); } elseif ($type === 'member' && !$textOnly) { - $string = '' - . $entity->text - . ''; + $string = sprintf('%s', $entity->username, $entity->text); } elseif ($type === 'date') { $string = gmdate('M j, Y \a\t g:i A T', strtotime($entity->date)); } elseif ($type === 'translatable') { $string = self::ACTION_TEXTS[$entity->translationKey]; } else { - if (isset($entity->text)) { - $string = $entity->text; - } else { - $string = ''; - } + $string = $entity->text ?? ''; } $strings['{' . $entity_name . '}'] = $string; } diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index f408f8855ca..424cd6e3b20 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -95,10 +95,14 @@ public function collectData() if ($data->user === null) { throw new \Exception(sprintf('Unable to find channel `%s`', $channel)); } + $user = $data->user; if ($user->videos === null) { - throw new HttpException('Service Unavailable', 503); + // twitch regularly does this for unknown reasons + //$this->logger->info('Twitch returned empty set of videos', ['data' => $data]); + return; } + foreach ($user->videos->edges as $edge) { $video = $edge->node; diff --git a/bridges/VieDeMerdeBridge.php b/bridges/VieDeMerdeBridge.php index fc69b234780..249c80df71f 100644 --- a/bridges/VieDeMerdeBridge.php +++ b/bridges/VieDeMerdeBridge.php @@ -26,7 +26,7 @@ public function collectData() $html = getSimpleHTMLDOM(self::URI, []); $quotes = $html->find('article.bg-white'); - if (sizeof($quotes) === 0) { + if (count($quotes) === 0) { return; } diff --git a/bridges/YandexZenBridge.php b/bridges/YandexZenBridge.php index 8a3db48b2b8..572423284e2 100644 --- a/bridges/YandexZenBridge.php +++ b/bridges/YandexZenBridge.php @@ -3,17 +3,17 @@ class YandexZenBridge extends BridgeAbstract { const 
NAME = 'YandexZen Bridge'; - const URI = 'https://zen.yandex.com'; - const DESCRIPTION = 'Latest posts from the specified profile.'; + const URI = 'https://dzen.ru'; + const DESCRIPTION = 'Latest posts from the specified channel.'; const MAINTAINER = 'llamasblade'; const PARAMETERS = [ [ - 'username' => [ - 'name' => 'Username', + 'channelURL' => [ + 'name' => 'Channel URL', 'type' => 'text', 'required' => true, - 'title' => 'The account\'s username, found in its URL', - 'exampleValue' => 'dream_faity_diy', + 'title' => 'The channel\'s URL', + 'exampleValue' => 'https://dzen.ru/dream_faity_diy', ], 'limit' => [ 'name' => 'Limit', @@ -27,14 +27,41 @@ class YandexZenBridge extends BridgeAbstract ]; # credit: https://github.com/teromene see #1032 - const _API_URL = 'https://zen.yandex.ru/api/v3/launcher/more?channel_name='; + const _BASE_API_URL_WITH_CHANNEL_NAME = 'https://dzen.ru/api/v3/launcher/more?channel_name='; + const _BASE_API_URL_WITH_CHANNEL_ID = 'https://dzen.ru/api/v3/launcher/more?channel_id='; + + const _ACCOUNT_URL_WITH_CHANNEL_ID_REGEX = '#^https?://dzen\.ru/id/(?[a-z0-9]{24})#'; + const _ACCOUNT_URL_WITH_CHANNEL_NAME_REGEX = '#^https?://dzen\.ru/(?[\w\.]+)#'; + + private $channelRealName = null; # as shown in the webpage, not in the URL + public function collectData() { - $profile_json = json_decode(getContents($this->getAPIUrl())); + $channelURL = $this->getInput('channelURL'); + + if (preg_match(self::_ACCOUNT_URL_WITH_CHANNEL_ID_REGEX, $channelURL, $matches)) { + $channelID = $matches['channelID']; + $channelAPIURL = self::_BASE_API_URL_WITH_CHANNEL_ID . $channelID; + } elseif (preg_match(self::_ACCOUNT_URL_WITH_CHANNEL_NAME_REGEX, $channelURL, $matches)) { + $channelName = $matches['channelName']; + $channelAPIURL = self::_BASE_API_URL_WITH_CHANNEL_NAME . 
$channelName; + } else { + returnClientError(<<channelRealName = $APIResponse->header->title; + $limit = $this->getInput('limit'); - foreach (array_slice($profile_json->items, 0, $limit) as $post) { + foreach (array_slice($APIResponse->items, 0, $limit) as $post) { $item = []; $item['uri'] = $post->share_link; @@ -56,21 +83,19 @@ public function collectData() } } - private function getAPIUrl() - { - return self::_API_URL . $this->getInput('username'); - } - public function getURI() { - return self::URI . '/' . $this->getInput('username'); + if (is_null($this->getInput('channelURL'))) { + return parent::getURI(); + } + return $this->getInput('channelURL'); } public function getName() { - if (is_null($this->getInput('username'))) { + if (is_null($this->channelRealName)) { return parent::getName(); } - return $this->getInput('username') . '\'s latest zen.yandex posts'; + return $this->channelRealName . '\'s latest zen.yandex posts'; } } diff --git a/bridges/YorushikaBridge.php b/bridges/YorushikaBridge.php index 12d02f1f88d..d75b97d7d84 100644 --- a/bridges/YorushikaBridge.php +++ b/bridges/YorushikaBridge.php @@ -7,6 +7,20 @@ class YorushikaBridge extends BridgeAbstract const DESCRIPTION = 'Return news from Yorushika\'s offical website'; const MAINTAINER = 'Miicat_47'; const PARAMETERS = [ + 'global' => [ + 'lang' => [ + 'name' => 'Language', + 'defaultValue' => 'jp', + 'type' => 'list', + 'values' => [ + '日本語' => 'jp', + 'English' => 'en', + '한국어' => 'ko', + '中文(繁體字)' => 'zh-tw', + '中文(簡体字)' => 'zh-cn', + ] + ], + ], 'All categories' => [ ], 'Only selected categories' => [ @@ -27,6 +41,27 @@ class YorushikaBridge extends BridgeAbstract public function collectData() { + switch ($this->getInput('lang')) { + case 'jp': + $url = 'https://yorushika.com/news/5/'; + break; + case 'en': + $url = 'https://yorushika.com/news/5/?lang=en'; + break; + case 'ko': + $url = 'https://yorushika.com/news/5/?lang=ko'; + break; + case 'zh-tw': + $url = 
'https://yorushika.com/news/5/?lang=zh-tw'; + break; + case 'zh-cn': + $url = 'https://yorushika.com/news/5/?lang=zh-cn'; + break; + default: + $url = 'https://yorushika.com/news/5/'; + break; + } + $categories = []; if ($this->queriedContext == 'All categories') { array_push($categories, 'all'); @@ -42,7 +77,7 @@ public function collectData() } } - $html = getSimpleHTMLDOM('https://yorushika.com/news/5/')->find('.list--news', 0); + $html = getSimpleHTMLDOM($url)->find('.list--news', 0); $html = defaultLinkTo($html, $this->getURI()); foreach ($html->find('.inview') as $art) { @@ -62,10 +97,10 @@ public function collectData() $url = $art->find('a.clearfix', 0)->href; // Get article date - $exp_date = '/\d+\.\d+\.\d+/'; $date = $art->find('.date', 0)->plaintext; - preg_match($exp_date, $date, $matches); - $date = date_create_from_format('Y.m.d', $matches[0]); + preg_match('/(\d+)[\.年](\d+)[\.月](\d+)/u', $date, $matches); + $formattedDate = sprintf('%d.%02d.%02d', $matches[1], $matches[2], $matches[3]); + $date = date_create_from_format('Y.m.d', $formattedDate); $date = date_format($date, 'd.m.Y'); // Get article info diff --git a/bridges/YouTubeCommunityTabBridge.php b/bridges/YouTubeCommunityTabBridge.php index 20822828b0d..284b81f924d 100644 --- a/bridges/YouTubeCommunityTabBridge.php +++ b/bridges/YouTubeCommunityTabBridge.php @@ -32,7 +32,7 @@ class YouTubeCommunityTabBridge extends BridgeAbstract private $itemTitle = ''; private $urlRegex = '/youtube\.com\/(channel|user|c)\/([\w]+)\/community/'; - private $jsonRegex = '/var ytInitialData = (.*);<\/script>/'; + private $jsonRegex = '/var ytInitialData = ([^<]*);<\/script>/'; public function detectParameters($url) { @@ -70,7 +70,7 @@ public function collectData() $html = getSimpleHTMLDOM($this->feedUrl); } - $json = $this->extractJson($html->find('body', 0)->innertext); + $json = $this->extractJson($html->find('html', 0)->innertext); $this->feedName = $json->header->c4TabbedHeaderRenderer->title; @@ -204,7 +204,15 
@@ private function getText($runs) $text = ''; foreach ($runs as $part) { - $text .= $this->formatUrls($part->text); + if (isset($part->navigationEndpoint->browseEndpoint->canonicalBaseUrl)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->browseEndpoint->canonicalBaseUrl); + } elseif (isset($part->navigationEndpoint->urlEndpoint->url)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->urlEndpoint->url); + } elseif (isset($part->navigationEndpoint->commandMetadata->webCommandMetadata->url)) { + $text .= $this->formatUrls($part->text, $part->navigationEndpoint->commandMetadata->webCommandMetadata->url); + } else { + $text .= $this->formatUrls($part->text, null); + } } return nl2br($text); @@ -220,8 +228,8 @@ private function getAttachments($details) if (isset($details->backstageAttachment)) { $attachments = $details->backstageAttachment; - // Video if (isset($attachments->videoRenderer) && isset($attachments->videoRenderer->videoId)) { + // Video if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted a video'; } @@ -230,10 +238,8 @@ private function getAttachments($details) EOD; - } - - // Image - if (isset($attachments->backstageImageRenderer)) { + } elseif (isset($attachments->backstageImageRenderer)) { + // Image if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted an image'; } @@ -243,10 +249,8 @@ private function getAttachments($details) $content = <<

    EOD; - } - - // Poll - if (isset($attachments->pollRenderer)) { + } elseif (isset($attachments->pollRenderer)) { + // Poll if (empty($this->itemTitle)) { $this->itemTitle = $this->feedName . ' posted a poll'; } @@ -262,6 +266,23 @@ private function getAttachments($details) $content = <<

    Poll ({$attachments->pollRenderer->totalVotes->simpleText})

      {$pollChoices}

    EOD; + } elseif (isset($attachments->postMultiImageRenderer->images)) { + // Multiple images + $images = $attachments->postMultiImageRenderer->images; + + if (is_array($images)) { + if (empty($this->itemTitle)) { + $this->itemTitle = $this->feedName . ' posted ' . count($images) . ' images'; + } + + foreach ($images as $image) { + $lastThumb = end($image->backstageImageRenderer->image->thumbnails); + + $content .= <<

    +EOD; + } + } } } @@ -275,6 +296,7 @@ private function ellipsisTitle($text) { $length = 100; + $text = strip_tags($text); if (strlen($text) > $length) { $text = explode('
    ', wordwrap($text, $length, '
    ')); return $text[0] . '...'; @@ -283,12 +305,26 @@ private function ellipsisTitle($text) return $text; } - private function formatUrls($content) + private function formatUrls($content, $url) { - return preg_replace( - '/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims', - '$1 ', - $content - ); + if (substr(strval($url), 0, 1) == '/') { + // fix relative URL + $url = 'https://www.youtube.com' . $url; + } elseif (substr(strval($url), 0, 33) == 'https://www.youtube.com/redirect?') { + // extract actual URL from YouTube redirect + parse_str(substr($url, 33), $params); + if (strpos(($params['q'] ?? ''), rtrim($content, '.')) === 0) { + $url = $params['q']; + } + } + + // ensure all URLs are made clickable + $url = $url ?? $content; + + if (filter_var($url, FILTER_VALIDATE_URL)) { + return '' . $content . ''; + } + + return $content; } } diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 6a29e387158..af14c856f2c 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -193,14 +193,7 @@ private function collectDataInternal() $html = $this->fetch($url_listing); $jsonData = $this->extractJsonFromHtml($html); $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; - $jsonData = $jsonData->sectionListRenderer->contents; - foreach ($jsonData as $data) { - // Search result includes some ads, have to filter them - if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) { - $jsonData = $data->itemSectionRenderer->contents; - break; - } - } + $jsonData = $jsonData->sectionListRenderer->contents[0]->itemSectionRenderer->contents; $this->fetchItemsFromFromJsonData($jsonData); $this->feeduri = $url_listing; $this->feedName = 'Search: ' . 
$search; diff --git a/bridges/ZeitBridge.php b/bridges/ZeitBridge.php index 0ed9276bdc0..ae8a1a666e0 100644 --- a/bridges/ZeitBridge.php +++ b/bridges/ZeitBridge.php @@ -87,7 +87,9 @@ private function parseArticle($item, $article) // remove known bad elements foreach ( $article->find( - 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, .article-heading__container--podcast' + 'aside, .visually-hidden, .carousel-container, #tickaroo-liveblog, .zplus-badge, + .article-heading__container--podcast, .podcast-player__image, div[data-paywall], + .js-embed-consent, script, nav, .article-flexible-toc__subheading-link, .faq-link' ) as $bad ) { $bad->remove(); @@ -108,16 +110,15 @@ private function parseArticle($item, $article) } // authors - $authors = $article->find('*[itemtype*="schema.org/Person"]'); - if (!$authors) { - $authors = $article->find('.metadata__source'); - } + $authors = $article->find('*[itemtype*="schema.org/Person"]') ?? $article->find('.metadata__source'); if ($authors) { - $item['author'] = implode(', ', $authors); + $item['author'] = implode(', ', array_map(function ($e) { + return trim($e->plaintext); + }, $authors)); } // header image - $headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('header', 0); + $headerimg = $article->find('*[data-ct-row="headerimage"]', 0) ?? $article->find('.article-header', 0) ?? 
$article->find('header', 0); if ($headerimg) { $item['content'] .= implode('', $headerimg->find('img[src], figcaption')); } @@ -127,7 +128,7 @@ private function parseArticle($item, $article) if ($pages) { foreach ($pages as $page) { - $elements = $page->find('p, h2, figcaption, img[src]'); + $elements = $page->find('p, ul, ol, h2, figure.article__media img[src], figure.article__media figcaption, figure.quote'); $item['content'] .= implode('', $elements); } } diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md index 1d59d9180b0..fa8d5fddd53 100644 --- a/docs/01_General/06_Public_Hosts.md +++ b/docs/01_General/06_Public_Hosts.md @@ -3,12 +3,12 @@ | Country | Address | Status | Contact | Comment | |:-------:|---------|--------|----------|---------| | ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.org/bridge01 | ![](https://img.shields.io/website/https/rss-bridge.org/bridge01.svg) | [@dvikan](https://github.com/dvikan) | London, Digital Ocean| -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in/ | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with OVH SAS (Maintained in India) | -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.cheredeprince.net/ | ![](https://img.shields.io/website/https/rss-bridge.cheredeprince.net) | [@La_Bécasse](https://cheredeprince.net/contact) | Self-Hosted at home in France | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with OVH SAS (Maintained in India) | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.cheredeprince.net | ![](https://img.shields.io/website/https/rss-bridge.cheredeprince.net) | [@La_Bécasse](https://cheredeprince.net/contact) | Self-Hosted at home in France | | 
![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.sans-nuage.fr | ![](https://img.shields.io/website/https/rss-bridge.sans-nuage.fr) | [@Alsace Réseau Neutre](https://arn-fai.net/contact) | Hosted in Alsace, France | | ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.lewd.tech | ![](https://img.shields.io/website/https/rss-bridge.lewd.tech.svg) | [@Erisa](https://github.com/Erisa) | Hosted in London, protected by Cloudflare Rate Limiting | | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://bridge.easter.fr | ![](https://img.shields.io/website/https/bridge.easter.fr.svg) | [@chatainsim](https://github.com/chatainsim) | Hosted in Isère, France | -| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://wtf.roflcopter.fr/rss-bridge/ | ![](https://img.shields.io/website/https/wtf.roflcopter.fr/rss-bridge.svg) | [roflcopter.fr](https://wtf.roflcopter.fr/) | Hosted in France | +| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://wtf.roflcopter.fr/rss-bridge | ![](https://img.shields.io/website/https/wtf.roflcopter.fr/rss-bridge.svg) | [roflcopter.fr](https://wtf.roflcopter.fr/) | Hosted in France | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss.nixnet.services | ![](https://img.shields.io/website/https/rss.nixnet.services.svg) | [@amolith](https://nixnet.services/contact) | Hosted in Wunstorf, Germany | | ![](https://iplookup.flagfox.net/images/h16/AT.png) | https://rss-bridge.ggc-project.de | ![](https://img.shields.io/website/https/rss-bridge.ggc-project.de) | [@ggc-project.de](https://social.dev-wiki.de/@ggc_project) | Hosted in Steyr, Austria | | ![](https://iplookup.flagfox.net/images/h16/CA.png) | https://rssbridge.bus-hit.me | ![](https://img.shields.io/website/https/rssbridge.bus-hit.me.svg)| [@austinhuang0131](https://austinhuang.me/) | Hosted with Oracle in Québec, Canada | @@ -16,14 +16,15 @@ | ![](https://iplookup.flagfox.net/images/h16/FR.png) | 
https://rssbridge.boldair.dev | ![](https://img.shields.io/website?down_color=red&down_message=down&up_color=lime&up_message=up&url=https%3A%2F%2Frssbridge.boldair.dev) | [@Boldairdev](https://github.com/Boldairdev) | Latest Github release, Hosted on PHP 8.0 in Roubaix, France | | ![](https://iplookup.flagfox.net/images/h16/IN.png) | https://rss-bridge.bb8.fun | ![](https://img.shields.io/website/https/rss-bridge.bb8.fun.svg) | [@captn3m0](https://github.com/captn3m0) | Hosted in Bengaluru, India | | ![](https://iplookup.flagfox.net/images/h16/RU.png) | https://ololbu.ru/rss-bridge | ![](https://img.shields.io/website/https/ololbu.ru) | [@Ololbu](https://github.com/Ololbu) | Hosted in Moscow, Russia | -| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://tools.bheil.net/rss-bridge/ | ![](https://img.shields.io/website/https/tools.bheil.net.svg) | [@bheil](https://www.bheil.net) | Hosted in Germany | +| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://tools.bheil.net/rss-bridge | ![](https://img.shields.io/website/https/tools.bheil.net.svg) | [@bheil](https://www.bheil.net) | Hosted in Germany | | ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://bridge.suumitsu.eu | ![](https://img.shields.io/website/https/bridge.suumitsu.eu.svg) | [@mitsukarenai](https://github.com/mitsukarenai) | Hosted in Paris, France | | ![](https://iplookup.flagfox.net/images/h16/NL.png) | https://feed.eugenemolotov.ru | ![](https://img.shields.io/website/https/feed.eugenemolotov.ru.svg) | [@em92](https://github.com/em92) | Hosted in Amsterdam, Netherlands | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss-bridge.mediani.de | ![](https://img.shields.io/website/https/rss-bridge.mediani.de.svg) | [@sokai](https://github.com/sokai) | Hosted with Netcup, Germany | -| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.foxhaven.cyou| ![](https://img.shields.io/badge/website-up-brightgreen) | [@Aysilu](https://foxhaven.cyou) | 
Hosted with Timeweb (Maintained in Poland) | -| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.m3wz.su| ![](https://img.shields.io/badge/website-up-brightgreen) | [@m3oweezed](https://m3wz.su/en/about) | Poland, Hosted with Timeweb Cloud | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | ![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | Hosted with Hostaris, Germany | ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine +| ![](https://iplookup.flagfox.net/images/h16/IN.png) | https://rssbridge.projectsegfau.lt | ![](https://img.shields.io/website/https/rssbridge.projectsegfau.lt) | [@gi-yt](https://aryak.me) | Self-Hosted at Mumbai, India with Airtel (ISP) | +| ![](https://iplookup.flagfox.net/images/h16/US.png) | https://rb.vern.cc | ![](https://img.shields.io/website/https/rb.vern.cc.svg) | [@vern.cc](https://vern.cc/en/admin) | Hosted with Hetzner, US | +| ![](https://iplookup.flagfox.net/images/h16/RO.png) | https://rss.bloat.cat | ![](https://img.shields.io/website/https/rss.bloat.cat) | [@vlnst](https://bloat.cat/contact) | Hosted with Kyun, Romania | ## Inactive instances @@ -31,4 +32,3 @@ | Country | Address | Status | Contact | Comment | |:-------:|---------|--------|----------|---------| | ![](https://iplookup.flagfox.net/images/h16/FI.png) | https://rss-bridge.snopyta.org | ![](https://img.shields.io/website/https/rss-bridge.snopyta.org.svg) | [@Perflyst](https://github.com/Perflyst) | Hosted in Helsinki, Finland | -| ![](https://iplookup.flagfox.net/images/h16/US.png) | http://rb.vern.cc/ | ![](https://img.shields.io/website/https/rb.vern.cc.svg) | [@vern.cc](https://vern.cc/en/admin) | Hosted with Hetzner, US | diff --git a/docs/03_For_Hosts/05_Whitelisting.md b/docs/03_For_Hosts/05_Whitelisting.md 
index 113c4e3d43b..156174f0d72 100644 --- a/docs/03_For_Hosts/05_Whitelisting.md +++ b/docs/03_For_Hosts/05_Whitelisting.md @@ -1,14 +1,18 @@ -Modify `config.ini.php` to limit available bridges. +Modify `config.ini.php` to limit available bridges. Those changes should be applied in the `[system]` section. ## Enable all bridges ``` +[system] + enabled_bridges[] = * ``` ## Enable some bridges ``` +[system] + enabled_bridges[] = TwitchBridge enabled_bridges[] = GettrBridge ``` diff --git a/docs/06_Helper_functions/index.md b/docs/06_Helper_functions/index.md index 31a13953235..2b675ca3fd2 100644 --- a/docs/06_Helper_functions/index.md +++ b/docs/06_Helper_functions/index.md @@ -8,6 +8,8 @@ $this->getInput('your input name here'); `getInput` will either return the value for your parameter or `null` if the parameter is unknown or not specified. +[Defined in lib/BridgeAbstract.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/BridgeAbstract.php) + # getKey The `getKey` function is used to receive the key name to a selected list value given the name of the list, specified in `const PARAMETERS` @@ -39,6 +41,8 @@ $this->getKey('country'); `getKey` will either return the key name for your parameter or `null` if the parameter is unknown or not specified. +[Defined in lib/BridgeAbstract.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/BridgeAbstract.php) + # getContents The `getContents` function uses [cURL](https://secure.php.net/manual/en/book.curl.php) to acquire data from the specified URI while respecting the various settings defined at a global level by RSS-Bridge (i.e., proxy host, user agent, etc.). 
This function accepts a few parameters: @@ -55,6 +59,8 @@ $opts = array(CURLOPT_POST => 1); $html = getContents($url, $header, $opts); ``` +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # getSimpleHTMLDOM The `getSimpleHTMLDOM` function is a wrapper for the [simple_html_dom](https://simplehtmldom.sourceforge.io/) [file_get_html](https://simplehtmldom.sourceforge.io/docs/1.9/api/file_get_html/) function in order to provide context by design. @@ -62,6 +68,9 @@ The `getSimpleHTMLDOM` function is a wrapper for the ```PHP $html = getSimpleHTMLDOM('your URI'); ``` + +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # getSimpleHTMLDOMCached The `getSimpleHTMLDOMCached` function does the same as the [`getSimpleHTMLDOM`](#getsimplehtmldom) function, @@ -76,6 +85,8 @@ This function allows to specify the cache duration with the second parameter. $html = getSimpleHTMLDOMCached('your URI', 86400); // Duration 24h ``` +[Defined in lib/contents.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/contents.php) + # returnClientError The `returnClientError` function aborts execution of the current bridge and returns the given error message with error code **400**: @@ -86,6 +97,8 @@ returnClientError('Your error message') Use this function when the user provided invalid parameter or a required parameter is missing. +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + # returnServerError The `returnServerError` function aborts execution of the current bridge and returns the given error message with error code **500**: @@ -96,6 +109,8 @@ returnServerError('Your error message') Use this function when a problem occurs that has nothing to do with the parameters provided by the user. (like: Host service gone missing, empty data received, etc...) 
+[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + # defaultLinkTo Automatically replaces any relative URL in a given string or DOM object (i.e. the one returned by [getSimpleHTMLDOM](#getsimplehtmldom)) with an absolute URL. @@ -122,6 +137,8 @@ $html = defaultLinkTo($html, $this->getURI()); // Using bridge URL // ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # backgroundToImg Replaces tags with styles of `backgroud-image` by `` tags. @@ -131,6 +148,8 @@ backgroundToImg(mixed $htmlContent) : object Returns a DOM object (even if provided a string). +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # extractFromDelimiters Extract the first part of a string matching the specified start and end delimiters. ```php @@ -151,6 +170,8 @@ $extracted = extractFromDelimiters($string, $start, $end); // 'John Doe' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # stripWithDelimiters Remove one or more part(s) of a string using a start and end delimiter. It is the inverse of `extractFromDelimiters`. @@ -173,6 +194,8 @@ $cleaned = stripWithDelimiters($string, $start, $end); // 'foobar' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # stripRecursiveHTMLSection Remove HTML sections containing one or more sections using the same HTML tag. @@ -192,6 +215,8 @@ $cleaned = stripRecursiveHTMLSection($string, $tag_name, $tag_start); // 'foobar' ``` +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + # markdownToHtml Converts markdown input to HTML using [Parsedown](https://parsedown.org/). @@ -233,3 +258,84 @@ $html = markdownToHtml($input); //
  • Translation improvements
  • //
``` + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# e +The `e` function is used to convert special characters to HTML entities + +```PHP +e('0 < 1 and 2 > 1'); +``` + +`e` will return the content of the string, escaped so that it can be rendered as-is in HTML + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# truncate +The `truncate` function is used to shorten a string if it exceeds a certain length, and add a string indicating that the string has been shortened. + +```PHP +truncate('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed a neque nunc. Nam nibh sem.', 20 , '...'); +``` + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# sanitize +The `sanitize` function is used to remove some tags from a given HTML text. + +```PHP +$html = 'Sample Page +

Lorem ipsum dolor sit amet, consectetur adipiscing elit...

+ + +'; +$tags_to_remove = ['script', 'iframe', 'input', 'form']; +$attributes_to_keep = ['title', 'href', 'src']; +$text_to_keep = []; +sanitize($html, $tags_to_remove, $attributes_to_keep, $text_to_keep); +``` + +This function returns a simplehtmldom object of the remaining contents. + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# convertLazyLoading +The `convertLazyLoading` function is used to convert lazy-loading images and frames (video embeds) into static elements. It accepts the HTML content as HTML objects or string objects. It returns the HTML content with fixed image/frame URLs (same type as input). + +```PHP +$html = ' + +

Hello world!

+ + +convertLazyLoading($html); +``` + +[Defined in lib/html.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/html.php) + +# Json::encode +The `Json::encode` function is used to encode a value as a JSON string. + +```PHP +$array = [ + "foo" => "bar", + "bar" => "foo", +]; +Json::encode($array, true, true); +``` + +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) + +# Json::decode +The `Json::decode` function is used to decode a JSON string into a PHP variable. + +```PHP +$json = '{ + "foo": "bar", + "bar": "foo" +}'; +Json::decode($json); +``` + +[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php) diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 93c824b384c..37ef3a930db 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -6,34 +6,26 @@ class HtmlFormat extends FormatAbstract public function stringify() { + // This query string is url encoded $queryString = $_SERVER['QUERY_STRING']; $feedArray = $this->getFeed(); $formatFactory = new FormatFactory(); - $buttons = []; - $linkTags = []; - foreach ($formatFactory->getFormatNames() as $formatName) { - // Dynamically build buttons for all formats (except HTML) + $formats = []; + + // Create all formats (except HTML) + $formatNames = $formatFactory->getFormatNames(); + foreach ($formatNames as $formatName) { if ($formatName === 'Html') { continue; } - $formatUrl = '?' . str_ireplace('format=Html', 'format=' . 
$formatName, htmlentities($queryString)); - $buttons[] = [ - 'href' => $formatUrl, - 'value' => $formatName, - ]; - $format = $formatFactory->create($formatName); - $linkTags[] = [ - 'href' => $formatUrl, - 'title' => $formatName, - 'type' => $format->getMimeType(), - ]; - } - - if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) { - $buttons[] = [ - 'href' => e($feedArray['donationUri']), - 'value' => 'Donate to maintainer', + // The format url is relative, but should be absolute in order to help feed readers. + $formatUrl = '?' . str_ireplace('format=Html', 'format=' . $formatName, $queryString); + $formatObject = $formatFactory->create($formatName); + $formats[] = [ + 'url' => $formatUrl, + 'name' => $formatName, + 'type' => $formatObject->getMimeType(), ]; } @@ -50,13 +42,18 @@ public function stringify() ]; } + $donationUri = null; + if (Configuration::getConfig('admin', 'donations') && $feedArray['donationUri']) { + $donationUri = $feedArray['donationUri']; + } + $html = render_template(__DIR__ . 
'/../templates/html-format.html.php', [ - 'charset' => $this->getCharset(), - 'title' => $feedArray['name'], - 'linkTags' => $linkTags, - 'uri' => $feedArray['uri'], - 'buttons' => $buttons, - 'items' => $items, + 'charset' => $this->getCharset(), + 'title' => $feedArray['name'], + 'formats' => $formats, + 'uri' => $feedArray['uri'], + 'items' => $items, + 'donation_uri' => $donationUri, ]); // Remove invalid characters ini_set('mbstring.substitute_character', 'none'); diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index 1456e1c3e24..2467dec60e1 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -6,8 +6,17 @@ abstract class BridgeAbstract const URI = ''; const DONATION_URI = ''; const DESCRIPTION = 'No description provided'; + + /** + * Preferably a github username + */ const MAINTAINER = 'No maintainer'; + + /** + * Cache TTL in seconds + */ const CACHE_TIMEOUT = 3600; + const CONFIGURATION = []; const PARAMETERS = []; const TEST_DETECT_PARAMETERS = []; diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index c4677b9d7b5..d15ac865e0a 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -78,7 +78,7 @@ class="bridge-card" $card .= sprintf('', $bridgeClassName); - if ($bridge->getDonationURI() !== '' && Configuration::getConfig('admin', 'donations')) { + if (Configuration::getConfig('admin', 'donations') && $bridge->getDonationURI()) { $card .= sprintf( '

%s ~ Donate

', $bridge->getMaintainer(), diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index abe964e147a..fe809bc259b 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -34,6 +34,7 @@ public function collectExpandableDatas(string $url, $maxItems = -1) try { $this->feed = $feedParser->parseFeed($xmlString); } catch (\Exception $e) { + // FeedMergeBridge relies on this string throw new \Exception(sprintf('Failed to parse xml from %s: %s', $url, create_sane_exception_message($e))); } diff --git a/lib/FeedItem.php b/lib/FeedItem.php index bd37f119398..fc4549a7b5f 100644 --- a/lib/FeedItem.php +++ b/lib/FeedItem.php @@ -178,7 +178,6 @@ public function setAuthor($author) } else { $this->author = $author; } - return $this; } public function getContent(): ?string @@ -284,7 +283,6 @@ public function addMisc($name, $value) } else { $this->misc[$name] = $value; } - return $this; } public function toArray(): array diff --git a/lib/FeedParser.php b/lib/FeedParser.php index 37d3005bbca..b774cc14e23 100644 --- a/lib/FeedParser.php +++ b/lib/FeedParser.php @@ -92,7 +92,7 @@ public function parseAtomItem(\SimpleXMLElement $feedItem): array $item['uri'] = (string)$feedItem->id; } if (isset($feedItem->title)) { - $item['title'] = html_entity_decode((string)$feedItem->title); + $item['title'] = trim(html_entity_decode((string)$feedItem->title)); } if (isset($feedItem->updated)) { $item['timestamp'] = strtotime((string)$feedItem->updated); @@ -154,7 +154,7 @@ public function parseRss2Item(\SimpleXMLElement $feedItem): array $item['uri'] = (string)$feedItem->link; } if (isset($feedItem->title)) { - $item['title'] = html_entity_decode((string)$feedItem->title); + $item['title'] = trim(html_entity_decode((string)$feedItem->title)); } if (isset($feedItem->description)) { $item['content'] = (string)$feedItem->description; diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php index 2206f79ac9d..6163ca132d6 100644 --- a/lib/XPathAbstract.php +++ b/lib/XPathAbstract.php @@ 
-76,15 +76,6 @@ abstract class XPathAbstract extends BridgeAbstract */ const XPATH_EXPRESSION_ITEM_CONTENT = ''; - /** - * Use raw item content - * Whether to use the raw item content or to replace certain characters with - * special significance in HTML by HTML entities (using the PHP function htmlspecialchars). - * - * Use {@see XPathAbstract::getSettingUseRawItemContent()} to read this parameter - */ - const SETTING_USE_RAW_ITEM_CONTENT = false; - /** * XPath expression for extracting an item link from the item context * This expression should match a node's attribute containing the article URL @@ -158,6 +149,15 @@ abstract class XPathAbstract extends BridgeAbstract */ const SETTING_FIX_ENCODING = false; + /** + * Use raw item content + * Whether to use the raw item content or to replace certain characters with + * special significance in HTML by HTML entities (using the PHP function htmlspecialchars). + * + * Use {@see XPathAbstract::getSettingUseRawItemContent()} to read this parameter + */ + const SETTING_USE_RAW_ITEM_CONTENT = true; + /** * Internal storage for resulting feed name, automatically detected * @var string @@ -245,15 +245,6 @@ protected function getExpressionItemContent() return static::XPATH_EXPRESSION_ITEM_CONTENT; } - /** - * Use raw item content - * @return bool - */ - protected function getSettingUseRawItemContent(): bool - { - return static::SETTING_USE_RAW_ITEM_CONTENT; - } - /** * XPath expression for extracting an item link from the item context * @return string @@ -309,6 +300,15 @@ protected function getSettingFixEncoding(): bool return static::SETTING_FIX_ENCODING; } + /** + * Use raw item content + * @return bool + */ + protected function getSettingUseRawItemContent(): bool + { + return static::SETTING_USE_RAW_ITEM_CONTENT; + } + /** * Internal helper method for quickly accessing all the user defined constants * in derived classes @@ -331,8 +331,6 @@ private function getParam($name) return $this->getExpressionItemTitle(); case 
'content': return $this->getExpressionItemContent(); - case 'raw_content': - return $this->getSettingUseRawItemContent(); case 'uri': return $this->getExpressionItemUri(); case 'author': @@ -345,6 +343,8 @@ private function getParam($name) return $this->getExpressionItemCategories(); case 'fix_encoding': return $this->getSettingFixEncoding(); + case 'raw_content': + return $this->getSettingUseRawItemContent(); } } @@ -438,8 +438,15 @@ public function collectData() continue; } - $isContent = $param === 'content'; - $value = $this->getItemValueOrNodeValue($typedResult, $isContent, $isContent && !$this->getSettingUseRawItemContent()); + if ('categories' === $param && $typedResult instanceof \DOMNodeList) { + $value = []; + foreach ($typedResult as $domNode) { + $value[] = $this->getItemValueOrNodeValue($domNode, false); + } + } else { + $value = $this->getItemValueOrNodeValue($typedResult, 'content' === $param); + } + $item->__set($param, $this->formatParamValue($param, $value)); } @@ -459,7 +466,8 @@ public function collectData() */ protected function formatParamValue($param, $value) { - $value = $this->fixEncoding($value); + $value = is_array($value) ? array_map('trim', $value) : trim($value); + $value = is_array($value) ? array_map([$this, 'fixEncoding'], $value) : $this->fixEncoding($value); switch ($param) { case 'title': return $this->formatItemTitle($value); @@ -502,7 +510,7 @@ protected function formatItemTitle($value) */ protected function formatItemContent($value) { - return $value; + return $this->getParam('raw_content') ? $value : htmlspecialchars($value); } /** @@ -572,12 +580,12 @@ protected function formatItemEnclosures($value) * formatted as array. * Can be easily overwritten for in case the values need to be transformed into something * else. - * @param string $value + * @param string|array $value * @return array */ protected function formatItemCategories($value) { - return [$value]; + return is_array($value) ? 
$value : [$value]; } /** @@ -596,35 +604,30 @@ protected function cleanMediaUrl($mediaUrl) /** * @param $typedResult + * @param bool $returnXML + * @param bool $escapeHtml * @return string + * @throws Exception */ - protected function getItemValueOrNodeValue($typedResult, $returnXML = false, $escapeHtml = false) + protected function getItemValueOrNodeValue($typedResult, $returnXML = false) { if ($typedResult instanceof \DOMNodeList) { - $item = $typedResult->item(0); - if ($item instanceof \DOMElement) { - // Don't escape XML - if ($returnXML) { - return ($item->ownerDocument ?? $item)->saveXML($item); - } - $text = $item->nodeValue; - } elseif ($item instanceof \DOMAttr) { - $text = $item->value; - } elseif ($item instanceof \DOMText) { - $text = $item->wholeText; - } - } elseif (is_string($typedResult) && strlen($typedResult) > 0) { - $text = $typedResult; - } else { - throw new \Exception('Unknown type of XPath expression result.'); + $typedResult = $typedResult->item(0); } - $text = trim($text); - - if ($escapeHtml) { - return htmlspecialchars($text); + if ($typedResult instanceof \DOMElement) { + return $returnXML ? ($typedResult->ownerDocument ?? $typedResult)->saveXML($typedResult) : $typedResult->nodeValue; + } elseif ($typedResult instanceof \DOMAttr) { + return $typedResult->value; + } elseif ($typedResult instanceof \DOMText) { + return $typedResult->wholeText; + } elseif (is_string($typedResult)) { + return $typedResult; + } elseif (null === $typedResult) { + return ''; } - return $text; + + throw new \Exception('Unknown type of XPath expression result: ' . 
gettype($typedResult)); } /** diff --git a/lib/contents.php b/lib/contents.php index 43db8c031dc..ba6dd531a10 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -17,6 +17,8 @@ function getContents( $httpClient = RssBridge::getHttpClient(); $cache = RssBridge::getCache(); + // TODO: consider url validation at this point + $httpHeadersNormalized = []; foreach ($httpHeaders as $httpHeader) { $parts = explode(':', $httpHeader); diff --git a/lib/http.php b/lib/http.php index e4f9bf482ae..39f0c72710f 100644 --- a/lib/http.php +++ b/lib/http.php @@ -331,7 +331,14 @@ public function getHeader(string $name, bool $all = false) return array_pop($header); } - public function withBody(string $body): Response + public function withHeader(string $name, string $value): self + { + $clone = clone $this; + $clone->headers[$name] = [$value]; + return $clone; + } + + public function withBody(string $body): self { $clone = clone $this; $clone->body = $body; diff --git a/phpcs.xml b/phpcs.xml index bd1aca28489..9e393a137a0 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -35,6 +35,14 @@ + + + + + + + + diff --git a/static/rss-bridge.js b/static/rss-bridge.js index b9b466d6cb8..9cd004cb00c 100644 --- a/static/rss-bridge.js +++ b/static/rss-bridge.js @@ -1,21 +1,14 @@ function rssbridge_list_search() { - function remove_www_from_url(url) { - if (url.hostname.indexOf('www.') === 0) { - url.hostname = url.hostname.substr(4); - } - } - var search = document.getElementById('searchfield').value; - var searchAsUrl = document.createElement('a'); - searchAsUrl.href = search; - remove_www_from_url(searchAsUrl); + var bridgeCards = document.querySelectorAll('section.bridge-card'); for (var i = 0; i < bridgeCards.length; i++) { var bridgeName = bridgeCards[i].getAttribute('data-ref'); var bridgeShortName = bridgeCards[i].getAttribute('data-short-name'); var bridgeDescription = bridgeCards[i].querySelector('.description'); - var bridgeUrl = bridgeCards[i].getElementsByTagName('a')[0]; - 
remove_www_from_url(bridgeUrl); + var bridgeUrlElement = bridgeCards[i].getElementsByTagName('a')[0]; + var bridgeUrl = bridgeUrlElement.toString(); + bridgeCards[i].style.display = 'none'; if (!bridgeName || !bridgeUrl) { continue; @@ -30,10 +23,7 @@ function rssbridge_list_search() { if (bridgeDescription.textContent.match(searchRegex)) { bridgeCards[i].style.display = 'block'; } - if (bridgeUrl.toString().match(searchRegex)) { - bridgeCards[i].style.display = 'block'; - } - if (bridgeUrl.hostname === searchAsUrl.hostname) { + if (bridgeUrl.match(searchRegex)) { bridgeCards[i].style.display = 'block'; } } diff --git a/templates/html-format.html.php b/templates/html-format.html.php index 3b0fe6fe2bd..bc95c5d04e7 100644 --- a/templates/html-format.html.php +++ b/templates/html-format.html.php @@ -8,12 +8,13 @@ - + + @@ -33,11 +34,21 @@ - - - + + + + + + + + + diff --git a/tests/BridgeImplementationTest.php b/tests/BridgeImplementationTest.php index d2f74931dd0..dd68934edc9 100644 --- a/tests/BridgeImplementationTest.php +++ b/tests/BridgeImplementationTest.php @@ -157,29 +157,6 @@ public function testParameters($path) } } - /** - * @dataProvider dataBridgesProvider - */ - public function testVisibleMethods($path) - { - $bridgeAbstractMethods = get_class_methods(BridgeAbstract::class); - sort($bridgeAbstractMethods); - $feedExpanderMethods = get_class_methods(FeedExpander::class); - sort($feedExpanderMethods); - - $this->setBridge($path); - - $publicMethods = get_class_methods($this->bridge); - sort($publicMethods); - foreach ($publicMethods as $publicMethod) { - if ($this->bridge instanceof FeedExpander) { - $this->assertContains($publicMethod, $feedExpanderMethods); - } else { - $this->assertContains($publicMethod, $bridgeAbstractMethods); - } - } - } - /** * @dataProvider dataBridgesProvider */ diff --git a/tests/RedditBridgeTest.php b/tests/RedditBridgeTest.php new file mode 100644 index 00000000000..17a62e68874 --- /dev/null +++ b/tests/RedditBridgeTest.php @@ 
-0,0 +1,33 @@ +assertSame($expected, $actual); + + // https://old.reddit.com/search.json?q=author:RavenousRandy&sort=hot&include_over_18=on + $expected = 'https://old.reddit.com/search.json?q=author%3ARavenousRandy&sort=hot&include_over_18=on'; + $actual = RedditBridge::createUrl('', '', 'RavenousRandy', true, 'hot', 'user'); + $this->assertSame($expected, $actual); + + // https://old.reddit.com/search.json?q=cats dogs hen flair:"Proxy" subreddit:php&sort=hot&include_over_18=on + $expected = 'https://old.reddit.com/search.json?q=cats+dogs+hen+flair%3A%22Proxy%22+subreddit%3Aphp&sort=hot&include_over_18=on'; + $actual = RedditBridge::createUrl('cats,dogs hen', 'Proxy', 'php', false, 'hot', 'single'); + $this->assertSame($expected, $actual); + + // https://old.reddit.com/search.json?q=cats dogs hen flair:"Proxy Linux Server" subreddit:php&sort=hot&include_over_18=on + $expected = 'https://old.reddit.com/search.json?q=cats+dogs+hen+flair%3A%22Proxy+Linux+Server%22+subreddit%3Aphp&sort=hot&include_over_18=on'; + $actual = RedditBridge::createUrl('cats,dogs hen', 'Proxy,Linux Server', 'php', false, 'hot', 'single'); + $this->assertSame($expected, $actual); + } +}