From 589c97c344108e0540a7377485e986ee7d4e8a94 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Sun, 16 May 2021 15:53:23 +0500 Subject: [PATCH 1/5] [YoutubeBridge] Fix incorrectly working method for querying video info --- bridges/YoutubeBridge.php | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 90ee0499672..2827cf15fe6 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -68,29 +68,22 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); // Skip unavailable videos - if(!strpos($html->innertext, 'IS_UNAVAILABLE_PAGE')) { + if(strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) { return; } - foreach($html->find('script') as $script) { - $data = trim($script->innertext); - - if(strpos($data, '{') !== 0) - continue; // Wrong script - - $json = json_decode($data); - - if(!isset($json->itemListElement)) - continue; // Wrong script - - $author = $json->itemListElement[0]->item->name; + $elAuthor = $html->find('span[itemprop=author] > link[itemprop=name]', 0); + if (!is_null($elAuthor)) { + $author = $elAuthor->getAttribute('content'); } - if(!is_null($html->find('#watch-description-text', 0))) - $desc = $html->find('#watch-description-text', 0)->innertext; + $elDescription = $html->find('meta[itemprop=description]', 0); + if(!is_null($elDescription)) + $desc = $elDescription->getAttribute('content'); - if(!is_null($html->find('meta[itemprop=datePublished]', 0))) - $time = strtotime($html->find('meta[itemprop=datePublished]', 0)->getAttribute('content')); + $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); + if(!is_null($elDatePublished)) + $time = strtotime($elDatePublished->getAttribute('content')); } private function ytBridgeAddItem($vid, $title, $author, $desc, $time){ From e6bca0d9bb9f3470ef856ea435020d2b785e8764 Mon Sep 17 
00:00:00 2001 From: Eugene Molotov Date: Sun, 16 May 2021 16:11:27 +0500 Subject: [PATCH 2/5] [YoutubeBridge] Partially fix playlist mode. Now it does show list of maximum 100 video items instead of maximum 15 --- bridges/YoutubeBridge.php | 56 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 2827cf15fe6..2bd820693c2 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -234,16 +234,28 @@ public function collectData(){ returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); } } elseif($this->getInput('p')) { /* playlist mode */ + // TODO: this mode makes a lot of excess video query requests. + // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" + // This cache will be used to find out, which videos to fetch + // to make feed of 100 items or more, if there a lot of videos published on that date. $this->request = $this->getInput('p'); $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request); $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. 
Tried:\n - $url_listing"); - $item_count = $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a', false); + $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; + preg_match($scriptRegex, $html, $matches, PREG_OFFSET_CAPTURE, 0); + // TODO: this method returns only first 100 video items + // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element + $json_data = json_decode($matches[1][0]); + $json_data = $json_data->contents->twoColumnBrowseResultsRenderer->tabs[0]; + $json_data = $json_data->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; + $json_data = $json_data->contents[0]->playlistVideoListRenderer->contents; + $item_count = count($json_data); if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { - $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a'); + $this->parseJsonPlaylist($json_data); } $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { @@ -277,6 +289,15 @@ private function skipFeeds() { return ($this->getInput('duration_min') || $this->getInput('duration_max')); } + public function getURI() + { + if (!is_null($this->getInput('p'))) { + return static::URI . 'playlist?list=' . 
$this->getInput('p'); + } + + return parent::getURI(); + } + public function getName(){ // Name depends on queriedContext: switch($this->queriedContext) { @@ -289,4 +310,35 @@ public function getName(){ return parent::getName(); } } + + private function parseJsonPlaylist($json_data) { + $duration_min = $this->getInput('duration_min') ?: -1; + $duration_min = $duration_min * 60; + + $duration_max = $this->getInput('duration_max') ?: INF; + $duration_max = $duration_max * 60; + + if($duration_max < $duration_min) { + returnClientError('Max duration must be greater than min duration!'); + } + + foreach($json_data as $item) { + if (!isset($item->playlistVideoRenderer)) { + continue; + } + $vid = $item->playlistVideoRenderer->videoId; + $title = $item->playlistVideoRenderer->title->runs[0]->text; + + $author = ''; + $desc = ''; + $time = 0; + $duration = intval($item->playlistVideoRenderer->lengthSeconds); + if($duration < $duration_min || $duration > $duration_max) { + continue; + } + + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + } + } } From 6a95bf0c55f55055600b34ecc7ebe2ee320e8b70 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Sun, 16 May 2021 16:12:57 +0500 Subject: [PATCH 3/5] [YoutubeBridge] Switch maintainer. 
Reference: https://github.com/RSS-Bridge/rss-bridge/issues/2113#issuecomment-841156902 --- bridges/YoutubeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 2bd820693c2..cac53a204c0 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -12,7 +12,7 @@ class YoutubeBridge extends BridgeAbstract { const URI = 'https://www.youtube.com/'; const CACHE_TIMEOUT = 10800; // 3h const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search'; - const MAINTAINER = 'mitsukarenai'; + const MAINTAINER = 'em92'; const PARAMETERS = array( 'By username' => array( From 404353b0c54212e721518b523840c8f5d6db9440 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Mon, 17 May 2021 00:43:01 +0500 Subject: [PATCH 4/5] better video description in feed and fixed playlist title --- bridges/YoutubeBridge.php | 41 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index cac53a204c0..f20916c3ed8 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -77,13 +77,26 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $author = $elAuthor->getAttribute('content'); } - $elDescription = $html->find('meta[itemprop=description]', 0); - if(!is_null($elDescription)) - $desc = $elDescription->getAttribute('content'); - $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); if(!is_null($elDatePublished)) $time = strtotime($elDatePublished->getAttribute('content')); + + $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; + preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + $jsonData = json_decode($matches[1]); + $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; + + $videoSecondaryInfo = null; + foreach($jsonData as $item) { + if 
(isset($item->videoSecondaryInfoRenderer)) { + $videoSecondaryInfo = $item->videoSecondaryInfoRenderer; + break; + } + } + if (!$videoSecondaryInfo) { + returnServerError('Could not find videoSecondaryInfoRenderer'); + } + $desc = nl2br($videoSecondaryInfo->description->runs[0]->text); } private function ytBridgeAddItem($vid, $title, $author, $desc, $time){ @@ -244,18 +257,18 @@ public function collectData(){ $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. Tried:\n - $url_listing"); $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; - preg_match($scriptRegex, $html, $matches, PREG_OFFSET_CAPTURE, 0); + preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); // TODO: this method returns only first 100 video items // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $json_data = json_decode($matches[1][0]); - $json_data = $json_data->contents->twoColumnBrowseResultsRenderer->tabs[0]; - $json_data = $json_data->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; - $json_data = $json_data->contents[0]->playlistVideoListRenderer->contents; - $item_count = count($json_data); + $jsonData = json_decode($matches[1]); + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; + $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; + $item_count = count($jsonData); if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { - $this->parseJsonPlaylist($json_data); + $this->parseJsonPlaylist($jsonData); } $this->feedName = 'Playlist: ' . 
str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { @@ -305,13 +318,13 @@ public function getName(){ case 'By channel id': case 'By playlist Id': case 'Search result': - return $this->feedName . ' - YouTube'; // We already know it's a bridge, right? + return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right? default: return parent::getName(); } } - private function parseJsonPlaylist($json_data) { + private function parseJsonPlaylist($jsonData) { $duration_min = $this->getInput('duration_min') ?: -1; $duration_min = $duration_min * 60; @@ -322,7 +335,7 @@ private function parseJsonPlaylist($json_data) { returnClientError('Max duration must be greater than min duration!'); } - foreach($json_data as $item) { + foreach($jsonData as $item) { if (!isset($item->playlistVideoRenderer)) { continue; } From 96bf0fba164fd17a1d1289c8c71ee30d924cc5c0 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Mon, 17 May 2021 00:56:19 +0500 Subject: [PATCH 5/5] 100 -> 15 --- bridges/YoutubeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index f20916c3ed8..798c0bb32e9 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -250,7 +250,7 @@ public function collectData(){ // TODO: this mode makes a lot of excess video query requests. // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" // This cache will be used to find out, which videos to fetch - // to make feed of 100 items or more, if there a lot of videos published on that date. + // to make feed of 15 items or more, if there a lot of videos published on that date. $this->request = $this->getInput('p'); $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request); $url_listing = self::URI . 'playlist?list=' . 
urlencode($this->request);