From a4bd04310f11edb1b57f0e0b77e8c84f58d4f0cf Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Mon, 17 May 2021 01:02:45 +0500 Subject: [PATCH] [YoutubeBridge] Several fixes and switch maintainer (#2115) - Fix incorrectly working method for querying video info - Partially fix playlist mode. At least it works, if playlist has more than 15 videos. But maximum 100 video items are parsed from playlist. Reason of switching maintainer: https://github.com/RSS-Bridge/rss-bridge/issues/2113#issuecomment-841156902 --- bridges/YoutubeBridge.php | 98 +++++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 90ee0499672..798c0bb32e9 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -12,7 +12,7 @@ class YoutubeBridge extends BridgeAbstract { const URI = 'https://www.youtube.com/'; const CACHE_TIMEOUT = 10800; // 3h const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search'; - const MAINTAINER = 'mitsukarenai'; + const MAINTAINER = 'em92'; const PARAMETERS = array( 'By username' => array( @@ -68,29 +68,35 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); // Skip unavailable videos - if(!strpos($html->innertext, 'IS_UNAVAILABLE_PAGE')) { + if(strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) { return; } - foreach($html->find('script') as $script) { - $data = trim($script->innertext); - - if(strpos($data, '{') !== 0) - continue; // Wrong script + $elAuthor = $html->find('span[itemprop=author] > link[itemprop=name]', 0); + if (!is_null($elAuthor)) { + $author = $elAuthor->getAttribute('content'); + } - $json = json_decode($data); + $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); + if(!is_null($elDatePublished)) + $time = strtotime($elDatePublished->getAttribute('content')); - if(!isset($json->itemListElement)) - continue; // Wrong script + $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; + preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + $jsonData = json_decode($matches[1]); + $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; - $author = $json->itemListElement[0]->item->name; + $videoSecondaryInfo = null; + foreach($jsonData as $item) { + if (isset($item->videoSecondaryInfoRenderer)) { + $videoSecondaryInfo = $item->videoSecondaryInfoRenderer; + break; + } } - - if(!is_null($html->find('#watch-description-text', 0))) - $desc = $html->find('#watch-description-text', 0)->innertext; - - if(!is_null($html->find('meta[itemprop=datePublished]', 0))) - $time = strtotime($html->find('meta[itemprop=datePublished]', 0)->getAttribute('content')); + if (!$videoSecondaryInfo) { + returnServerError('Could not find videoSecondaryInfoRenderer'); + } + $desc = nl2br($videoSecondaryInfo->description->runs[0]->text); } private function ytBridgeAddItem($vid, $title, $author, $desc, $time){ @@ -241,16 +247,28 @@ public function collectData(){ returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); } } elseif($this->getInput('p')) { /* playlist mode */ + // TODO: this mode makes a lot of excess video query requests. + // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" + // This cache will be used to find out, which videos to fetch + // to make feed of 15 items or more, if there a lot of videos published on that date. $this->request = $this->getInput('p'); $url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request); $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. Tried:\n - $url_listing"); - $item_count = $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a', false); + $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; + preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + // TODO: this method returns only first 100 video items + // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element + $jsonData = json_decode($matches[1]); + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; + $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; + $item_count = count($jsonData); if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { - $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a'); + $this->parseJsonPlaylist($jsonData); } $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { @@ -284,6 +302,15 @@ private function skipFeeds() { return ($this->getInput('duration_min') || $this->getInput('duration_max')); } + public function getURI() + { + if (!is_null($this->getInput('p'))) { + return static::URI . 'playlist?list=' . $this->getInput('p'); + } + + return parent::getURI(); + } + public function getName(){ // Name depends on queriedContext: switch($this->queriedContext) { @@ -291,9 +318,40 @@ public function getName(){ case 'By channel id': case 'By playlist Id': case 'Search result': - return $this->feedName . ' - YouTube'; // We already know it's a bridge, right? + return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right? default: return parent::getName(); } } + + private function parseJsonPlaylist($jsonData) { + $duration_min = $this->getInput('duration_min') ?: -1; + $duration_min = $duration_min * 60; + + $duration_max = $this->getInput('duration_max') ?: INF; + $duration_max = $duration_max * 60; + + if($duration_max < $duration_min) { + returnClientError('Max duration must be greater than min duration!'); + } + + foreach($jsonData as $item) { + if (!isset($item->playlistVideoRenderer)) { + continue; + } + $vid = $item->playlistVideoRenderer->videoId; + $title = $item->playlistVideoRenderer->title->runs[0]->text; + + $author = ''; + $desc = ''; + $time = 0; + $duration = intval($item->playlistVideoRenderer->lengthSeconds); + if($duration < $duration_min || $duration > $duration_max) { + continue; + } + + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + } + } }