diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 798c0bb32e9..f70c84e6308 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -29,6 +29,13 @@ class YoutubeBridge extends BridgeAbstract { 'required' => true ) ), + 'By custom name' => array( + 'custom' => array( + 'name' => 'custom name', + 'exampleValue' => 'nasa', + 'required' => true + ) + ), 'By playlist Id' => array( 'p' => array( 'name' => 'playlist id', @@ -43,6 +50,7 @@ class YoutubeBridge extends BridgeAbstract { 'pa' => array( 'name' => 'page', 'type' => 'number', + 'title' => 'This option is not work anymore, as YouTube will always return the same page', 'exampleValue' => 1 ) ), @@ -63,6 +71,10 @@ class YoutubeBridge extends BridgeAbstract { ); private $feedName = ''; + private $feeduri = ''; + private $channel_name = ''; + // This took from repo BetterVideoRss of VerifiedJoseph. + const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -81,9 +93,7 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ if(!is_null($elDatePublished)) $time = strtotime($elDatePublished->getAttribute('content')); - $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; - preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); - $jsonData = json_decode($matches[1]); + $jsonData = $this->getJSONData($html); $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; $videoSecondaryInfo = null; @@ -94,19 +104,50 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ } } if (!$videoSecondaryInfo) { - returnServerError('Could not find videoSecondaryInfoRenderer'); + returnServerError('Could not find videoSecondaryInfoRenderer. Error at: ' . $vid); + } + + if(isset($videoSecondaryInfo->description)) { + foreach($videoSecondaryInfo->description->runs as $description) { + if(isset($description->navigationEndpoint)) { + $metadata = $description->navigationEndpoint->commandMetadata->webCommandMetadata; + $web_type = $metadata->webPageType; + $url = $metadata->url; + $text = ''; + switch ($web_type) { + case 'WEB_PAGE_TYPE_UNKNOWN': + $url_components = parse_url($url); + if(isset($url_components['query']) && strpos($url_components['query'], '&q=') !== false) { + parse_str($url_components['query'], $params); + $url = urldecode($params['q']); + } + $text = $url; + break; + case 'WEB_PAGE_TYPE_WATCH': + case 'WEB_PAGE_TYPE_BROWSE': + $url = 'https://www.youtube.com' . $url; + $text = $description->text; + break; + } + $desc .= "$text"; + } else { + $desc .= nl2br($description->text); + } + } } - $desc = nl2br($videoSecondaryInfo->description->runs[0]->text); } - private function ytBridgeAddItem($vid, $title, $author, $desc, $time){ + private function ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail = ''){ $item = array(); $item['id'] = $vid; $item['title'] = $title; $item['author'] = $author; $item['timestamp'] = $time; $item['uri'] = self::URI . 'watch?v=' . $vid; - $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/0.jpg'; + if(!$thumbnail) { + $thumbnail = '0'; // Fallback to default thumbnail if there aren't any provided. + } + $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail . '.jpg'; $item['content'] = '
' . $desc; $this->items[] = $item; } @@ -120,7 +161,7 @@ private function ytBridgeParseXmlFeed($xml) { // Make sure the description is easy on the eye :) $desc = htmlspecialchars($desc); $desc = nl2br($desc); - $desc = preg_replace('/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims', + $desc = preg_replace(self::URI_REGEX, '$1 ', $desc); @@ -132,56 +173,6 @@ private function ytBridgeParseXmlFeed($xml) { $this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); // feedName will be used by getName() } - private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector, $add_parsed_items = true) { - $count = 0; - - $duration_min = $this->getInput('duration_min') ?: -1; - $duration_min = $duration_min * 60; - - $duration_max = $this->getInput('duration_max') ?: INF; - $duration_max = $duration_max * 60; - - if($duration_max < $duration_min) { - returnClientError('Max duration must be greater than min duration!'); - } - - foreach($html->find($element_selector) as $element) { - $author = ''; - $desc = ''; - $time = 0; - $vid = str_replace('/watch?v=', '', $element->find('a', 0)->href); - $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid)); - $title = trim($this->ytBridgeFixTitle($element->find($title_selector, 0)->plaintext)); - - if (strpos($vid, 'googleads') !== false - || $title == '[Private video]' - || $title == '[Deleted video]' - ) { - continue; - } - - // The duration comes in one of the formats: - // hh:mm:ss / mm:ss / m:ss - // 01:03:30 / 15:06 / 1:24 - $durationText = trim($element->find('div.timestamp span', 0)->plaintext); - $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); - - sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); - $duration = $hours * 3600 + $minutes * 60 + $seconds; - - if($duration < $duration_min || $duration > $duration_max) { - continue; - } - - if ($add_parsed_items) { - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } - $count++; - } - return $count; - } - private function ytBridgeFixTitle($title) { // convert both Ӓ and " to UTF-8 return html_entity_decode($title, ENT_QUOTES, 'UTF-8'); @@ -221,6 +212,86 @@ private function ytGetSimpleHTMLDOM($url, $cached = false){ $defaultSpanText); } + private function getJSONData($html) { + $scriptRegex = '/var ytInitialData = (.*?);<\/script>/'; + preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + return json_decode($matches[1]); + } + + private function parseJSONListing($jsonData) { + $duration_min = $this->getInput('duration_min') ?: -1; + $duration_min = $duration_min * 60; + + $duration_max = $this->getInput('duration_max') ?: INF; + $duration_max = $duration_max * 60; + + if($duration_max < $duration_min) { + returnClientError('Max duration must be greater than min duration!'); + } + + // $vid_list = ''; + + foreach($jsonData as $item) { + $wrapper = null; + if(isset($item->gridVideoRenderer)) { + $wrapper = $item->gridVideoRenderer; + } elseif(isset($item->videoRenderer)) { + $wrapper = $item->videoRenderer; + } elseif(isset($item->playlistVideoRenderer)) { + $wrapper = $item->playlistVideoRenderer; + } else + continue; + + $vid = $wrapper->videoId; + $title = $wrapper->title->runs[0]->text; + if(isset($wrapper->ownerText)) { + $this->channel_name = $wrapper->ownerText->runs[0]->text; + } elseif(isset($wrapper->shortBylineText)) { + $this->channel_name = $wrapper->shortBylineText->runs[0]->text; + } + + $author = ''; + $desc = ''; + $time = ''; + + // The duration comes in one of the formats: + // hh:mm:ss / mm:ss / m:ss + // 01:03:30 / 15:06 / 1:24 + $durationText = 0; + if(isset($wrapper->lengthText)) { + $durationText = $wrapper->lengthText; + } else { + foreach($wrapper->thumbnailOverlays as $overlay) { + if(isset($overlay->thumbnailOverlayTimeStatusRenderer)) { + $durationText = $overlay->thumbnailOverlayTimeStatusRenderer->text; + break; + } + } + } + + if(isset($durationText->simpleText)) { + $durationText = trim($durationText->simpleText); + } else { + $durationText = 0; + } + + if(preg_match('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', $durationText)) { + $durationText = preg_replace('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); + } else { + $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); + } + sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); + $duration = $hours * 3600 + $minutes * 60 + $seconds; + if($duration < $duration_min || $duration > $duration_max) { + continue; + } + + // $vid_list .= $vid . ','; + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + } + } + public function collectData(){ $xml = ''; @@ -236,16 +307,38 @@ public function collectData(){ $this->request = $this->getInput('c'); $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos'; + } elseif($this->getInput('custom')) { + $this->request = $this->getInput('custom'); + $url_listing = self::URI . urlencode($this->request) . '/videos'; } - if(!empty($url_feed) && !empty($url_listing)) { - if(!$this->skipFeeds() && $xml = $this->ytGetSimpleHTMLDOM($url_feed)) { - $this->ytBridgeParseXmlFeed($xml); - } elseif($html = $this->ytGetSimpleHTMLDOM($url_listing)) { - $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3'); + if(!empty($url_feed) || !empty($url_listing)) { + $this->feeduri = $url_listing; + if(!empty($this->getInput('custom'))) { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; + } + if(!$this->skipFeeds()) { + $html = $this->ytGetSimpleHTMLDOM($url_feed); + $this->ytBridgeParseXmlFeed($html); } else { - returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); + if(empty($this->getInput('custom'))) { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + } + $channel_id = ''; + if(isset($jsonData->contents)) { + $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; + $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + $this->parseJSONListing($jsonData); + } else { + returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); + } } + $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } elseif($this->getInput('p')) { /* playlist mode */ // TODO: this mode makes a lot of excess video query requests. // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" @@ -256,42 +349,49 @@ public function collectData(){ $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. Tried:\n - $url_listing"); - $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; - preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + $jsonData = $this->getJSONData($html); // TODO: this method returns only first 100 video items // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $jsonData = json_decode($matches[1]); $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; $item_count = count($jsonData); + if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { - $this->parseJsonPlaylist($jsonData); + $this->parseJSONListing($jsonData); } $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { + if(!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) { + $item1['timestamp'] = strtotime($item1['timestamp']); + $item2['timestamp'] = strtotime($item2['timestamp']); + } return $item2['timestamp'] - $item1['timestamp']; }); } elseif($this->getInput('s')) { /* search mode */ $this->request = $this->getInput('s'); - $page = 1; - if($this->getInput('pa')) - $page = (int)preg_replace('/[^0-9]/', '', $this->getInput('pa')); - $url_listing = self::URI . 'results?search_query=' . urlencode($this->request) - . '&page=' - . $page - . '&filters=video&search_sort=video_date_uploaded'; + . '&sp=CAI%253D'; $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. Tried:\n - $url_listing"); - $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3 > a'); - $this->feedName = 'Search: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() + $jsonData = $this->getJSONData($html); + $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; + $jsonData = $jsonData->sectionListRenderer->contents; + foreach($jsonData as $data) { // Search result includes some ads, have to filter them + if(isset($data->itemSectionRenderer->contents[0]->videoRenderer)) { + $jsonData = $data->itemSectionRenderer->contents; + break; + } + } + $this->parseJSONListing($jsonData); + $this->feeduri = $url_listing; + $this->feedName = 'Search: ' . $this->request; // feedName will be used by getName() } else { /* no valid mode */ returnClientError("You must either specify either:\n - YouTube username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)"); @@ -306,6 +406,8 @@ public function getURI() { if (!is_null($this->getInput('p'))) { return static::URI . 'playlist?list=' . $this->getInput('p'); + } elseif($this->feeduri) { + return $this->feeduri; } return parent::getURI(); @@ -316,6 +418,7 @@ public function getName(){ switch($this->queriedContext) { case 'By username': case 'By channel id': + case 'By custom name': case 'By playlist Id': case 'Search result': return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right? @@ -323,35 +426,4 @@ public function getName(){ return parent::getName(); } } - - private function parseJsonPlaylist($jsonData) { - $duration_min = $this->getInput('duration_min') ?: -1; - $duration_min = $duration_min * 60; - - $duration_max = $this->getInput('duration_max') ?: INF; - $duration_max = $duration_max * 60; - - if($duration_max < $duration_min) { - returnClientError('Max duration must be greater than min duration!'); - } - - foreach($jsonData as $item) { - if (!isset($item->playlistVideoRenderer)) { - continue; - } - $vid = $item->playlistVideoRenderer->videoId; - $title = $item->playlistVideoRenderer->title->runs[0]->text; - - $author = ''; - $desc = ''; - $time = 0; - $duration = intval($item->playlistVideoRenderer->lengthSeconds); - if($duration < $duration_min || $duration > $duration_max) { - continue; - } - - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } - } }