From 076e6adfc42b14524055a58ee340d3191575f673 Mon Sep 17 00:00:00 2001 From: Jim Date: Sat, 10 Jul 2021 16:09:43 +0700 Subject: [PATCH 01/24] [YoutubeBridge] Fix bridge - Add support for custom channel name. - Add new method to fetch JSON data. - Search, listing now parsing data through JSON, HTML is not work anymore. - Remove page number input on the Search endpoint. --- bridges/YoutubeBridge.php | 237 ++++++++++++++++++++++++++------------ 1 file changed, 166 insertions(+), 71 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 798c0bb32e9..d696e8c9fb9 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -39,11 +39,6 @@ class YoutubeBridge extends BridgeAbstract { 's' => array( 'name' => 'search keyword', 'exampleValue' => 'test' - ), - 'pa' => array( - 'name' => 'page', - 'type' => 'number', - 'exampleValue' => 1 ) ), 'global' => array( @@ -63,6 +58,7 @@ class YoutubeBridge extends BridgeAbstract { ); private $feedName = ''; + private $feeduri = ''; private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -81,9 +77,8 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ if(!is_null($elDatePublished)) $time = strtotime($elDatePublished->getAttribute('content')); - $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; - preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); - $jsonData = json_decode($matches[1]); + + $jsonData = $this->getJSONData($html); $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; $videoSecondaryInfo = null; @@ -94,9 +89,14 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ } } if (!$videoSecondaryInfo) { - returnServerError('Could not find videoSecondaryInfoRenderer'); + returnServerError('Could not find videoSecondaryInfoRenderer. Error at: ' . $vid); + } + + if(isset($videoSecondaryInfo->description)) { + foreach($videoSecondaryInfo->description->runs as $description) { + $desc .= nl2br($description->text); + } } - $desc = nl2br($videoSecondaryInfo->description->runs[0]->text); } private function ytBridgeAddItem($vid, $title, $author, $desc, $time){ @@ -132,55 +132,55 @@ private function ytBridgeParseXmlFeed($xml) { $this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); // feedName will be used by getName() } - private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector, $add_parsed_items = true) { - $count = 0; - - $duration_min = $this->getInput('duration_min') ?: -1; - $duration_min = $duration_min * 60; - - $duration_max = $this->getInput('duration_max') ?: INF; - $duration_max = $duration_max * 60; - - if($duration_max < $duration_min) { - returnClientError('Max duration must be greater than min duration!'); - } - - foreach($html->find($element_selector) as $element) { - $author = ''; - $desc = ''; - $time = 0; - $vid = str_replace('/watch?v=', '', $element->find('a', 0)->href); - $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid)); - $title = trim($this->ytBridgeFixTitle($element->find($title_selector, 0)->plaintext)); - - if (strpos($vid, 'googleads') !== false - || $title == '[Private video]' - || $title == '[Deleted video]' - ) { - continue; - } - - // The duration comes in one of the formats: - // hh:mm:ss / mm:ss / m:ss - // 01:03:30 / 15:06 / 1:24 - $durationText = trim($element->find('div.timestamp span', 0)->plaintext); - $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); - - sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); - $duration = $hours * 3600 + $minutes * 60 + $seconds; - - if($duration < $duration_min || $duration > $duration_max) { - continue; - } - - if ($add_parsed_items) { - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } - $count++; - } - return $count; - } + // private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector, $add_parsed_items = true) { + // $count = 0; + + // $duration_min = $this->getInput('duration_min') ?: -1; + // $duration_min = $duration_min * 60; + + // $duration_max = $this->getInput('duration_max') ?: INF; + // $duration_max = $duration_max * 60; + + // if($duration_max < $duration_min) { + // returnClientError('Max duration must be greater than min duration!'); + // } + + // foreach($html->find($element_selector) as $element) { + // $author = ''; + // $desc = ''; + // $time = 0; + // $vid = str_replace('/watch?v=', '', $element->find('a', 0)->href); + // $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid)); + // $title = trim($this->ytBridgeFixTitle($element->find($title_selector, 0)->plaintext)); + + // if (strpos($vid, 'googleads') !== false + // || $title == '[Private video]' + // || $title == '[Deleted video]' + // ) { + // continue; + // } + + // // The duration comes in one of the formats: + // // hh:mm:ss / mm:ss / m:ss + // // 01:03:30 / 15:06 / 1:24 + // $durationText = trim($element->find('span#text', 0)->plaintext); + // $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); + + // sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); + // $duration = $hours * 3600 + $minutes * 60 + $seconds; + + // if($duration < $duration_min || $duration > $duration_max) { + // continue; + // } + + // if ($add_parsed_items) { + // $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + // $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + // } + // $count++; + // } + // return $count; + // } private function ytBridgeFixTitle($title) { // convert both Ӓ and " to UTF-8 @@ -221,17 +221,80 @@ private function ytGetSimpleHTMLDOM($url, $cached = false){ $defaultSpanText); } + private function getJSONData($html) { + $scriptRegex = '/var ytInitialData = (.*?);<\/script>/'; + preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + return json_decode($matches[1]); + } + + private function parseJSONListing($jsonData) { + $duration_min = $this->getInput('duration_min') ?: -1; + $duration_min = $duration_min * 60; + + $duration_max = $this->getInput('duration_max') ?: INF; + $duration_max = $duration_max * 60; + + if($duration_max < $duration_min) { + returnClientError('Max duration must be greater than min duration!'); + } + + foreach($jsonData as $item) { + $wrapper = null; + if(isset($item->gridVideoRenderer)) { + $wrapper = $item->gridVideoRenderer; + } elseif(isset($item->videoRenderer)) { + $wrapper = $item->videoRenderer; + } else + continue; + + $vid = $wrapper->videoId; + $title = $wrapper->title->runs[0]->text; + + $author = ''; + $desc = ''; + $time = 0; + + // The duration comes in one of the formats: + // hh:mm:ss / mm:ss / m:ss + // 01:03:30 / 15:06 / 1:24 + $durationText = ''; + if(isset($wrapper->lengthText)) { + $durationText = $wrapper->lengthText; + } else { + foreach($wrapper->thumbnailOverlays as $overlay) { + if(isset($overlay->thumbnailOverlayTimeStatusRenderer)) { + $durationText = $wrapper->thumbnailOverlays[0]->thumbnailOverlayTimeStatusRenderer->text; + break; + } + } + } + + $durationText = trim($durationText->simpleText); + $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); + sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); + $duration = $hours * 3600 + $minutes * 60 + $seconds; + if($duration < $duration_min || $duration > $duration_max) { + continue; + } + + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + } + } + public function collectData(){ $xml = ''; $html = ''; $url_feed = ''; $url_listing = ''; + $custom_url = ''; if($this->getInput('u')) { /* User and Channel modes */ $this->request = $this->getInput('u'); $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request); $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos'; + $custom_url = self::URI . urlencode($this->request) . '/videos'; } elseif($this->getInput('c')) { $this->request = $this->getInput('c'); $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); @@ -239,12 +302,38 @@ public function collectData(){ } if(!empty($url_feed) && !empty($url_listing)) { - if(!$this->skipFeeds() && $xml = $this->ytGetSimpleHTMLDOM($url_feed)) { + $this->feeduri = $url_listing; + if(!$this->skipFeeds()) { + $xml = ''; + try { + $xml = $this->ytGetSimpleHTMLDOM($url_feed); + } catch(Exception $e) { + $html = $this->ytGetSimpleHTMLDOM($custom_url); + $jsonData = $this->getJSONData($html); + $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; + $xml = $this->ytGetSimpleHTMLDOM($url_feed); + $this->feeduri = $custom_url; + } $this->ytBridgeParseXmlFeed($xml); - } elseif($html = $this->ytGetSimpleHTMLDOM($url_listing)) { - $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3'); } else { - returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); + $html = ''; + try { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + } catch(Exception $e) { + $html = $this->ytGetSimpleHTMLDOM($custom_url); + $this->feeduri = $custom_url; + } + $jsonData = $this->getJSONData($html); + if(isset($jsonData->contents)) { + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; + $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + } else { + returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); + } + + $this->parseJSONListing($jsonData); + $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } } elseif($this->getInput('p')) { /* playlist mode */ // TODO: this mode makes a lot of excess video query requests. @@ -276,21 +365,25 @@ public function collectData(){ }); } elseif($this->getInput('s')) { /* search mode */ $this->request = $this->getInput('s'); - $page = 1; - if($this->getInput('pa')) - $page = (int)preg_replace('/[^0-9]/', '', $this->getInput('pa')); - $url_listing = self::URI . 'results?search_query=' . urlencode($this->request) - . '&page=' - . $page - . '&filters=video&search_sort=video_date_uploaded'; + . '&sp=CAI%253D'; $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. Tried:\n - $url_listing"); - $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3 > a'); + $jsonData = $this->getJSONData($html); + $jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents; + $jsonData = $jsonData->sectionListRenderer->contents; + foreach($jsonData as $data) { // Search result includes some ads, have to filter them + if(isset($data->itemSectionRenderer->contents[0]->videoRenderer)) { + $jsonData = $data->itemSectionRenderer->contents; + break; + } + } + $this->parseJSONListing($jsonData); + $this->feeduri = $url_listing; $this->feedName = 'Search: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() } else { /* no valid mode */ returnClientError("You must either specify either:\n - YouTube @@ -306,6 +399,8 @@ public function getURI() { if (!is_null($this->getInput('p'))) { return static::URI . 'playlist?list=' . $this->getInput('p'); + } elseif(!is_null($this->feeduri)) { + return $this->feeduri; } return parent::getURI(); From 589c918d3be44092237cffcc22a872953cfcdadb Mon Sep 17 00:00:00 2001 From: Jim Date: Sat, 10 Jul 2021 16:18:35 +0700 Subject: [PATCH 02/24] [YoutubeBridge] Fix getURI() --- bridges/YoutubeBridge.php | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index d696e8c9fb9..4d7df9d0035 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -58,7 +58,7 @@ class YoutubeBridge extends BridgeAbstract { ); private $feedName = ''; - private $feeduri = ''; + private $feeduri = self::URI; private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -359,6 +359,7 @@ public function collectData(){ } else { $this->parseJsonPlaylist($jsonData); } + $this->feeduri = $url_listing; $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { return $item2['timestamp'] - $item1['timestamp']; @@ -397,13 +398,7 @@ private function skipFeeds() { public function getURI() { - if (!is_null($this->getInput('p'))) { - return static::URI . 'playlist?list=' . $this->getInput('p'); - } elseif(!is_null($this->feeduri)) { - return $this->feeduri; - } - - return parent::getURI(); + return $this->feed_uri; } public function getName(){ From a7876338cfc6fda5486d8af609dbf207bf357e6d Mon Sep 17 00:00:00 2001 From: Jim Date: Sat, 10 Jul 2021 16:21:33 +0700 Subject: [PATCH 03/24] Revert "[YoutubeBridge] Fix getURI()" This reverts commit 589c918d3be44092237cffcc22a872953cfcdadb. --- bridges/YoutubeBridge.php | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 4d7df9d0035..d696e8c9fb9 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -58,7 +58,7 @@ class YoutubeBridge extends BridgeAbstract { ); private $feedName = ''; - private $feeduri = self::URI; + private $feeduri = ''; private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -359,7 +359,6 @@ public function collectData(){ } else { $this->parseJsonPlaylist($jsonData); } - $this->feeduri = $url_listing; $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { return $item2['timestamp'] - $item1['timestamp']; @@ -398,7 +397,13 @@ private function skipFeeds() { public function getURI() { - return $this->feed_uri; + if (!is_null($this->getInput('p'))) { + return static::URI . 'playlist?list=' . $this->getInput('p'); + } elseif(!is_null($this->feeduri)) { + return $this->feeduri; + } + + return parent::getURI(); } public function getName(){ From 78026b9286a0f07306426e3b894e7ee77b59eed7 Mon Sep 17 00:00:00 2001 From: Jim Date: Sat, 10 Jul 2021 16:23:17 +0700 Subject: [PATCH 04/24] [YoutubeBridge] Fix getURI() --- bridges/YoutubeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index d696e8c9fb9..ba555a6e7ef 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -58,7 +58,7 @@ class YoutubeBridge extends BridgeAbstract { ); private $feedName = ''; - private $feeduri = ''; + private $feeduri = null; private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); From 1adaf6edc2b34e8b1485c5a5aeb74a8238222fa4 Mon Sep 17 00:00:00 2001 From: Jim Date: Sat, 10 Jul 2021 16:25:10 +0700 Subject: [PATCH 05/24] [YoutubeBridge] Fix lint --- bridges/YoutubeBridge.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index ba555a6e7ef..9199f000017 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -77,7 +77,6 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ if(!is_null($elDatePublished)) $time = strtotime($elDatePublished->getAttribute('content')); - $jsonData = $this->getJSONData($html); $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; @@ -244,7 +243,7 @@ private function parseJSONListing($jsonData) { $wrapper = $item->gridVideoRenderer; } elseif(isset($item->videoRenderer)) { $wrapper = $item->videoRenderer; - } else + } else continue; $vid = $wrapper->videoId; @@ -279,7 +278,7 @@ private function parseJSONListing($jsonData) { $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } + } } public function collectData(){ From fbe2ff5d83ae89672363dc3a2105b0b07ad09082 Mon Sep 17 00:00:00 2001 From: Jim Date: Sat, 10 Jul 2021 17:00:03 +0700 Subject: [PATCH 06/24] [YoutubeBridge] Fix getURI(), fix regex --- bridges/YoutubeBridge.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 9199f000017..b5f7820be65 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -58,7 +58,7 @@ class YoutubeBridge extends BridgeAbstract { ); private $feedName = ''; - private $feeduri = null; + private $feeduri = ''; private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -262,14 +262,18 @@ private function parseJSONListing($jsonData) { } else { foreach($wrapper->thumbnailOverlays as $overlay) { if(isset($overlay->thumbnailOverlayTimeStatusRenderer)) { - $durationText = $wrapper->thumbnailOverlays[0]->thumbnailOverlayTimeStatusRenderer->text; + $durationText = $overlay->thumbnailOverlayTimeStatusRenderer->text; break; } } } $durationText = trim($durationText->simpleText); - $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); + if(preg_match('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', $durationText)) { + $durationText = preg_replace('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); + } else { + $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); + } sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); $duration = $hours * 3600 + $minutes * 60 + $seconds; if($duration < $duration_min || $duration > $duration_max) { @@ -398,7 +402,7 @@ public function getURI() { if (!is_null($this->getInput('p'))) { return static::URI . 'playlist?list=' . $this->getInput('p'); - } elseif(!is_null($this->feeduri)) { + } elseif($this->feeduri) { return $this->feeduri; } From e8bf84907f39774e329d791af5fb133aa73f6c09 Mon Sep 17 00:00:00 2001 From: Jim Date: Sun, 11 Jul 2021 11:24:19 +0700 Subject: [PATCH 07/24] [YoutubeBridge] Fix edge case, add anchor to description --- bridges/YoutubeBridge.php | 67 ++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index b5f7820be65..873c0d837a6 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -93,7 +93,17 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ if(isset($videoSecondaryInfo->description)) { foreach($videoSecondaryInfo->description->runs as $description) { - $desc .= nl2br($description->text); + if(isset($description->navigationEndpoint->urlEndpoint)) { + $url = $description->navigationEndpoint->urlEndpoint->url; + $url_components = parse_url($url); + if(isset($url_components['query'])) { + parse_str($url_components['query'], $params); + $url = urldecode($params['q']); + } + $desc .= "$description->text"; + } else { + $desc .= nl2br($description->text); + } } } } @@ -306,38 +316,45 @@ public function collectData(){ if(!empty($url_feed) && !empty($url_listing)) { $this->feeduri = $url_listing; - if(!$this->skipFeeds()) { - $xml = ''; - try { + $xml = ''; + $html = ''; + try { + if(!$this->skipFeeds()) { $xml = $this->ytGetSimpleHTMLDOM($url_feed); - } catch(Exception $e) { + } else { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + // Throw an error right here if it doesn't have anything. + // Sometimes, Youtube user page have a weird case + // For example: NASA. When user write 'nasa' into the username and add limit for duration + // Bridge immediately find its user page (/user/nasa) and then nothing happen. + // Digging into the data, it appear it's another account, not from NASA itself. + // If you use feed, it works normally cause it already raise 404 error + if(!isset($jsonData->content)) { + returnServerError(''); // Throw an empty one to trigger try catch + } + } + } catch(Exception $e) { $html = $this->ytGetSimpleHTMLDOM($custom_url); $jsonData = $this->getJSONData($html); $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; $xml = $this->ytGetSimpleHTMLDOM($url_feed); $this->feeduri = $custom_url; - } - $this->ytBridgeParseXmlFeed($xml); + } + if(!$this->skipFeeds()) { + return $this->ytBridgeParseXmlFeed($xml); + } + + if(isset($jsonData->contents)) { + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; + $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; } else { - $html = ''; - try { - $html = $this->ytGetSimpleHTMLDOM($url_listing); - } catch(Exception $e) { - $html = $this->ytGetSimpleHTMLDOM($custom_url); - $this->feeduri = $custom_url; - } - $jsonData = $this->getJSONData($html); - if(isset($jsonData->contents)) { - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; - $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; - $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; - } else { - returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); - } - - $this->parseJSONListing($jsonData); - $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); } + + $this->parseJSONListing($jsonData); + $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } elseif($this->getInput('p')) { /* playlist mode */ // TODO: this mode makes a lot of excess video query requests. // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" From 6199164ff8e23be54cc7b1fa395d073c759ce6c8 Mon Sep 17 00:00:00 2001 From: Jim Date: Sun, 11 Jul 2021 11:31:57 +0700 Subject: [PATCH 08/24] [YoutubeBridge] Fix lint --- bridges/YoutubeBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 873c0d837a6..4aa97b47558 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -330,7 +330,7 @@ public function collectData(){ // Bridge immediately find its user page (/user/nasa) and then nothing happen. // Digging into the data, it appear it's another account, not from NASA itself. // If you use feed, it works normally cause it already raise 404 error - if(!isset($jsonData->content)) { + if(!isset($jsonData->content)) { returnServerError(''); // Throw an empty one to trigger try catch } } @@ -344,7 +344,7 @@ public function collectData(){ if(!$this->skipFeeds()) { return $this->ytBridgeParseXmlFeed($xml); } - + if(isset($jsonData->contents)) { $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; From d4dfe56c777ee223c2a1ccf91ffa275460a00102 Mon Sep 17 00:00:00 2001 From: Jim Date: Sun, 11 Jul 2021 11:46:44 +0700 Subject: [PATCH 09/24] [YoutubeBridge] Add target=_blank for anchor tag --- bridges/YoutubeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 4aa97b47558..dd4d2e9e7a2 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -100,7 +100,7 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ parse_str($url_components['query'], $params); $url = urldecode($params['q']); } - $desc .= "$description->text"; + $desc .= "$description->text"; } else { $desc .= nl2br($description->text); } From 7413f4168d2acf1f08f6c3107891da0233076d8d Mon Sep 17 00:00:00 2001 From: Jim Date: Sun, 11 Jul 2021 11:59:13 +0700 Subject: [PATCH 10/24] [YoutubeBridge] Fix YouTube subscribe URL being parsed, not redirect URL --- bridges/YoutubeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index dd4d2e9e7a2..cc62e096438 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -96,7 +96,7 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ if(isset($description->navigationEndpoint->urlEndpoint)) { $url = $description->navigationEndpoint->urlEndpoint->url; $url_components = parse_url($url); - if(isset($url_components['query'])) { + if(isset($url_components['query']) && strpos($url_components['query'], '&q=') !== false) { parse_str($url_components['query'], $params); $url = urldecode($params['q']); } From 0cb169c861067d249ba74cf6f978fb2976e3d972 Mon Sep 17 00:00:00 2001 From: Jim Date: Sun, 11 Jul 2021 19:13:32 +0700 Subject: [PATCH 11/24] [YoutubeBridge] Add better timestamp, fix typo --- bridges/YoutubeBridge.php | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index cc62e096438..bec33d18cd8 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -59,6 +59,7 @@ class YoutubeBridge extends BridgeAbstract { private $feedName = ''; private $feeduri = ''; + private $channel_name = ''; private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -73,9 +74,11 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $author = $elAuthor->getAttribute('content'); } - $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); - if(!is_null($elDatePublished)) - $time = strtotime($elDatePublished->getAttribute('content')); + if(!$time) { + $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); + if(!is_null($elDatePublished)) + $time = strtotime($elDatePublished->getAttribute('content')); + } $jsonData = $this->getJSONData($html); $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; @@ -258,10 +261,20 @@ private function parseJSONListing($jsonData) { $vid = $wrapper->videoId; $title = $wrapper->title->runs[0]->text; + $view_count = $wrapper->viewCountText->simpleText; + $accessibilityData = $wrapper->title->accessibility->accessibilityData->label; + if(isset($wrapper->ownerText)) { + $this->channel_name = $wrapper->ownerText->runs[0]->text; + } + // Attempted to get correct timestamp from the string + $timestamp = explode($title, $accessibilityData); + $timestamp = explode($view_count, $timestamp[1]); + $timestamp = explode($this->channel_name, $timestamp[0]); + $timestamp = strtotime(trim($timestamp[1])); $author = ''; $desc = ''; - $time = 0; + $time = $timestamp; // The duration comes in one of the formats: // hh:mm:ss / mm:ss / m:ss @@ -330,22 +343,27 @@ public function collectData(){ // Bridge immediately find its user page (/user/nasa) and then nothing happen. // Digging into the data, it appear it's another account, not from NASA itself. // If you use feed, it works normally cause it already raise 404 error - if(!isset($jsonData->content)) { + if(!isset($jsonData->contents)) { returnServerError(''); // Throw an empty one to trigger try catch } } } catch(Exception $e) { + if($custom_url) { $html = $this->ytGetSimpleHTMLDOM($custom_url); $jsonData = $this->getJSONData($html); $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; $xml = $this->ytGetSimpleHTMLDOM($url_feed); $this->feeduri = $custom_url; + } else { + returnServerError($e->getMessage()); + } } if(!$this->skipFeeds()) { return $this->ytBridgeParseXmlFeed($xml); } if(isset($jsonData->contents)) { + $this->channel_name = $jsonData->metadata->channelMetadataRenderer->title; $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; From 603ecbbf05bfd4ba33b33963798782a1536bad81 Mon Sep 17 00:00:00 2001 From: Jim Date: Mon, 12 Jul 2021 09:46:44 +0700 Subject: [PATCH 12/24] [YoutubeBridge] New change - Merge two parseJSON into one. - Remove ytBridgeParseHtmlParsing() due to YouTube not populating HTML in the first run. - Add ability to get better timestamp. - Handle the case of 'Livestream' video. --- bridges/YoutubeBridge.php | 113 ++++++++------------------------------ 1 file changed, 22 insertions(+), 91 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index bec33d18cd8..4cea0f28b78 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -103,7 +103,7 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ parse_str($url_components['query'], $params); $url = urldecode($params['q']); } - $desc .= "$description->text"; + $desc .= "$url"; } else { $desc .= nl2br($description->text); } @@ -144,56 +144,6 @@ private function ytBridgeParseXmlFeed($xml) { $this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); // feedName will be used by getName() } - // private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector, $add_parsed_items = true) { - // $count = 0; - - // $duration_min = $this->getInput('duration_min') ?: -1; - // $duration_min = $duration_min * 60; - - // $duration_max = $this->getInput('duration_max') ?: INF; - // $duration_max = $duration_max * 60; - - // if($duration_max < $duration_min) { - // returnClientError('Max duration must be greater than min duration!'); - // } - - // foreach($html->find($element_selector) as $element) { - // $author = ''; - // $desc = ''; - // $time = 0; - // $vid = str_replace('/watch?v=', '', $element->find('a', 0)->href); - // $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid)); - // $title = trim($this->ytBridgeFixTitle($element->find($title_selector, 0)->plaintext)); - - // if (strpos($vid, 'googleads') !== false - // || $title == '[Private video]' - // || $title == '[Deleted video]' - // ) { - // continue; - // } - - // // The duration comes in one of the formats: - // // hh:mm:ss / mm:ss / m:ss - // // 01:03:30 / 15:06 / 1:24 - // $durationText = trim($element->find('span#text', 0)->plaintext); - // $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); - - // sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); - // $duration = $hours * 3600 + $minutes * 60 + $seconds; - - // if($duration < $duration_min || $duration > $duration_max) { - // continue; - // } - - // if ($add_parsed_items) { - // $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - // $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - // } - // $count++; - // } - // return $count; - // } - private function ytBridgeFixTitle($title) { // convert both Ӓ and " to UTF-8 return html_entity_decode($title, ENT_QUOTES, 'UTF-8'); @@ -256,20 +206,31 @@ private function parseJSONListing($jsonData) { $wrapper = $item->gridVideoRenderer; } elseif(isset($item->videoRenderer)) { $wrapper = $item->videoRenderer; + } elseif(isset($item->playlistVideoRenderer)) { + $wrapper = $item->playlistVideoRenderer; } else continue; $vid = $wrapper->videoId; $title = $wrapper->title->runs[0]->text; - $view_count = $wrapper->viewCountText->simpleText; $accessibilityData = $wrapper->title->accessibility->accessibilityData->label; if(isset($wrapper->ownerText)) { $this->channel_name = $wrapper->ownerText->runs[0]->text; + } elseif(isset($wrapper->shortBylineText)) { + $this->channel_name = $wrapper->shortBylineText->runs[0]->text; } // Attempted to get correct timestamp from the string $timestamp = explode($title, $accessibilityData); - $timestamp = explode($view_count, $timestamp[1]); - $timestamp = explode($this->channel_name, $timestamp[0]); + if(isset($wrapper->viewCountText->simpleText)) { + $view_count = $wrapper->viewCountText->simpleText; + $timestamp = explode($view_count, $timestamp[1]); + $timestamp = explode($this->channel_name, $timestamp[0]); + } else { + $timestamp = explode($this->channel_name, $timestamp[1]); + } + if(strpos($timestamp[1], 'Streamed') !== false) { + $timestamp = explode('Streamed', $timestamp[1]); + } $timestamp = strtotime(trim($timestamp[1])); $author = ''; @@ -279,7 +240,7 @@ private function parseJSONListing($jsonData) { // The duration comes in one of the formats: // hh:mm:ss / mm:ss / m:ss // 01:03:30 / 15:06 / 1:24 - $durationText = ''; + $durationText = 0; if(isset($wrapper->lengthText)) { $durationText = $wrapper->lengthText; } else { @@ -291,7 +252,9 @@ private function parseJSONListing($jsonData) { } } - $durationText = trim($durationText->simpleText); + if(isset($durationText->simpleText)) { + $durationText = trim($durationText->simpleText); + } if(preg_match('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', $durationText)) { $durationText = preg_replace('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); } else { @@ -383,11 +346,9 @@ public function collectData(){ $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. Tried:\n - $url_listing"); - $scriptRegex = '/var ytInitialData = (.*);<\/script>/'; - preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); + $jsonData = $this->getJSONData($html); // TODO: this method returns only first 100 video items // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $jsonData = json_decode($matches[1]); $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; @@ -395,7 +356,7 @@ public function collectData(){ if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { - $this->parseJsonPlaylist($jsonData); + $this->parseJSONListing($jsonData); } $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { @@ -456,35 +417,5 @@ public function getName(){ return parent::getName(); } } - - private function parseJsonPlaylist($jsonData) { - $duration_min = $this->getInput('duration_min') ?: -1; - $duration_min = $duration_min * 60; - - $duration_max = $this->getInput('duration_max') ?: INF; - $duration_max = $duration_max * 60; - - if($duration_max < $duration_min) { - returnClientError('Max duration must be greater than min duration!'); - } - - foreach($jsonData as $item) { - if (!isset($item->playlistVideoRenderer)) { - continue; - } - $vid = $item->playlistVideoRenderer->videoId; - $title = $item->playlistVideoRenderer->title->runs[0]->text; - - $author = ''; - $desc = ''; - $time = 0; - $duration = intval($item->playlistVideoRenderer->lengthSeconds); - if($duration < $duration_min || $duration > $duration_max) { - continue; - } - - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } - } } + From 0d13b1f5c59bf6c6dec5fdb772c4306c1eda5e03 Mon Sep 17 00:00:00 2001 From: Jim Date: Mon, 12 Jul 2021 10:19:25 +0700 Subject: [PATCH 13/24] [YoutubeBridge] Add support for more URL type on Youtube --- bridges/YoutubeBridge.php | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 4cea0f28b78..e4473c4164a 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -96,14 +96,27 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ if(isset($videoSecondaryInfo->description)) { foreach($videoSecondaryInfo->description->runs as $description) { - if(isset($description->navigationEndpoint->urlEndpoint)) { - $url = $description->navigationEndpoint->urlEndpoint->url; - $url_components = parse_url($url); - if(isset($url_components['query']) && strpos($url_components['query'], '&q=') !== false) { - parse_str($url_components['query'], $params); - $url = urldecode($params['q']); + if(isset($description->navigationEndpoint)) { + $metadata = $description->navigationEndpoint->commandMetadata->webCommandMetadata; + $web_type = $metadata->webPageType; + $url = $metadata->url; + $text = ''; + switch ($web_type) { + case 'WEB_PAGE_TYPE_UNKNOWN': + $url_components = parse_url($url); + if(isset($url_components['query']) && strpos($url_components['query'], '&q=') !== false) { + parse_str($url_components['query'], $params); + $url = urldecode($params['q']); + } + $text = $url; + break; + case 'WEB_PAGE_TYPE_WATCH': + case 'WEB_PAGE_TYPE_BROWSE': + $url = 'https://www.youtube.com' . $url; + $text = $description->text; + break; } - $desc .= "$url"; + $desc .= "$text"; } else { $desc .= nl2br($description->text); } @@ -200,7 +213,12 @@ private function parseJSONListing($jsonData) { returnClientError('Max duration must be greater than min duration!'); } + $count = 0; foreach($jsonData as $item) { + $count++; + if($count = 20) { + break; + } $wrapper = null; if(isset($item->gridVideoRenderer)) { $wrapper = $item->gridVideoRenderer; @@ -418,4 +436,3 @@ public function getName(){ } } } - From 8c30d34a4573b72a3841238701d5b8fc0598ab75 Mon Sep 17 00:00:00 2001 From: Jim Date: Tue, 13 Jul 2021 09:32:14 +0700 Subject: [PATCH 14/24] [YoutubeBridge] New fix - Fix condition error that make some feed stop working. - Fix feed name in the search feed. - Add back the page number parameter on the search feed. --- bridges/YoutubeBridge.php | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index e4473c4164a..9f7eb46212d 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -39,6 +39,12 @@ class YoutubeBridge extends BridgeAbstract { 's' => array( 'name' => 'search keyword', 'exampleValue' => 'test' + ), + 'pa' => array( + 'name' => 'page', + 'type' => 'number', + 'title' => 'This option is not work anymore, as YouTube will always return the same page', + 'exampleValue' => 1 ) ), 'global' => array( @@ -216,7 +222,7 @@ private function parseJSONListing($jsonData) { $count = 0; foreach($jsonData as $item) { $count++; - if($count = 20) { + if($count == 20) { break; } $wrapper = null; @@ -401,7 +407,7 @@ public function collectData(){ } $this->parseJSONListing($jsonData); $this->feeduri = $url_listing; - $this->feedName = 'Search: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() + $this->feedName = 'Search: ' . $this->request; // feedName will be used by getName() } else { /* no valid mode */ returnClientError("You must either specify either:\n - YouTube username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)"); From 08064833f5635baf8c397d70d4bad9fb8336a92f Mon Sep 17 00:00:00 2001 From: Jim Date: Wed, 14 Jul 2021 11:02:19 +0700 Subject: [PATCH 15/24] [YoutubeBridge] Remove codes extracted timestamp from accessibilityData, set timestamp back to those in meta tag. --- bridges/YoutubeBridge.php | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 9f7eb46212d..db4d66a2470 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -80,11 +80,9 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $author = $elAuthor->getAttribute('content'); } - if(!$time) { - $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); + $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); if(!is_null($elDatePublished)) $time = strtotime($elDatePublished->getAttribute('content')); - } $jsonData = $this->getJSONData($html); $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; @@ -243,23 +241,10 @@ private function parseJSONListing($jsonData) { } elseif(isset($wrapper->shortBylineText)) { $this->channel_name = $wrapper->shortBylineText->runs[0]->text; } - // Attempted to get correct timestamp from the string - $timestamp = explode($title, $accessibilityData); - if(isset($wrapper->viewCountText->simpleText)) { - $view_count = $wrapper->viewCountText->simpleText; - $timestamp = explode($view_count, $timestamp[1]); - $timestamp = explode($this->channel_name, $timestamp[0]); - } else { - $timestamp = explode($this->channel_name, $timestamp[1]); - } - if(strpos($timestamp[1], 'Streamed') !== false) { - $timestamp = explode('Streamed', $timestamp[1]); - } - $timestamp = strtotime(trim($timestamp[1])); $author = ''; $desc = ''; - $time = $timestamp; + $time = ''; // The duration comes in one of the formats: // hh:mm:ss / mm:ss / m:ss From 6d23fba1b141e29a79d76b798b17d48d4c804fbd Mon Sep 17 00:00:00 2001 From: Jim Date: Wed, 14 Jul 2021 11:03:28 +0700 Subject: [PATCH 16/24] [YoutubeBridge] Fix indent --- bridges/YoutubeBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index db4d66a2470..cf07c63a243 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -81,8 +81,8 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ } $elDatePublished = $html->find('meta[itemprop=datePublished]', 0); - if(!is_null($elDatePublished)) - $time = strtotime($elDatePublished->getAttribute('content')); + if(!is_null($elDatePublished)) + $time = strtotime($elDatePublished->getAttribute('content')); $jsonData = $this->getJSONData($html); $jsonData = $jsonData->contents->twoColumnWatchNextResults->results->results->contents; From ea7ad8db2efe1dd5f2d9b6355b329c2aacb942af Mon Sep 17 00:00:00 2001 From: Jim Date: Wed, 14 Jul 2021 14:31:15 +0700 Subject: [PATCH 17/24] [YoutubeBridge] Remove counter --- bridges/YoutubeBridge.php | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index cf07c63a243..0d97bc42d25 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -217,12 +217,7 @@ private function parseJSONListing($jsonData) { returnClientError('Max duration must be greater than min duration!'); } - $count = 0; foreach($jsonData as $item) { - $count++; - if($count == 20) { - break; - } $wrapper = null; if(isset($item->gridVideoRenderer)) { $wrapper = $item->gridVideoRenderer; From 29bc0f3d6048fe6f59674d0074a8151222e605cf Mon Sep 17 00:00:00 2001 From: Jim Date: Tue, 20 Jul 2021 16:19:37 +0700 Subject: [PATCH 18/24] [YoutubeBridge] New change - Add support for Youtube Data API v3. - Fix playlists that have more than 15 items not display correctly. - Add new custom name parameter. - Fix sorting on playlist not function correctly if use API. --- bridges/YoutubeBridge.php | 223 ++++++++++++++++++++++++++++++-------- 1 file changed, 177 insertions(+), 46 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 0d97bc42d25..8abd6ed2c43 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -29,6 +29,13 @@ class YoutubeBridge extends BridgeAbstract { 'required' => true ) ), + 'By custom name' => array( + 'custom' => array( + 'name' => 'custom name', + 'exampleValue' => 'nasa', + 'required' => true + ) + ), 'By playlist Id' => array( 'p' => array( 'name' => 'playlist id', @@ -66,7 +73,16 @@ class YoutubeBridge extends BridgeAbstract { private $feedName = ''; private $feeduri = ''; private $channel_name = ''; - + // This took from repo BetterVideoRss of VerifiedJoseph. + const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; + + /* + * This allow you to use Youtube Data API. + * Enter your API key here. + * To get one, please check out https://developers.google.com/youtube/v3/getting-started + */ + const API_KEY = ''; // Remember to remove it when commit. + private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -128,14 +144,17 @@ private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ } } - private function ytBridgeAddItem($vid, $title, $author, $desc, $time){ + private function ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail = ''){ $item = array(); $item['id'] = $vid; $item['title'] = $title; $item['author'] = $author; $item['timestamp'] = $time; $item['uri'] = self::URI . 'watch?v=' . $vid; - $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/0.jpg'; + if(!$thumbnail) { + $thumbnail = '0'; // Fallback to default thumbnail if there aren't any provided. + } + $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail .'.jpg'; $item['content'] = '
' . $desc; $this->items[] = $item; } @@ -149,7 +168,7 @@ private function ytBridgeParseXmlFeed($xml) { // Make sure the description is easy on the eye :) $desc = htmlspecialchars($desc); $desc = nl2br($desc); - $desc = preg_replace('/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims', + $desc = preg_replace(self::URI_REGEX, '$1 ', $desc); @@ -200,6 +219,89 @@ private function ytGetSimpleHTMLDOM($url, $cached = false){ $defaultSpanText); } + private function getAPIData($endpoint, $query) { + $base_url = 'https://www.googleapis.com/youtube/v3/'; + $url = $base_url . $endpoint . '?' . http_build_query($query); + return json_decode(getContents($url)); + } + + private function ytBridgeAPIQueryVideosData($id) { + $duration_min = $this->getInput('duration_min') ?: -1; + $duration_min = $duration_min * 60; + + $duration_max = $this->getInput('duration_max') ?: INF; + $duration_max = $duration_max * 60; + + if($duration_max < $duration_min) { + returnClientError('Max duration must be greater than min duration!'); + } + + $vid_list = ''; + $api_key = self::API_KEY; + $base_query = array( + 'part' => 'contentDetails', + 'key' => $api_key + ); + + if (!empty($this->getInput('s'))) { + $vid_list = $id; + } else { + if(!empty($this->getInput('u')) || !empty($this->getInput('c')) || !empty($this->getInput('custom'))) { + $query = array_merge($base_query, array( + 'id' => $id + )); + $jsonData = $this->getAPIData('channels', $query); + $id = $jsonData->items[0]->contentDetails->relatedPlaylists->uploads; + } + + $query = array_merge($base_query, array( + 'maxResults' => 50, + 'playlistId' => $id + )); + $count = 0; + $jsonData = $this->getAPIData('playlistItems', $query); + foreach($jsonData->items as $item) { + $vid_list .= $item->contentDetails->videoId . ','; + } + $vid_list = substr($vid_list, 0, -1); + } + + $query = array_merge($base_query, array( + 'part' => 'snippet,liveStreamingDetails,contentDetails', + 'id' => $vid_list + )); + + $jsonData = $this->getAPIData('videos', $query); + foreach($jsonData->items as $item) { + if(empty($this->getInput('s'))) { // Videos from search result won't be filtered again + $interval = new DateInterval($item->contentDetails->duration); + $duration = $interval->h * 3600 + $interval->i * 60 + $interval->s; + if($duration < $duration_min || $duration > $duration_max) { + continue; + } + } + + $snippet = $item->snippet; + $title = $snippet->title; + $vid = $item->id; + $author = $snippet->channelTitle; + $time = $snippet->publishedAt; + $desc = nl2br($snippet->description); + $desc = preg_replace(self::URI_REGEX, + '$1 ', + $desc); + + $thumbnail = ''; + if(isset($snippet->thumbnails->maxres)) { + $thumbnail = 'maxresdefault'; + } elseif(isset($snippet->thumbnails->standard)) { + $thumbnail = 'sddefault'; + } + + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail); + } + } + private function getJSONData($html) { $scriptRegex = '/var ytInitialData = (.*?);<\/script>/'; preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); @@ -217,6 +319,10 @@ private function parseJSONListing($jsonData) { returnClientError('Max duration must be greater than min duration!'); } + $vid_list = ''; + $count = 0; + $total = count($jsonData); + foreach($jsonData as $item) { $wrapper = null; if(isset($item->gridVideoRenderer)) { @@ -230,7 +336,6 @@ private function parseJSONListing($jsonData) { $vid = $wrapper->videoId; $title = $wrapper->title->runs[0]->text; - $accessibilityData = $wrapper->title->accessibility->accessibilityData->label; if(isset($wrapper->ownerText)) { $this->channel_name = $wrapper->ownerText->runs[0]->text; } elseif(isset($wrapper->shortBylineText)) { @@ -258,7 +363,10 @@ private function parseJSONListing($jsonData) { if(isset($durationText->simpleText)) { $durationText = trim($durationText->simpleText); + } else { + $durationText = 0; } + if(preg_match('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', $durationText)) { $durationText = preg_replace('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); } else { @@ -270,9 +378,17 @@ private function parseJSONListing($jsonData) { continue; } - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + $vid_list .= $vid . ','; + if(!self::API_KEY) { + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + } } + + if(self::API_KEY) { + $vid_list = substr($vid_list, 0, -1); + $this->ytBridgeAPIQueryVideosData($vid_list); + } else return; } public function collectData(){ @@ -281,13 +397,11 @@ public function collectData(){ $html = ''; $url_feed = ''; $url_listing = ''; - $custom_url = ''; if($this->getInput('u')) { /* User and Channel modes */ $this->request = $this->getInput('u'); $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request); $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos'; - $custom_url = self::URI . urlencode($this->request) . '/videos'; } elseif($this->getInput('c')) { $this->request = $this->getInput('c'); $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); @@ -296,50 +410,57 @@ public function collectData(){ if(!empty($url_feed) && !empty($url_listing)) { $this->feeduri = $url_listing; - $xml = ''; - $html = ''; - try { - if(!$this->skipFeeds()) { - $xml = $this->ytGetSimpleHTMLDOM($url_feed); - } else { - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - // Throw an error right here if it doesn't have anything. - // Sometimes, Youtube user page have a weird case - // For example: NASA. When user write 'nasa' into the username and add limit for duration - // Bridge immediately find its user page (/user/nasa) and then nothing happen. - // Digging into the data, it appear it's another account, not from NASA itself. - // If you use feed, it works normally cause it already raise 404 error - if(!isset($jsonData->contents)) { - returnServerError(''); // Throw an empty one to trigger try catch + if(!$this->skipFeeds()) { + $html = $this->ytGetSimpleHTMLDOM($url_feed); + $this->ytBridgeParseXmlFeed($html); + } else { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + + $channel_id = ''; + if(isset($jsonData->contents)) { + $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; + if(self::API_KEY) { + $this->ytBridgeAPIQueryVideosData($channel_id); + } else { + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; + $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + $this->parseJSONListing($jsonData); } - } - } catch(Exception $e) { - if($custom_url) { - $html = $this->ytGetSimpleHTMLDOM($custom_url); - $jsonData = $this->getJSONData($html); - $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; - $xml = $this->ytGetSimpleHTMLDOM($url_feed); - $this->feeduri = $custom_url; } else { - returnServerError($e->getMessage()); + returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); } } + $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + } elseif($this->getInput('custom')) { // Custom channel name + $this->request = $this->getInput('custom'); + $url_listing = self::URI . urlencode($this->request) . '/videos'; + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $channel_id = ''; + $url_feed = ''; + $jsonData = $this->getJSONData($html); + $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; + $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; if(!$this->skipFeeds()) { - return $this->ytBridgeParseXmlFeed($xml); - } - - if(isset($jsonData->contents)) { - $this->channel_name = $jsonData->metadata->channelMetadataRenderer->title; - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; - $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; - $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + $xml = $this->ytGetSimpleHTMLDOM($url_feed); + $this->ytBridgeParseXmlFeed($xml); } else { - returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); + if(isset($jsonData->contents)) { + if(self::API_KEY) { + $this->ytBridgeAPIQueryVideosData($channel_id); + } else { + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; + $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + $this->parseJSONListing($jsonData); + } + } else { + returnServerError('Unable to get data from YouTube. Custom name: ' . $this->request); + } } - - $this->parseJSONListing($jsonData); $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + $this->feeduri = $url_listing; } elseif($this->getInput('p')) { /* playlist mode */ // TODO: this mode makes a lot of excess video query requests. // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" @@ -357,13 +478,23 @@ public function collectData(){ $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; $item_count = count($jsonData); + if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { - $this->parseJSONListing($jsonData); + if(self::API_KEY) { + // This method returns only first 50 video items + $this->ytBridgeAPIQueryVideosData($this->request); + } else { + $this->parseJSONListing($jsonData); + } } $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { + if(!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) { + $item1['timestamp'] = strtotime($item1['timestamp']); + $item2['timestamp'] = strtotime($item2['timestamp']); + } return $item2['timestamp'] - $item1['timestamp']; }); } elseif($this->getInput('s')) { /* search mode */ From a56c90615c6b4d4ee6dcf7c63e6a7b77810b1c4a Mon Sep 17 00:00:00 2001 From: Jim Date: Tue, 20 Jul 2021 16:30:32 +0700 Subject: [PATCH 19/24] [YoutubeBridge] Fix lint --- bridges/YoutubeBridge.php | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 8abd6ed2c43..013da8cca6c 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -75,14 +75,13 @@ class YoutubeBridge extends BridgeAbstract { private $channel_name = ''; // This took from repo BetterVideoRss of VerifiedJoseph. const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; - /* * This allow you to use Youtube Data API. * Enter your API key here. * To get one, please check out https://developers.google.com/youtube/v3/getting-started */ const API_KEY = ''; // Remember to remove it when commit. - + private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -154,7 +153,7 @@ private function ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail if(!$thumbnail) { $thumbnail = '0'; // Fallback to default thumbnail if there aren't any provided. } - $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail .'.jpg'; + $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail . '.jpg'; $item['content'] = '
' . $desc; $this->items[] = $item; } @@ -297,7 +296,7 @@ private function ytBridgeAPIQueryVideosData($id) { } elseif(isset($snippet->thumbnails->standard)) { $thumbnail = 'sddefault'; } - + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail); } } @@ -366,7 +365,7 @@ private function parseJSONListing($jsonData) { } else { $durationText = 0; } - + if(preg_match('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', $durationText)) { $durationText = preg_replace('/([\d]{1,2}):([\d]{1,2})\:([\d]{2})/', '$1:$2:$3', $durationText); } else { @@ -478,7 +477,7 @@ public function collectData(){ $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; $item_count = count($jsonData); - + if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { From 41324597cfdae8b9c9d97f5eb4adb94f80898f36 Mon Sep 17 00:00:00 2001 From: Jim Date: Tue, 20 Jul 2021 16:32:23 +0700 Subject: [PATCH 20/24] [YoutubeBridge] Fix lint --- bridges/YoutubeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 013da8cca6c..f69657d16b8 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -74,7 +74,7 @@ class YoutubeBridge extends BridgeAbstract { private $feeduri = ''; private $channel_name = ''; // This took from repo BetterVideoRss of VerifiedJoseph. - const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; + const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore /* * This allow you to use Youtube Data API. * Enter your API key here. From 246ff7f8d7edaaee2d26657dbc1c28171c1a8df8 Mon Sep 17 00:00:00 2001 From: Jim Date: Tue, 20 Jul 2021 16:35:39 +0700 Subject: [PATCH 21/24] [YoutubeBridge] Remove unused variable --- bridges/YoutubeBridge.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index f69657d16b8..5034672f0b5 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -319,8 +319,6 @@ private function parseJSONListing($jsonData) { } $vid_list = ''; - $count = 0; - $total = count($jsonData); foreach($jsonData as $item) { $wrapper = null; From ac8cc806f607b09db34905ce57e6f7266e2e1c00 Mon Sep 17 00:00:00 2001 From: Jim Date: Wed, 28 Jul 2021 14:31:51 +0700 Subject: [PATCH 22/24] [YoutubeBridge] Add feed name for custom name, move some codebase to avoid repeat it --- bridges/YoutubeBridge.php | 46 ++++++++++++--------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 5034672f0b5..bfb36e4ef6e 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -403,17 +403,26 @@ public function collectData(){ $this->request = $this->getInput('c'); $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos'; + } elseif($this->getInput('custom')) { + $this->request = $this->getInput('custom'); + $url_listing = self::URI . urlencode($this->request) . '/videos'; } - if(!empty($url_feed) && !empty($url_listing)) { + if(!empty($url_feed) || !empty($url_listing)) { $this->feeduri = $url_listing; + if(!empty($this->getInput('custom'))) { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; + } if(!$this->skipFeeds()) { $html = $this->ytGetSimpleHTMLDOM($url_feed); $this->ytBridgeParseXmlFeed($html); } else { - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $jsonData = $this->getJSONData($html); - + if(empty($this->getInput('custom'))) { + $html = $this->ytGetSimpleHTMLDOM($url_listing); + $jsonData = $this->getJSONData($html); + } $channel_id = ''; if(isset($jsonData->contents)) { $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; @@ -430,34 +439,6 @@ public function collectData(){ } } $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); - } elseif($this->getInput('custom')) { // Custom channel name - $this->request = $this->getInput('custom'); - $url_listing = self::URI . urlencode($this->request) . '/videos'; - $html = $this->ytGetSimpleHTMLDOM($url_listing); - $channel_id = ''; - $url_feed = ''; - $jsonData = $this->getJSONData($html); - $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; - $url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl; - if(!$this->skipFeeds()) { - $xml = $this->ytGetSimpleHTMLDOM($url_feed); - $this->ytBridgeParseXmlFeed($xml); - } else { - if(isset($jsonData->contents)) { - if(self::API_KEY) { - $this->ytBridgeAPIQueryVideosData($channel_id); - } else { - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; - $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; - $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; - $this->parseJSONListing($jsonData); - } - } else { - returnServerError('Unable to get data from YouTube. Custom name: ' . $this->request); - } - } - $this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); - $this->feeduri = $url_listing; } elseif($this->getInput('p')) { /* playlist mode */ // TODO: this mode makes a lot of excess video query requests. // To make less requests, we need to cache following dictionary "videoId -> datePublished, duration" @@ -542,6 +523,7 @@ public function getName(){ switch($this->queriedContext) { case 'By username': case 'By channel id': + case 'By custom name': case 'By playlist Id': case 'Search result': return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right? From cf8fc9ed3bf43c74ec5c1dc0af81b1c108da4b2c Mon Sep 17 00:00:00 2001 From: Jim Date: Mon, 6 Sep 2021 15:15:55 +0700 Subject: [PATCH 23/24] [YoutubeBridge] Remove support for YouTube Data API v3 --- bridges/YoutubeBridge.php | 117 ++------------------------------------ 1 file changed, 5 insertions(+), 112 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index bfb36e4ef6e..ccda924fe5a 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -75,12 +75,6 @@ class YoutubeBridge extends BridgeAbstract { private $channel_name = ''; // This took from repo BetterVideoRss of VerifiedJoseph. const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore - /* - * This allow you to use Youtube Data API. - * Enter your API key here. - * To get one, please check out https://developers.google.com/youtube/v3/getting-started - */ - const API_KEY = ''; // Remember to remove it when commit. private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); @@ -218,89 +212,6 @@ private function ytGetSimpleHTMLDOM($url, $cached = false){ $defaultSpanText); } - private function getAPIData($endpoint, $query) { - $base_url = 'https://www.googleapis.com/youtube/v3/'; - $url = $base_url . $endpoint . '?' . http_build_query($query); - return json_decode(getContents($url)); - } - - private function ytBridgeAPIQueryVideosData($id) { - $duration_min = $this->getInput('duration_min') ?: -1; - $duration_min = $duration_min * 60; - - $duration_max = $this->getInput('duration_max') ?: INF; - $duration_max = $duration_max * 60; - - if($duration_max < $duration_min) { - returnClientError('Max duration must be greater than min duration!'); - } - - $vid_list = ''; - $api_key = self::API_KEY; - $base_query = array( - 'part' => 'contentDetails', - 'key' => $api_key - ); - - if (!empty($this->getInput('s'))) { - $vid_list = $id; - } else { - if(!empty($this->getInput('u')) || !empty($this->getInput('c')) || !empty($this->getInput('custom'))) { - $query = array_merge($base_query, array( - 'id' => $id - )); - $jsonData = $this->getAPIData('channels', $query); - $id = $jsonData->items[0]->contentDetails->relatedPlaylists->uploads; - } - - $query = array_merge($base_query, array( - 'maxResults' => 50, - 'playlistId' => $id - )); - $count = 0; - $jsonData = $this->getAPIData('playlistItems', $query); - foreach($jsonData->items as $item) { - $vid_list .= $item->contentDetails->videoId . ','; - } - $vid_list = substr($vid_list, 0, -1); - } - - $query = array_merge($base_query, array( - 'part' => 'snippet,liveStreamingDetails,contentDetails', - 'id' => $vid_list - )); - - $jsonData = $this->getAPIData('videos', $query); - foreach($jsonData->items as $item) { - if(empty($this->getInput('s'))) { // Videos from search result won't be filtered again - $interval = new DateInterval($item->contentDetails->duration); - $duration = $interval->h * 3600 + $interval->i * 60 + $interval->s; - if($duration < $duration_min || $duration > $duration_max) { - continue; - } - } - - $snippet = $item->snippet; - $title = $snippet->title; - $vid = $item->id; - $author = $snippet->channelTitle; - $time = $snippet->publishedAt; - $desc = nl2br($snippet->description); - $desc = preg_replace(self::URI_REGEX, - '$1 ', - $desc); - - $thumbnail = ''; - if(isset($snippet->thumbnails->maxres)) { - $thumbnail = 'maxresdefault'; - } elseif(isset($snippet->thumbnails->standard)) { - $thumbnail = 'sddefault'; - } - - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time, $thumbnail); - } - } - private function getJSONData($html) { $scriptRegex = '/var ytInitialData = (.*?);<\/script>/'; preg_match($scriptRegex, $html, $matches) or returnServerError('Could not find ytInitialData'); @@ -376,16 +287,7 @@ private function parseJSONListing($jsonData) { } $vid_list .= $vid . ','; - if(!self::API_KEY) { - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } } - - if(self::API_KEY) { - $vid_list = substr($vid_list, 0, -1); - $this->ytBridgeAPIQueryVideosData($vid_list); - } else return; } public function collectData(){ @@ -426,14 +328,10 @@ public function collectData(){ $channel_id = ''; if(isset($jsonData->contents)) { $channel_id = $jsonData->metadata->channelMetadataRenderer->externalId; - if(self::API_KEY) { - $this->ytBridgeAPIQueryVideosData($channel_id); - } else { - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; - $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; - $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; - $this->parseJSONListing($jsonData); - } + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1]; + $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]; + $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items; + $this->parseJSONListing($jsonData); } else { returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request); } @@ -460,12 +358,7 @@ public function collectData(){ if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { - if(self::API_KEY) { - // This method returns only first 50 video items - $this->ytBridgeAPIQueryVideosData($this->request); - } else { - $this->parseJSONListing($jsonData); - } + $this->parseJSONListing($jsonData); } $this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName() usort($this->items, function ($item1, $item2) { From 311773bb3dc453cb0da95c468861db834fd6f45f Mon Sep 17 00:00:00 2001 From: Jim Date: Thu, 9 Sep 2021 16:17:47 +0700 Subject: [PATCH 24/24] [YoutubeBridge] Fix no results were displayed after scraping. --- bridges/YoutubeBridge.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index ccda924fe5a..f70c84e6308 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -229,7 +229,7 @@ private function parseJSONListing($jsonData) { returnClientError('Max duration must be greater than min duration!'); } - $vid_list = ''; + // $vid_list = ''; foreach($jsonData as $item) { $wrapper = null; @@ -286,7 +286,9 @@ private function parseJSONListing($jsonData) { continue; } - $vid_list .= $vid . ','; + // $vid_list .= $vid . ','; + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); } }