Move stuff from extractVideoPreviewInfo() into YoutubeStreamInfoItemExtractor and partially fix search
TeamNewPipe · Feb 25, 2020 · b88188d · b88188d
1 parent af49b3c
commit b88188d
Show file tree

Hide file tree

Showing 4 changed files with 117 additions and 353 deletions.
diff --git a/...va/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/...va/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java
@@ -4,6 +4,7 @@
 import com.grack.nanojson.JsonObject;
 import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;
+
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -23,9 +24,10 @@
 import org.schabi.newpipe.extractor.utils.Parser;
 import org.schabi.newpipe.extractor.utils.Utils;
 
+import java.io.IOException;
+
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
-import java.io.IOException;
 
 @SuppressWarnings("WeakerAccess")
 public class YoutubePlaylistExtractor extends PlaylistExtractor {
@@ -318,7 +320,7 @@ public String getUploaderUrl() throws ParsingException {
                 }
 
                 @Override
-                public String getTextualUploadDate() throws ParsingException {
+                public String getTextualUploadDate() {
                     return "";
                 }
 

diff --git a/...java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/...java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java
@@ -1,5 +1,10 @@
 package org.schabi.newpipe.extractor.services.youtube.extractors;
 
+import com.grack.nanojson.JsonArray;
+import com.grack.nanojson.JsonObject;
+import com.grack.nanojson.JsonParser;
+import com.grack.nanojson.JsonParserException;
+
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -16,12 +21,13 @@
 import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
 import org.schabi.newpipe.extractor.utils.Parser;
 
-import javax.annotation.Nonnull;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.net.URL;
 
+import javax.annotation.Nonnull;
+
 /*
  * Created by Christian Schabesberger on 22.07.2018
  *
@@ -45,6 +51,7 @@
 public class YoutubeSearchExtractor extends SearchExtractor {
 
     private Document doc;
+    private JsonObject ytInitialData;
 
     public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
         super(service, linkHandler);
@@ -55,6 +62,7 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
         final String url = getUrl();
         final Response response = downloader.get(url, getExtractorLocalization());
         doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
+        ytInitialData = getInitialData();
     }
 
     @Nonnull
@@ -86,6 +94,7 @@ public String getNextPageUrl() throws ExtractionException {
 
     @Override
     public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
+        // TODO: Get extracting next pages working
         final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
         doc = Jsoup.parse(response, pageUrl);
 
@@ -108,37 +117,33 @@ private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundE
         InfoItemsSearchCollector collector = getInfoItemSearchCollector();
         collector.reset();
 
-        Element list = doc.select("ol[class=\"item-section\"]").first();
         final TimeAgoParser timeAgoParser = getTimeAgoParser();
 
-        for (Element item : list.children()) {
-            /* First we need to determine which kind of item we are working with.
-               Youtube depicts five different kinds of items on its search result page. These are
-               regular videos, playlists, channels, two types of video suggestions, and a "no video
-               found" item. Since we only want videos, we need to filter out all the others.
-               An example for this can be seen here:
-               https://www.youtube.com/results?search_query=asdf&page=1
-
-               We already applied a filter to the url, so we don't need to care about channels and
-               playlists now.
-            */
-
-            Element el;
-
-            if ((el = item.select("div[class*=\"search-message\"]").first()) != null) {
-                throw new NothingFoundException(el.text());
-
-                // video item type
-            } else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
-                collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser));
-            } else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
-                collector.commit(new YoutubeChannelInfoItemExtractor(el));
-            } else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
-                    item.select(".yt-pl-icon-mix").isEmpty()) {
-                collector.commit(new YoutubePlaylistInfoItemExtractor(el));
+        JsonArray list = ytInitialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
+                .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
+                .getObject(0).getObject("itemSectionRenderer").getArray("contents");
+
+        for (Object item : list) {
+            if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
+                throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
+                        .getObject("bodyText").getArray("runs").getObject(0).getString("text"));
+            } else if (((JsonObject) item).getObject("videoRenderer") != null) {
+                collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
+            } else if (((JsonObject) item).getObject("channelRenderer") != null) {
+//                collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
+            } else if (((JsonObject) item).getObject("playlistRenderer") != null) {
+//                collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
             }
         }
-
         return collector;
     }
+
+    private JsonObject getInitialData() throws ParsingException { // extracts the ytInitialData JSON embedded in the page held in the `doc` field
+        try { // both the regex lookup and the JSON parse can fail; handled together below
+            String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); // reluctant capture ends at the first "};" after the assignment. NOTE(review): a "};" inside the JSON itself would cut the capture short - confirm
+            return JsonParser.object().from(initialData); // parse the captured text into a JsonObject
+        } catch (JsonParserException | Parser.RegexException e) {
+            throw new ParsingException("Could not get ytInitialData", e); // regex and JSON failures are both surfaced as ParsingException, with the cause preserved
+        }
+    }
 }
diff --git a/...java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/...java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@@ -4,10 +4,10 @@
 import com.grack.nanojson.JsonObject;
 import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;
+
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 import org.mozilla.javascript.Context;
 import org.mozilla.javascript.Function;
 import org.mozilla.javascript.ScriptableObject;
@@ -25,24 +25,38 @@
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.localization.TimeAgoParser;
 import org.schabi.newpipe.extractor.services.youtube.ItagItem;
-import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
 import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
-import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
-import org.schabi.newpipe.extractor.stream.*;
+import org.schabi.newpipe.extractor.stream.AudioStream;
+import org.schabi.newpipe.extractor.stream.Description;
+import org.schabi.newpipe.extractor.stream.Frameset;
+import org.schabi.newpipe.extractor.stream.Stream;
+import org.schabi.newpipe.extractor.stream.StreamExtractor;
+import org.schabi.newpipe.extractor.stream.StreamInfoItem;
+import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
+import org.schabi.newpipe.extractor.stream.StreamType;
+import org.schabi.newpipe.extractor.stream.SubtitlesStream;
+import org.schabi.newpipe.extractor.stream.VideoStream;
 import org.schabi.newpipe.extractor.utils.JsonUtils;
 import org.schabi.newpipe.extractor.utils.Parser;
 import org.schabi.newpipe.extractor.utils.Utils;
 
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
 /*
  * Created by Christian Schabesberger on 06.08.15.
  *
@@ -663,7 +677,7 @@ public StreamInfoItem getNextStream() throws IOException, ExtractionException {
             final TimeAgoParser timeAgoParser = getTimeAgoParser();
             StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
 
-            collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser));
+            collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
             return collector.getItems().get(0);
         } catch (Exception e) {
             throw new ParsingException("Could not get next video", e);
@@ -684,7 +698,7 @@ public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException {
             for (Object ul : results) {
                 final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");
 
-                if (videoInfo != null) collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser));
+                if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
             }
             return collector;
         } catch (Exception e) {
@@ -1064,123 +1078,6 @@ private Map<String, ItagItem> getItags(String streamingDataKey, ItagItem.ItagTyp
         return urlAndItags;
     }
 
-    /**
-     * Provides information about links to other videos on the video page, such as related videos.
-     * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
-     */
-    private StreamInfoItemExtractor extractVideoPreviewInfo(final JsonObject videoInfo, final TimeAgoParser timeAgoParser) {
-        return new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) {
-            @Override
-            public StreamType getStreamType() {
-                try {
-                    if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
-                        return StreamType.LIVE_STREAM;
-                    }
-                } catch (Exception ignored) {}
-                return StreamType.VIDEO_STREAM;
-            }
-
-            @Override
-            public boolean isAd() {
-                return false;
-            }
-
-            @Override
-            public String getUrl() throws ParsingException {
-                try {
-                    String videoId = videoInfo.getString("videoId");
-                    return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
-                } catch (Exception e) {
-                    throw new ParsingException("Could not get url", e);
-                }
-            }
-
-            @Override
-            public String getName() throws ParsingException {
-                String name = null;
-                try {
-                    name = videoInfo.getObject("title").getString("simpleText");
-                } catch (Exception ignored) {}
-                if (name != null && !name.isEmpty()) return name;
-                throw new ParsingException("Could not get title");
-            }
-
-            @Override
-            public long getDuration() throws ParsingException {
-                try {
-                    if (getStreamType() == StreamType.LIVE_STREAM) return -1;
-                    return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
-                } catch (Exception e) {
-                    throw new ParsingException("Could not get duration", e);
-                }
-            }
-
-            @Override
-            public String getUploaderUrl() throws ParsingException {
-                try {
-                    String id = videoInfo.getObject("longBylineText").getArray("runs")
-                            .getObject(0).getObject("navigationEndpoint")
-                            .getObject("browseEndpoint").getString("browseId");
-                    if (id == null || id.isEmpty()) {
-                        throw new IllegalArgumentException("is empty");
-                    }
-                    return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
-                } catch (Exception e) {
-                    throw new ParsingException("Could not get uploader url");
-                }
-            }
-
-            @Nullable
-            @Override
-            public String getTextualUploadDate() {
-                return null;
-            }
-
-            @Nullable
-            @Override
-            public DateWrapper getUploadDate() {
-                return null;
-            }
-
-            @Override
-            public long getViewCount() throws ParsingException {
-                try {
-                    String viewCount;
-                    if (getStreamType() == StreamType.LIVE_STREAM)  {
-                        viewCount = videoInfo.getObject("viewCountText")
-                                .getArray("runs").getObject(0).getString("text");
-                    } else {
-                        viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
-                    }
-                    if (viewCount.equals("Recommended for you")) return -1;
-                    return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
-                } catch (Exception e) {
-                    throw new ParsingException("Could not get view count", e);
-                }
-            }
-
-            @Override
-            public String getUploaderName() throws ParsingException {
-                try {
-                    return videoInfo.getObject("longBylineText").getArray("runs")
-                            .getObject(0).getString("text");
-                } catch (Exception e) {
-                    throw new ParsingException("Could not get uploader name", e);
-                }
-            }
-
-            @Override
-            public String getThumbnailUrl() throws ParsingException {
-                try {
-                    return videoInfo.getObject("thumbnail").getArray("thumbnails")
-                            .getObject(0).getString("url");
-                } catch (Exception e) {
-                    throw new ParsingException("Could not get thumbnail url", e);
-                }
-            }
-        };
-    }
-
     @Nonnull
     @Override
     public List<Frameset> getFrames() throws ExtractionException {