Skip to content

Commit

Permalink
Move stuff from extractVideoPreviewInfo() into YoutubeStreamInfoItemE…
Browse files Browse the repository at this point in the history
…xtractor and partially fix search
  • Loading branch information
wb9688 authored and TobiGr committed Feb 25, 2020
1 parent af49b3c commit b88188d
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 353 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
Expand All @@ -23,9 +24,10 @@
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;

import java.io.IOException;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;

@SuppressWarnings("WeakerAccess")
public class YoutubePlaylistExtractor extends PlaylistExtractor {
Expand Down Expand Up @@ -318,7 +320,7 @@ public String getUploaderUrl() throws ParsingException {
}

@Override
public String getTextualUploadDate() throws ParsingException {
public String getTextualUploadDate() {
return "";
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
Expand All @@ -16,12 +21,13 @@
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Parser;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;

import javax.annotation.Nonnull;

/*
* Created by Christian Schabesberger on 22.07.2018
*
Expand All @@ -45,6 +51,7 @@
public class YoutubeSearchExtractor extends SearchExtractor {

private Document doc;
private JsonObject ytInitialData;

public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
super(service, linkHandler);
Expand All @@ -55,6 +62,7 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
ytInitialData = getInitialData();
}

@Nonnull
Expand Down Expand Up @@ -86,6 +94,7 @@ public String getNextPageUrl() throws ExtractionException {

@Override
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
// TODO: Get extracting next pages working
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
doc = Jsoup.parse(response, pageUrl);

Expand All @@ -108,37 +117,33 @@ private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundE
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
collector.reset();

Element list = doc.select("ol[class=\"item-section\"]").first();
final TimeAgoParser timeAgoParser = getTimeAgoParser();

for (Element item : list.children()) {
/* First we need to determine which kind of item we are working with.
Youtube depicts five different kinds of items on its search result page. These are
regular videos, playlists, channels, two types of video suggestions, and a "no video
found" item. Since we only want videos, we need to filter out all the others.
An example for this can be seen here:
https://www.youtube.com/results?search_query=asdf&page=1
We already applied a filter to the url, so we don't need to care about channels and
playlists now.
*/

Element el;

if ((el = item.select("div[class*=\"search-message\"]").first()) != null) {
throw new NothingFoundException(el.text());

// video item type
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser));
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
collector.commit(new YoutubeChannelInfoItemExtractor(el));
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
item.select(".yt-pl-icon-mix").isEmpty()) {
collector.commit(new YoutubePlaylistInfoItemExtractor(el));
JsonArray list = ytInitialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
.getObject(0).getObject("itemSectionRenderer").getArray("contents");

for (Object item : list) {
if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
} else if (((JsonObject) item).getObject("videoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
} else if (((JsonObject) item).getObject("channelRenderer") != null) {
// collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
} else if (((JsonObject) item).getObject("playlistRenderer") != null) {
// collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
}
}

return collector;
}

private JsonObject getInitialData() throws ParsingException {
try {
String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString());
return JsonParser.object().from(initialData);
} catch (JsonParserException | Parser.RegexException e) {
throw new ParsingException("Could not get ytInitialData", e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
Expand All @@ -25,24 +25,38 @@
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.stream.AudioStream;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.Frameset;
import org.schabi.newpipe.extractor.stream.Stream;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/*
* Created by Christian Schabesberger on 06.08.15.
*
Expand Down Expand Up @@ -663,7 +677,7 @@ public StreamInfoItem getNextStream() throws IOException, ExtractionException {
final TimeAgoParser timeAgoParser = getTimeAgoParser();
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());

collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser));
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
return collector.getItems().get(0);
} catch (Exception e) {
throw new ParsingException("Could not get next video", e);
Expand All @@ -684,7 +698,7 @@ public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException {
for (Object ul : results) {
final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");

if (videoInfo != null) collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser));
if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
}
return collector;
} catch (Exception e) {
Expand Down Expand Up @@ -1064,123 +1078,6 @@ private Map<String, ItagItem> getItags(String streamingDataKey, ItagItem.ItagTyp
return urlAndItags;
}

/**
* Provides information about links to other videos on the video page, such as related videos.
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
*/
private StreamInfoItemExtractor extractVideoPreviewInfo(final JsonObject videoInfo, final TimeAgoParser timeAgoParser) {
return new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) {
@Override
public StreamType getStreamType() {
try {
if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
return StreamType.LIVE_STREAM;
}
} catch (Exception ignored) {}
return StreamType.VIDEO_STREAM;
}

@Override
public boolean isAd() {
return false;
}

@Override
public String getUrl() throws ParsingException {
try {
String videoId = videoInfo.getString("videoId");
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
} catch (Exception e) {
throw new ParsingException("Could not get url", e);
}
}

@Override
public String getName() throws ParsingException {
String name = null;
try {
name = videoInfo.getObject("title").getString("simpleText");
} catch (Exception ignored) {}
if (name != null && !name.isEmpty()) return name;
throw new ParsingException("Could not get title");
}

@Override
public long getDuration() throws ParsingException {
try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
} catch (Exception e) {
throw new ParsingException("Could not get duration", e);
}
}

@Override
public String getUploaderUrl() throws ParsingException {
try {
String id = videoInfo.getObject("longBylineText").getArray("runs")
.getObject(0).getObject("navigationEndpoint")
.getObject("browseEndpoint").getString("browseId");
if (id == null || id.isEmpty()) {
throw new IllegalArgumentException("is empty");
}
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
} catch (Exception e) {
throw new ParsingException("Could not get uploader url");
}
}

@Nullable
@Override
public String getTextualUploadDate() {
return null;
}

@Nullable
@Override
public DateWrapper getUploadDate() {
return null;
}

@Override
public long getViewCount() throws ParsingException {
try {
String viewCount;
if (getStreamType() == StreamType.LIVE_STREAM) {
viewCount = videoInfo.getObject("viewCountText")
.getArray("runs").getObject(0).getString("text");
} else {
viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
}
if (viewCount.equals("Recommended for you")) return -1;
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
} catch (Exception e) {
throw new ParsingException("Could not get view count", e);
}
}

@Override
public String getUploaderName() throws ParsingException {
try {
return videoInfo.getObject("longBylineText").getArray("runs")
.getObject(0).getString("text");
} catch (Exception e) {
throw new ParsingException("Could not get uploader name", e);
}
}

@Override
public String getThumbnailUrl() throws ParsingException {
try {
return videoInfo.getObject("thumbnail").getArray("thumbnails")
.getObject(0).getString("url");
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
};
}

@Nonnull
@Override
public List<Frameset> getFrames() throws ExtractionException {
Expand Down
Loading

0 comments on commit b88188d

Please sign in to comment.