From 2fb9922a15da62916085d1ac854edd4af61dadbf Mon Sep 17 00:00:00 2001 From: TobiGr Date: Fri, 14 Jul 2023 17:43:09 +0200 Subject: [PATCH 1/4] [SoundCloud] Detect whether there are any more search results Add test for this edge case. --- .../extractors/SoundcloudSearchExtractor.java | 48 ++++++++++++++----- .../search/SoundcloudSearchExtractorTest.java | 24 ++++++++++ 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java index 5ec7fb72da..c0d1e1bf77 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java @@ -33,7 +33,9 @@ import javax.annotation.Nonnull; public class SoundcloudSearchExtractor extends SearchExtractor { - private JsonArray initialSearchCollection; + private JsonObject initialSearchObject; + private static final String COLLECTION = "collection"; + private static final String TOTAL_RESULTS = "total_results"; public SoundcloudSearchExtractor(final StreamingService service, final SearchQueryHandler linkHandler) { @@ -60,9 +62,15 @@ public List getMetaInfo() { @Nonnull @Override public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - return new InfoItemsPage<>( - collectItems(initialSearchCollection), - getNextPageFromCurrentUrl(getUrl(), currentOffset -> ITEMS_PER_PAGE)); + if (initialSearchObject.getInt(TOTAL_RESULTS) > ITEMS_PER_PAGE) { + return new InfoItemsPage<>( + collectItems(initialSearchObject.getArray(COLLECTION)), + getNextPageFromCurrentUrl(getUrl(), currentOffset -> ITEMS_PER_PAGE)); + } else { + return new InfoItemsPage<>( + collectItems(initialSearchObject.getArray(COLLECTION)), null); + } + } @Override @@ -74,17 +82,29 @@ public InfoItemsPage getPage(final Page page) throws IOException, final Downloader dl = getDownloader(); final JsonArray searchCollection; + final int totalResults; try { final String response = dl.get(page.getUrl(), getExtractorLocalization()) .responseBody(); - searchCollection = JsonParser.object().from(response).getArray("collection"); + final JsonObject result = JsonParser.object().from(response); + searchCollection = result.getArray(COLLECTION); + totalResults = result.getInt(TOTAL_RESULTS); } catch (final JsonParserException e) { throw new ParsingException("Could not parse json response", e); } + final boolean hasNextPage; + try { + hasNextPage = getOffsetFromUrl(page.getUrl()) + ITEMS_PER_PAGE <= totalResults; + } catch (MalformedURLException | UnsupportedEncodingException e) { + throw new ParsingException("Could not get offset from page URL", e); + } + if (hasNextPage) { + return new InfoItemsPage<>(collectItems(searchCollection), + getNextPageFromCurrentUrl(page.getUrl(), + currentOffset -> currentOffset + ITEMS_PER_PAGE)); + } + return new InfoItemsPage<>(collectItems(searchCollection), null); - return new InfoItemsPage<>(collectItems(searchCollection), - getNextPageFromCurrentUrl(page.getUrl(), - currentOffset -> currentOffset + ITEMS_PER_PAGE)); } @Override @@ -94,12 +114,12 @@ public void onFetchPage(@Nonnull final Downloader downloader) throws IOException final String url = getUrl(); try { final String response = dl.get(url, getExtractorLocalization()).responseBody(); - initialSearchCollection = JsonParser.object().from(response).getArray("collection"); + initialSearchObject = JsonParser.object().from(response); } catch (final JsonParserException e) { throw new ParsingException("Could not parse json response", e); } - if (initialSearchCollection.isEmpty()) { + if (initialSearchObject.getArray(COLLECTION).isEmpty()) { throw new SearchExtractor.NothingFoundException("Nothing found"); } } @@ -134,12 +154,16 @@ private InfoItemsCollector collectItems( private Page getNextPageFromCurrentUrl(final String currentUrl, final IntUnaryOperator newPageOffsetCalculator) throws MalformedURLException, UnsupportedEncodingException { - final int currentPageOffset = Integer.parseInt( - Parser.compatParseMap(new URL(currentUrl).getQuery()).get("offset")); + final int currentPageOffset = getOffsetFromUrl(currentUrl); return new Page( currentUrl.replace( "&offset=" + currentPageOffset, "&offset=" + newPageOffsetCalculator.applyAsInt(currentPageOffset))); } + + private int getOffsetFromUrl(final String url) + throws MalformedURLException, UnsupportedEncodingException { + return Integer.parseInt(Parser.compatParseMap(new URL(url).getQuery()).get("offset")); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/search/SoundcloudSearchExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/search/SoundcloudSearchExtractorTest.java index c79a4505e0..173cf1b0b9 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/search/SoundcloudSearchExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/search/SoundcloudSearchExtractorTest.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.soundcloud.search; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.schabi.newpipe.extractor.ServiceList.SoundCloud; import static org.schabi.newpipe.extractor.services.DefaultTests.assertNoDuplicatedItems; @@ -181,4 +182,27 @@ void testIsVerified() throws IOException, ExtractionException { assertTrue(verified); } } + + public static class NoNextPage extends DefaultSearchExtractorTest { + + private static SearchExtractor extractor; + private static final String QUERY = "Dan at hor#berlgbd"; + + @BeforeAll + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = SoundCloud.getSearchExtractor(QUERY); + extractor.fetchPage(); + } + + @Override public boolean expectedHasMoreItems() { return false; } + @Override public SearchExtractor extractor() throws Exception { return extractor; } + @Override public StreamingService expectedService() throws Exception { return SoundCloud; } + @Override public String expectedName() throws Exception { return QUERY; } + @Override public String expectedId() throws Exception { return QUERY; } + @Override public String expectedUrlContains() { return "soundcloud.com/search?q=" + urlEncode(QUERY); } + @Override public String expectedOriginalUrlContains() { return "soundcloud.com/search?q=" + urlEncode(QUERY); } + @Override public String expectedSearchString() { return QUERY; } + @Nullable @Override public String expectedSearchSuggestion() { return null; } + } } From aa6c17dc77dda965506df360c646d6124ead1328 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 3 Aug 2023 14:37:08 +0200 Subject: [PATCH 2/4] [SoundCloud] Deduplicate some code --- .../soundcloud/SoundcloudParsingHelper.java | 37 ++++++++----------- .../SoundcloudCommentsExtractor.java | 28 +++++--------- 2 files changed, 24 insertions(+), 41 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java index a0bd239df1..b926da6a88 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java @@ -1,14 +1,9 @@ package org.schabi.newpipe.extractor.services.soundcloud; -import static org.schabi.newpipe.extractor.ServiceList.SoundCloud; -import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; -import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; - import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; - import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -28,6 +23,7 @@ import org.schabi.newpipe.extractor.utils.Parser.RegexException; import org.schabi.newpipe.extractor.utils.Utils; +import javax.annotation.Nonnull; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; @@ -38,7 +34,9 @@ import java.util.List; import java.util.Map; -import javax.annotation.Nonnull; +import static org.schabi.newpipe.extractor.ServiceList.SoundCloud; +import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; +import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; public final class SoundcloudParsingHelper { private static String clientId; @@ -200,6 +198,7 @@ public static String getUsersFromApiMinItems(final int minItems, * * @return the next streams url, empty if don't have */ + @Nonnull public static String getUsersFromApi(final ChannelInfoItemsCollector collector, final String apiUrl) throws IOException, ReCaptchaException, ParsingException { @@ -221,17 +220,7 @@ public static String getUsersFromApi(final ChannelInfoItemsCollector collector, } } - String nextPageUrl; - try { - nextPageUrl = responseObject.getString("next_href"); - if (!nextPageUrl.contains("client_id=")) { - nextPageUrl += "&client_id=" + SoundcloudParsingHelper.clientId(); - } - } catch (final Exception ignored) { - nextPageUrl = ""; - } - - return nextPageUrl; + return getNextPageUrl(responseObject); } /** @@ -261,6 +250,7 @@ public static String getStreamsFromApiMinItems(final int minItems, * * @return the next streams url, empty if don't have */ + @Nonnull public static String getStreamsFromApi(final StreamInfoItemsCollector collector, final String apiUrl, final boolean charts) throws IOException, @@ -288,17 +278,20 @@ public static String getStreamsFromApi(final StreamInfoItemsCollector collector, } } - String nextPageUrl; + return getNextPageUrl(responseObject); + } + + @Nonnull + private static String getNextPageUrl(@Nonnull final JsonObject response) { try { - nextPageUrl = responseObject.getString("next_href"); + String nextPageUrl = response.getString("next_href"); if (!nextPageUrl.contains("client_id=")) { nextPageUrl += "&client_id=" + SoundcloudParsingHelper.clientId(); } + return nextPageUrl; } catch (final Exception ignored) { - nextPageUrl = ""; + return ""; } - - return nextPageUrl; } public static String getStreamsFromApi(final StreamInfoItemsCollector collector, diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java index b02a3ea802..7dba601bca 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java @@ -15,6 +15,7 @@ import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import java.io.IOException; @@ -33,22 +34,7 @@ public SoundcloudCommentsExtractor(final StreamingService service, @Override public InfoItemsPage getInitialPage() throws ExtractionException, IOException { - final Downloader downloader = NewPipe.getDownloader(); - final Response response = downloader.get(getUrl()); - - final JsonObject json; - try { - json = JsonParser.object().from(response.responseBody()); - } catch (final JsonParserException e) { - throw new ParsingException("Could not parse json", e); - } - - final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( - getServiceId()); - - collectStreamsFrom(collector, json.getArray("collection")); - - return new InfoItemsPage<>(collector, new Page(json.getString("next_href"))); + return getPage(getUrl()); } @Override @@ -57,9 +43,14 @@ public InfoItemsPage getPage(final Page page) throws Extractio if (page == null || isNullOrEmpty(page.getUrl())) { throw new IllegalArgumentException("Page doesn't contain an URL"); } + return getPage(page.getUrl()); + } + @Nonnull + private InfoItemsPage getPage(@Nonnull final String url) + throws ParsingException, IOException, ReCaptchaException { final Downloader downloader = NewPipe.getDownloader(); - final Response response = downloader.get(page.getUrl()); + final Response response = downloader.get(url); final JsonObject json; try { @@ -72,8 +63,7 @@ public InfoItemsPage getPage(final Page page) throws Extractio getServiceId()); collectStreamsFrom(collector, json.getArray("collection")); - - return new InfoItemsPage<>(collector, new Page(json.getString("next_href"))); + return new InfoItemsPage<>(collector, new Page(json.getString("next_href", null))); } @Override From 485bfbca9d6d1584c40873d2943721fcedb5ffa6 Mon Sep 17 00:00:00 2001 From: Stypox Date: Sun, 6 Aug 2023 11:35:37 +0200 Subject: [PATCH 3/4] [SoundCloud] Move try-catch inside getOffsetFromUrl --- .../extractors/SoundcloudSearchExtractor.java | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java index c0d1e1bf77..db076ebb19 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java @@ -92,19 +92,13 @@ public InfoItemsPage getPage(final Page page) throws IOException, } catch (final JsonParserException e) { throw new ParsingException("Could not parse json response", e); } - final boolean hasNextPage; - try { - hasNextPage = getOffsetFromUrl(page.getUrl()) + ITEMS_PER_PAGE <= totalResults; - } catch (MalformedURLException | UnsupportedEncodingException e) { - throw new ParsingException("Could not get offset from page URL", e); - } - if (hasNextPage) { + + if (getOffsetFromUrl(page.getUrl()) + ITEMS_PER_PAGE <= totalResults) { return new InfoItemsPage<>(collectItems(searchCollection), getNextPageFromCurrentUrl(page.getUrl(), currentOffset -> currentOffset + ITEMS_PER_PAGE)); } return new InfoItemsPage<>(collectItems(searchCollection), null); - } @Override @@ -153,7 +147,7 @@ private InfoItemsCollector collectItems( private Page getNextPageFromCurrentUrl(final String currentUrl, final IntUnaryOperator newPageOffsetCalculator) - throws MalformedURLException, UnsupportedEncodingException { + throws ParsingException { final int currentPageOffset = getOffsetFromUrl(currentUrl); return new Page( @@ -162,8 +156,11 @@ private Page getNextPageFromCurrentUrl(final String currentUrl, "&offset=" + newPageOffsetCalculator.applyAsInt(currentPageOffset))); } - private int getOffsetFromUrl(final String url) - throws MalformedURLException, UnsupportedEncodingException { - return Integer.parseInt(Parser.compatParseMap(new URL(url).getQuery()).get("offset")); + private int getOffsetFromUrl(final String url) throws ParsingException { + try { + return Integer.parseInt(Parser.compatParseMap(new URL(url).getQuery()).get("offset")); + } catch (MalformedURLException | UnsupportedEncodingException e) { + throw new ParsingException("Could not get offset from page URL", e); + } } } From 29472571110d1074f4c08af87d13f1db6ac8e950 Mon Sep 17 00:00:00 2001 From: Stypox Date: Sun, 6 Aug 2023 11:38:22 +0200 Subject: [PATCH 4/4] [SoundCloud] Properly calculate if results have finished --- .../soundcloud/extractors/SoundcloudSearchExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java index db076ebb19..b326235ff0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudSearchExtractor.java @@ -93,7 +93,7 @@ public InfoItemsPage getPage(final Page page) throws IOException, throw new ParsingException("Could not parse json response", e); } - if (getOffsetFromUrl(page.getUrl()) + ITEMS_PER_PAGE <= totalResults) { + if (getOffsetFromUrl(page.getUrl()) + ITEMS_PER_PAGE < totalResults) { return new InfoItemsPage<>(collectItems(searchCollection), getNextPageFromCurrentUrl(page.getUrl(), currentOffset -> currentOffset + ITEMS_PER_PAGE));