Skip to content

Commit

Permalink
proper topic for history episodes
Browse files Browse the repository at this point in the history
  • Loading branch information
codingPF committed Feb 17, 2024
1 parent 8273e69 commit 7e6ef2c
Show file tree
Hide file tree
Showing 10 changed files with 1,670 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ public final class OrfOnConstants {
//
private OrfOnConstants() {}
//
/**
 * Appends the configured page size as a {@code limit} query parameter to the given URL.
 *
 * <p>If the URL already contains a query string (a {@code '?'}), the parameter is attached
 * with {@code '&'}; otherwise it is attached with {@code '?'}. For URLs without a query string
 * the result is identical to plain concatenation of {@code "?limit=" + PAGE_SIZE}.
 *
 * @param plainUrl the URL to extend
 * @return {@code plainUrl} followed by the {@code limit} parameter set to {@code PAGE_SIZE}
 */
public static String createMaxLimmitUrl(String plainUrl) {
  // A second '?' would become part of the previous parameter's value instead of starting a new
  // parameter, so switch to '&' when a query is already present. The null check keeps the
  // previous plain-concatenation result for a null argument.
  final boolean hasQuery = plainUrl != null && plainUrl.indexOf('?') >= 0;
  final String separator = hasQuery ? "&" : "?";
  return plainUrl + separator + "limit=" + OrfOnConstants.PAGE_SIZE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ protected RecursiveTask<Set<Film>> createCrawlerTask() {
try {
// Sendungen Verpasst (letzten 14 Tage)
// TAG > Episode > Episode2Film
//final Set<OrfOnVideoInfoDTO> epsiodesFromDay = processDayUrlsToCrawl();
//allVideos.addAll(epsiodesFromDay);
final Set<OrfOnVideoInfoDTO> epsiodesFromDay = processDayUrlsToCrawl();
allVideos.addAll(epsiodesFromDay);
//
// Sendungen a-z
// Buchstabe > Episoden > Episode2Film
//final Set<OrfOnVideoInfoDTO> videosFromTopics = processAZUrlsToCrawl();
//allVideos.addAll(videosFromTopics);
final Set<OrfOnVideoInfoDTO> videosFromTopics = processAZUrlsToCrawl();
allVideos.addAll(videosFromTopics);
//
// History (top categories) > children >
final Set<OrfOnVideoInfoDTO> historyVideos = processHistoryUrlToCrawl();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ public Optional<String> getTitleWithDate() {
/**
 * Returns the value of the {@code topic} field.
 *
 * @return the stored topic
 */
public Optional<String> getTopic() {
  return this.topic;
}
/**
 * Replaces the stored topic.
 *
 * <p>A {@code null} argument is stored as {@link Optional#empty()} so that the value handed out
 * by {@code getTopic()} is never a {@code null} reference.
 *
 * @param newTopic the topic to store; {@code null} is treated as "no topic"
 */
public void setTopic(Optional<String> newTopic) {
  // Readers of the field call Optional methods on it directly; a null Optional would turn
  // every such call into a NullPointerException.
  topic = newTopic == null ? Optional.empty() : newTopic;
}
/**
 * Returns the value of the {@code aired} field.
 *
 * @return the stored air date/time
 */
public Optional<LocalDateTime> getAired() {
  return this.aired;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,18 @@ public OrfOnVideoInfoDTO deserialize(

if (aFilm.getVideoUrls().isEmpty()){
LOG.debug("#####videoUrlEmpty#######");
LOG.debug("{} (id)", aFilm.getId().get());
LOG.debug("{} (genre_title)", JsonUtils.getElementValueAsString(jsonElement, "genre_title").get());
LOG.debug("{} (headline)", JsonUtils.getElementValueAsString(jsonElement, "headline").get());
LOG.debug("{} (profile_title*)", JsonUtils.getElementValueAsString(jsonElement, "profile_title").get());
LOG.debug("{} (title*)", JsonUtils.getElementValueAsString(jsonElement, "title").get());
LOG.debug("{} (sub_headline)", JsonUtils.getElementValueAsString(jsonElement, "sub_headline").get());
LOG.debug("{} (share_subject)", JsonUtils.getElementValueAsString(jsonElement, "share_subject").get());
LOG.debug("{} (TAG_RIGHT)", parseGeoLocations(JsonUtils.getElementValueAsString(jsonElement, TAG_RIGHT)));
LOG.debug("{} (url)", parseUrl(jsonElement));
LOG.debug("{} (segments)", JsonUtils.getElementValueAsString(jsonElement, TAG_SEGMENTS));
LOG.debug("{}",jsonElement );
LOG.debug("############");
LOG.debug("{} (id)", aFilm.getId().get());
LOG.debug("{} (genre_title)", JsonUtils.getElementValueAsString(jsonElement, "genre_title").get());
LOG.debug("{} (headline)", JsonUtils.getElementValueAsString(jsonElement, "headline").get());
LOG.debug("{} (profile_title*)", JsonUtils.getElementValueAsString(jsonElement, "profile_title").get());
LOG.debug("{} (title*)", JsonUtils.getElementValueAsString(jsonElement, "title").get());
LOG.debug("{} (sub_headline)", JsonUtils.getElementValueAsString(jsonElement, "sub_headline").get());
LOG.debug("{} (share_subject)", JsonUtils.getElementValueAsString(jsonElement, "share_subject").get());
LOG.debug("{} (TAG_RIGHT)", parseGeoLocations(JsonUtils.getElementValueAsString(jsonElement, TAG_RIGHT)));
LOG.debug("{} (url)", parseUrl(jsonElement));
LOG.debug("{} (segments)", JsonUtils.getElementValueAsString(jsonElement, TAG_SEGMENTS));
LOG.debug("{}",jsonElement );
LOG.debug("############");
}
// "genre_title": "Wetter",
// "headline": "Wetter Tirol vom 05.01.2024",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import de.mediathekview.mserver.base.utils.JsonUtils;
import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
import de.mediathekview.mserver.crawler.orfon.OrfOnBreadCrumsUrlDTO;
import de.mediathekview.mserver.crawler.orfon.OrfOnConstants;

import java.lang.reflect.Type;
import java.util.Optional;
Expand Down Expand Up @@ -32,32 +33,32 @@ public PagedElementListDTO<OrfOnBreadCrumsUrlDTO> deserialize(
Optional<JsonElement> itemArrayTop = JsonUtils.getElement(jsonElement, TAG_ITEM_ARRAY);
if (itemArrayTop.isPresent() && itemArrayTop.get().isJsonArray()) {
for (JsonElement item : itemArrayTop.get().getAsJsonArray()) {
Optional<String> url = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL);
Optional<String> url2 = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL2);
if (url.isPresent()) {
Optional<String> videoItemUrl = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL);
Optional<String> childrenUrl = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL2);
Optional<String> title = JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE);
if (videoItemUrl.isPresent()) {
page.addElement(new OrfOnBreadCrumsUrlDTO(
JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE).get(),
url.get()
title.orElse("MISSING TITLE"),
OrfOnConstants.createMaxLimmitUrl(videoItemUrl.get())
));
} else if (url2.isPresent()) {
} else if (childrenUrl.isPresent()) {
page.addElement(new OrfOnBreadCrumsUrlDTO(
JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE).get(),
url2.get()
title.orElse("MISSING TITLE"),
OrfOnConstants.createMaxLimmitUrl(childrenUrl.get())
));
} else {
LOG.info("No video_items or children tag found {}",JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE) );
}
LOG.debug("{} - {} - {}",
/*
LOG.debug("OrfOnHistoryChildrenDeserializer {} - {} - {}",
JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE),
JsonUtils.getElementValueAsString(item, TAG_TARGET_URL),
JsonUtils.getElementValueAsString(item, TAG_TARGET_URL2));
JsonUtils.getElementValueAsString(item, TAG_TARGET_URL2));*/

}
}
//
return page;
}




}
Original file line number Diff line number Diff line change
Expand Up @@ -31,34 +31,35 @@ public PagedElementListDTO<OrfOnBreadCrumsUrlDTO> deserialize(
//
Optional<JsonElement> itemArrayTop = JsonUtils.getElement(jsonElement, TAG_ITEM_ARRAY_TOP);
if (itemArrayTop.isPresent() && itemArrayTop.get().isJsonArray()) {
for (JsonElement item : itemArrayTop.get().getAsJsonArray()) {
Optional<String> url = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL);
if (url.isPresent()) {
page.addElement(new OrfOnBreadCrumsUrlDTO(
JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE).orElse("EMPTY"),
url.get()
));
}
LOG.debug("{} {}", JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE), JsonUtils.getElementValueAsString(item, TAG_TARGET_URL));
}
page.addElements(parseSection(itemArrayTop.get().getAsJsonArray()).getElements());
}
//
Optional<JsonElement> itemArrayButtom = JsonUtils.getElement(jsonElement, TAG_ITEM_ARRAY_BUTTOM);
if (itemArrayButtom.isPresent() &&itemArrayButtom.get().isJsonArray()) {
for (JsonElement item : itemArrayButtom.get().getAsJsonArray()) {
Optional<String> url = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL);
if (url.isPresent()) {
page.addElement(new OrfOnBreadCrumsUrlDTO(
JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE).orElse("EMPTY"),
url.get()
));
}
LOG.debug("{} {}", JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE), JsonUtils.getElementValueAsString(item, TAG_TARGET_URL));
}
if (itemArrayButtom.isPresent() && itemArrayButtom.get().isJsonArray()) {
page.addElements(parseSection(itemArrayButtom.get().getAsJsonArray()).getElements());
}
//
return page;
}

/**
 * Converts the entries of one JSON array into breadcrumb URL elements.
 *
 * <p>For every entry the values at {@code TAG_TARGET_URL} and {@code TAG_ITEM_TITLE} are read.
 * An entry with a target URL yields one element; if it has no title, the literal
 * {@code "EMPTY"} is used instead. An entry without a target URL is not added and is only
 * reported on debug level.
 *
 * @param itemArray the JSON array whose entries are inspected
 * @return a new list with one element per entry that has a target URL
 */
public PagedElementListDTO<OrfOnBreadCrumsUrlDTO> parseSection(JsonArray itemArray) {
  final PagedElementListDTO<OrfOnBreadCrumsUrlDTO> section = new PagedElementListDTO<>();
  for (final JsonElement entry : itemArray) {
    final Optional<String> targetUrl = JsonUtils.getElementValueAsString(entry, TAG_TARGET_URL);
    final Optional<String> entryTitle = JsonUtils.getElementValueAsString(entry, TAG_ITEM_TITLE);
    if (!targetUrl.isPresent()) {
      // nothing to crawl for this entry
      LOG.debug("missing url for {}", entryTitle);
      continue;
    }
    final String label = entryTitle.orElse("EMPTY");
    section.addElement(new OrfOnBreadCrumsUrlDTO(label, targetUrl.get()));
  }
  return section;
}



}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;


public class OrfOnHistoryVideoItemDeserializer implements JsonDeserializer<PagedElementListDTO<OrfOnBreadCrumsUrlDTO>> {
private static final Logger LOG = LogManager.getLogger(OrfOnHistoryVideoItemDeserializer.class);
private String[] TAG_NEXT_PAGE = { "next" };
private String[] TAG_ITEM_ARRAY = { "_items" };
private String[] TAG_ITEM_TITLE = {"title"};
private String[] TAG_TARGET_URL = {"_links", "self", "href"};
private String[] TAG_TARGET_URL_EPISODE = {"_links", "episode", "href"};

protected final Logger LOG = LogManager.getLogger(this.getClass());

@Override
public PagedElementListDTO<OrfOnBreadCrumsUrlDTO> deserialize(
Expand All @@ -31,19 +32,28 @@ public PagedElementListDTO<OrfOnBreadCrumsUrlDTO> deserialize(
Optional<JsonElement> itemArrayTop = JsonUtils.getElement(jsonElement, TAG_ITEM_ARRAY);
if (itemArrayTop.isPresent() && itemArrayTop.get().isJsonArray()) {
for (JsonElement item : itemArrayTop.get().getAsJsonArray()) {
Optional<String> url = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL);
if (url.isPresent()) {
Optional<String> urlSelf = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL);
Optional<String> urlEpisode = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL_EPISODE);
Optional<String> title = JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE);
// "self" should point to an episode, but in some cases it points to a segment - only in these cases is there an additional "episode" link
if (urlSelf.isPresent() && !urlSelf.get().contains("/segment/")) {
page.addElement(new OrfOnBreadCrumsUrlDTO(
JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE).get(),
JsonUtils.getElementValueAsString(item, TAG_TARGET_URL).get()
title.orElse("MISSING TITLE"),
urlSelf.get()
));
}
} else if (urlEpisode.isPresent()) {
page.addElement(new OrfOnBreadCrumsUrlDTO(
title.orElse("MISSING TITLE"),
urlEpisode.get()
));
}
}
}
//
return page;
}




}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.lang.reflect.Type;
import java.net.URI;
import java.util.Optional;
import java.util.Queue;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -52,7 +53,13 @@ protected void postProcessing(OrfOnVideoInfoDTO aResponseObj, OrfOnBreadCrumsUrl
LOG.debug("Missing videoUrls for {}", aDTO);
return;
}
LOG.debug(" bread crums {}", String.join("|", aDTO.getBreadCrums()) + " # " + aResponseObj.getTitle().get());
// ARCHIVE
// archive does not have a proper topic
if (aResponseObj.getTopic().get().equalsIgnoreCase("Archiv") && aDTO.getBreadCrums().size() > 1) {
aResponseObj.setTopic(Optional.of(aDTO.getBreadCrums().get(1)));
}

LOG.debug(" bread crums {} # {} # {}", String.join("|", aDTO.getBreadCrums()), aResponseObj.getTopic().get(), aResponseObj.getTitle().get());
taskResults.add(aResponseObj);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,15 @@ protected Type getType() {
@Override
protected void postProcessingElements(Set<OrfOnBreadCrumsUrlDTO> elements, OrfOnBreadCrumsUrlDTO originalDTO) {
for (OrfOnBreadCrumsUrlDTO element : elements) {
if (element.getUrl().endsWith("children")) {
if (element.getUrl().contains("/children")) {
final Queue<OrfOnBreadCrumsUrlDTO> moreContentOnNewPage = new ConcurrentLinkedQueue<>();
moreContentOnNewPage.add(element);
AbstractRecursiveConverterTask<OrfOnBreadCrumsUrlDTO, OrfOnBreadCrumsUrlDTO> resolveChildren = createNewOwnInstance(moreContentOnNewPage);
resolveChildren.fork();
taskResults.addAll(resolveChildren.join());
for(OrfOnBreadCrumsUrlDTO moreElements : resolveChildren.join()) {
moreElements.setBreadCrumsPath(originalDTO.getBreadCrums());
taskResults.add(moreElements);
}
} else {
element.setBreadCrumsPath(originalDTO.getBreadCrums());
taskResults.add(element);
Expand Down
Loading

0 comments on commit 7e6ef2c

Please sign in to comment.