Skip to content

Commit

Permalink
Add:Podcast quickmatch attempts quick matching unmatched episodes #983
Browse files Browse the repository at this point in the history
  • Loading branch information
advplyr committed Sep 15, 2022
1 parent b91b320 commit 70ef09f
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 55 deletions.
36 changes: 9 additions & 27 deletions server/controllers/PodcastController.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
const axios = require('axios')
const fs = require('../libs/fsExtra')
const Path = require('path')
const Logger = require('../Logger')
const { parsePodcastRssFeedXml } = require('../utils/podcastUtils')
const { getPodcastFeed, findMatchingEpisodes } = require('../utils/podcastUtils')
const LibraryItem = require('../objects/LibraryItem')
const { getFileTimestampsWithIno, sanitizeFilename } = require('../utils/fileUtils')
const { getFileTimestampsWithIno } = require('../utils/fileUtils')
const filePerms = require('../utils/filePerms')

class PodcastController {
Expand Down Expand Up @@ -91,32 +90,17 @@ class PodcastController {
}
}

getPodcastFeed(req, res) {
async getPodcastFeed(req, res) {
var url = req.body.rssFeed
if (!url) {
return res.status(400).send('Bad request')
}
var includeRaw = req.query.raw == 1 // Include raw json

axios.get(url).then(async (data) => {
if (!data || !data.data) {
Logger.error('Invalid podcast feed request response')
return res.status(500).send('Bad response from feed request')
}
Logger.debug(`[PodcastController] Podcast feed size ${(data.data.length / 1024 / 1024).toFixed(2)}MB`)
var payload = await parsePodcastRssFeedXml(data.data, false, includeRaw)
if (!payload) {
return res.status(500).send('Invalid podcast RSS feed')
}

// RSS feed may be a private RSS feed
payload.podcast.metadata.feedUrl = url

res.json(payload)
}).catch((error) => {
console.error('Failed', error)
res.status(500).send(error)
})
const podcast = await getPodcastFeed(url)
if (!podcast) {
return res.status(404).send('Podcast RSS feed request failed or invalid response data')
}
res.json({ podcast })
}

async getOPMLFeeds(req, res) {
Expand Down Expand Up @@ -177,9 +161,7 @@ class PodcastController {
if (!searchTitle) {
return res.sendStatus(500)
}
searchTitle = searchTitle.toLowerCase().trim()

const episodes = await this.podcastManager.findEpisode(rssFeedUrl, searchTitle)
const episodes = await findMatchingEpisodes(rssFeedUrl, searchTitle)
res.json({
episodes: episodes || []
})
Expand Down
28 changes: 4 additions & 24 deletions server/managers/PodcastManager.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
const fs = require('../libs/fsExtra')
const axios = require('axios')

const { parsePodcastRssFeedXml } = require('../utils/podcastUtils')
const { getPodcastFeed } = require('../utils/podcastUtils')
const Logger = require('../Logger')

const { downloadFile, removeFile } = require('../utils/fileUtils')
Expand Down Expand Up @@ -226,7 +225,7 @@ class PodcastManager {
Logger.error(`[PodcastManager] checkPodcastForNewEpisodes no feed url for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`)
return false
}
var feed = await this.getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl)
var feed = await getPodcastFeed(podcastLibraryItem.media.metadata.feedUrl)
if (!feed || !feed.episodes) {
Logger.error(`[PodcastManager] checkPodcastForNewEpisodes invalid feed payload for ${podcastLibraryItem.media.metadata.title} (ID: ${podcastLibraryItem.id})`, feed)
return false
Expand Down Expand Up @@ -262,7 +261,7 @@ class PodcastManager {
}

async findEpisode(rssFeedUrl, searchTitle) {
const feed = await this.getPodcastFeed(rssFeedUrl).catch(() => {
const feed = await getPodcastFeed(rssFeedUrl).catch(() => {
return null
})
if (!feed || !feed.episodes) {
Expand Down Expand Up @@ -292,25 +291,6 @@ class PodcastManager {
return matches.sort((a, b) => a.levenshtein - b.levenshtein)
}

getPodcastFeed(feedUrl, excludeEpisodeMetadata = false) {
Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}"`)
return axios.get(feedUrl, { timeout: 5000 }).then(async (data) => {
if (!data || !data.data) {
Logger.error('Invalid podcast feed request response')
return false
}
Logger.debug(`[PodcastManager] getPodcastFeed for "${feedUrl}" success - parsing xml`)
var payload = await parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata)
if (!payload) {
return false
}
return payload.podcast
}).catch((error) => {
Logger.error('[PodcastManager] getPodcastFeed Error', error)
return false
})
}

async getOPMLFeeds(opmlText) {
var extractedFeeds = opmlParser.parse(opmlText)
if (!extractedFeeds || !extractedFeeds.length) {
Expand All @@ -323,7 +303,7 @@ class PodcastManager {
var rssFeedData = []

for (let feed of extractedFeeds) {
var feedData = await this.getPodcastFeed(feed.feedUrl, true)
var feedData = await getPodcastFeed(feed.feedUrl, true)
if (feedData) {
feedData.metadata.feedUrl = feed.feedUrl
rssFeedData.push(feedData)
Expand Down
3 changes: 3 additions & 0 deletions server/objects/entities/PodcastEpisode.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ class PodcastEpisode {
if (this.episode) return `${this.episode} - ${this.title}`
return this.title
}
get enclosureUrl() {
return this.enclosure ? this.enclosure.url : null
}

setData(data, index = 1) {
this.id = getId('ep')
Expand Down
68 changes: 65 additions & 3 deletions server/scanner/Scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const { groupFilesIntoLibraryItemPaths, getLibraryItemFileData, scanFolder } = r
const { comparePaths } = require('../utils/index')
const { getIno } = require('../utils/fileUtils')
const { ScanResult, LogLevel } = require('../utils/constants')
const { findMatchingEpisodesInFeed, getPodcastFeed } = require('../utils/podcastUtils')

const MediaFileScanner = require('./MediaFileScanner')
const BookFinder = require('../finders/BookFinder')
Expand Down Expand Up @@ -684,7 +685,7 @@ class Scanner {
var updatePayload = {}
var hasUpdated = false

if (libraryItem.mediaType === 'book') {
if (libraryItem.isBook) {
var searchISBN = options.isbn || libraryItem.media.metadata.isbn
var searchASIN = options.asin || libraryItem.media.metadata.asin

Expand All @@ -708,7 +709,7 @@ class Scanner {
}

updatePayload = await this.quickMatchBookBuildUpdatePayload(libraryItem, matchData, options)
} else { // Podcast quick match
} else if (libraryItem.isPodcast) { // Podcast quick match
var results = await this.podcastFinder.search(searchTitle)
if (!results.length) {
return {
Expand Down Expand Up @@ -739,6 +740,10 @@ class Scanner {
}

if (hasUpdated) {
if (libraryItem.isPodcast && libraryItem.media.metadata.feedUrl) { // Quick match all unmatched podcast episodes
await this.quickMatchPodcastEpisodes(libraryItem, options)
}

await this.db.updateLibraryItem(libraryItem)
this.emitter('item_updated', libraryItem.toJSONExpanded())
}
Expand All @@ -762,16 +767,18 @@ class Scanner {
itunesArtistId: matchData.artistId || null,
releaseDate: matchData.releaseDate || null,
imageUrl: matchData.cover || null,
feedUrl: matchData.feedUrl || null,
description: matchData.descriptionPlain || null
}

for (const key in matchDataTransformed) {
if (matchDataTransformed[key]) {
if (key === 'genres') {
if ((!libraryItem.media.metadata.genres || options.overrideDetails)) {
// TODO: Genres array or string?
updatePayload.metadata[key] = matchDataTransformed[key].split(',').map(v => v.trim()).filter(v => !!v)
}
} else if (!libraryItem.media.metadata[key] || options.overrideDetails) {
} else if (libraryItem.media.metadata[key] !== matchDataTransformed[key] && (!libraryItem.media.metadata[key] || options.overrideDetails)) {
updatePayload.metadata[key] = matchDataTransformed[key]
}
}
Expand Down Expand Up @@ -854,6 +861,61 @@ class Scanner {
return updatePayload
}

async quickMatchPodcastEpisodes(libraryItem, options = {}) {
const episodesToQuickMatch = libraryItem.media.episodes.filter(ep => !ep.enclosureUrl) // Only quick match episodes without enclosure
if (!episodesToQuickMatch.length) return false

const feed = await getPodcastFeed(libraryItem.media.metadata.feedUrl)
if (!feed) {
Logger.error(`[Scanner] quickMatchPodcastEpisodes: Unable to quick match episodes feed not found for "${libraryItem.media.metadata.feedUrl}"`)
return false
}

var episodesWereUpdated = false
for (const episode of episodesToQuickMatch) {
const episodeMatches = findMatchingEpisodesInFeed(feed, episode.title)
if (episodeMatches && episodeMatches.length) {
const wasUpdated = this.updateEpisodeWithMatch(libraryItem, episode, episodeMatches[0].episode, options)
if (wasUpdated) episodesWereUpdated = true
}
}
return episodesWereUpdated
}

updateEpisodeWithMatch(libraryItem, episode, episodeToMatch, options = {}) {
Logger.debug(`[Scanner] quickMatchPodcastEpisodes: Found episode match for "${episode.title}" => ${episodeToMatch.title}`)
const matchDataTransformed = {
title: episodeToMatch.title || '',
subtitle: episodeToMatch.subtitle || '',
description: episodeToMatch.description || '',
enclosure: episodeToMatch.enclosure || null,
episode: episodeToMatch.episode || '',
episodeType: episodeToMatch.episodeType || '',
season: episodeToMatch.season || '',
pubDate: episodeToMatch.pubDate || '',
publishedAt: episodeToMatch.publishedAt
}
const updatePayload = {}
for (const key in matchDataTransformed) {
if (matchDataTransformed[key]) {
if (key === 'enclosure') {
if (!episode.enclosure || JSON.stringify(episode.enclosure) !== JSON.stringify(matchDataTransformed.enclosure)) {
updatePayload[key] = {
...matchDataTransformed.enclosure
}
}
} else if (episode[key] !== matchDataTransformed[key] && (!episode[key] || options.overrideDetails)) {
updatePayload[key] = matchDataTransformed[key]
}
}
}

if (Object.keys(updatePayload).length) {
return libraryItem.media.updateEpisode(episode.id, updatePayload)
}
return false
}

async matchLibraryItems(library) {
if (library.mediaType === 'podcast') {
Logger.error(`[Scanner] matchLibraryItems: Match all not supported for podcasts yet`)
Expand Down
64 changes: 63 additions & 1 deletion server/utils/podcastUtils.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const Logger = require('../Logger')
const { xmlToJSON } = require('./index')
const axios = require('axios')
const { xmlToJSON, levenshteinDistance } = require('./index')
const htmlSanitizer = require('../utils/htmlSanitizer')

function extractFirstArrayItem(json, key) {
Expand Down Expand Up @@ -173,4 +174,65 @@ module.exports.parsePodcastRssFeedXml = async (xml, excludeEpisodeMetadata = fal
podcast
}
}
}

module.exports.getPodcastFeed = (feedUrl, excludeEpisodeMetadata = false) => {
Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}"`)
return axios.get(feedUrl, { timeout: 6000 }).then(async (data) => {
if (!data || !data.data) {
Logger.error(`[podcastUtils] getPodcastFeed: Invalid podcast feed request response (${feedUrl})`)
return false
}
Logger.debug(`[podcastUtils] getPodcastFeed for "${feedUrl}" success - parsing xml`)
var payload = await this.parsePodcastRssFeedXml(data.data, excludeEpisodeMetadata)
if (!payload) {
return false
}

// RSS feed may be a private RSS feed
payload.podcast.metadata.feedUrl = feedUrl

return payload.podcast
}).catch((error) => {
Logger.error('[podcastUtils] getPodcastFeed Error', error)
return false
})
}

// Return array of episodes ordered by closest match (Levenshtein distance of 6 or less)
module.exports.findMatchingEpisodes = async (feedUrl, searchTitle) => {
const feed = await this.getPodcastFeed(feedUrl).catch(() => {
return null
})

return this.findMatchingEpisodesInFeed(feed, searchTitle)
}

module.exports.findMatchingEpisodesInFeed = (feed, searchTitle) => {
searchTitle = searchTitle.toLowerCase().trim()
if (!feed || !feed.episodes) {
return null
}

const matches = []
feed.episodes.forEach(ep => {
if (!ep.title) return

const epTitle = ep.title.toLowerCase().trim()
if (epTitle === searchTitle) {
matches.push({
episode: ep,
levenshtein: 0
})
} else {
const levenshtein = levenshteinDistance(searchTitle, epTitle, true)
if (levenshtein <= 6 && epTitle.length > levenshtein) {
matches.push({
episode: ep,
levenshtein
})
}
}
})
return matches.sort((a, b) => a.levenshtein - b.levenshtein)
}

0 comments on commit 70ef09f

Please sign in to comment.