-
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: add missing request handler for rss feeds (#2006)
* fix: add missing request handler for rss feeds * fix: ci issues
- Loading branch information
1 parent
b413e2e
commit 99c8e5b
Showing
9 changed files
with
186 additions
and
159 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,36 @@ | ||
import type { RssFeed } from "@homarr/cron-jobs"; | ||
import { createItemChannel } from "@homarr/redis"; | ||
import { rssFeedsRequestHandler } from "@homarr/request-handler/rss-feeds"; | ||
import { z } from "@homarr/validation"; | ||
|
||
import { createOneItemMiddleware } from "../../middlewares/item"; | ||
import { createTRPCRouter, publicProcedure } from "../../trpc"; | ||
|
||
export const rssFeedRouter = createTRPCRouter({ | ||
getFeeds: publicProcedure.unstable_concat(createOneItemMiddleware("rssFeed")).query(async ({ input }) => { | ||
const channel = createItemChannel<RssFeed[]>(input.itemId); | ||
return await channel.getAsync(); | ||
}), | ||
/**
 * Returns the newest entries across all requested RSS feeds.
 *
 * Every URL is resolved through the cached request handler (no forced refresh).
 * The entries of all feeds are merged, ordered newest-first by `published`
 * and only afterwards truncated to `maximumAmountPosts`.
 */
getFeeds: publicProcedure
  .input(
    z.object({
      urls: z.array(z.string()),
      maximumAmountPosts: z.number(),
    }),
  )
  .query(async ({ input }) => {
    const rssFeeds = await Promise.all(
      input.urls.map(async (url) => {
        const innerHandler = rssFeedsRequestHandler.handler({
          url,
          count: input.maximumAmountPosts,
        });
        return await innerHandler.getCachedOrUpdatedDataAsync({
          forceUpdate: false,
        });
      }),
    );

    return (
      rssFeeds
        .flatMap((rssFeed) => rssFeed.data.entries)
        // Sort BEFORE truncating. Each feed may already contribute up to
        // `maximumAmountPosts` entries, so slicing first would keep only the
        // entries of the first feed(s) instead of the newest entries overall.
        .sort((entryA, entryB) => {
          // Entries without a publication date keep their relative position.
          return entryA.published && entryB.published
            ? new Date(entryB.published).getTime() - new Date(entryA.published).getTime()
            : 0;
        })
        .slice(0, input.maximumAmountPosts)
    );
  }),
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,139 +1,36 @@ | ||
import type { FeedData, FeedEntry } from "@extractus/feed-extractor"; | ||
import { extract } from "@extractus/feed-extractor"; | ||
import SuperJSON from "superjson"; | ||
|
||
import type { Modify } from "@homarr/common/types"; | ||
import { EVERY_5_MINUTES } from "@homarr/cron-jobs-core/expressions"; | ||
import { db, eq } from "@homarr/db"; | ||
import { items } from "@homarr/db/schema"; | ||
import { logger } from "@homarr/log"; | ||
import { createItemChannel } from "@homarr/redis"; | ||
import { z } from "@homarr/validation"; | ||
|
||
// This import is done that way to avoid circular dependencies. | ||
import { rssFeedsRequestHandler } from "@homarr/request-handler/rss-feeds"; | ||
|
||
import type { WidgetComponentProps } from "../../../widgets"; | ||
import { createCronJob } from "../lib"; | ||
|
||
export const rssFeedsJob = createCronJob("rssFeeds", EVERY_5_MINUTES).withCallback(async () => { | ||
const itemsForIntegration = await db.query.items.findMany({ | ||
const rssItems = await db.query.items.findMany({ | ||
where: eq(items.kind, "rssFeed"), | ||
}); | ||
|
||
for (const item of itemsForIntegration) { | ||
const options = SuperJSON.parse<WidgetComponentProps<"rssFeed">["options"]>(item.options); | ||
|
||
const feeds = await Promise.all( | ||
options.feedUrls.map(async (feedUrl) => ({ | ||
feedUrl, | ||
feed: (await extract(feedUrl, { | ||
getExtraEntryFields: (feedEntry) => { | ||
const media = attemptGetImageFromEntry(feedUrl, feedEntry); | ||
if (!media) { | ||
return {}; | ||
} | ||
return { | ||
enclosure: media, | ||
}; | ||
}, | ||
})) as ExtendedFeedData, | ||
})), | ||
); | ||
|
||
const channel = createItemChannel<RssFeed[]>(item.id); | ||
await channel.publishAndUpdateLastStateAsync(feeds); | ||
} | ||
}); | ||
|
||
const attemptGetImageFromEntry = (feedUrl: string, entry: object) => { | ||
const media = getFirstMediaProperty(entry); | ||
if (media !== null) { | ||
return media; | ||
} | ||
return getImageFromStringAsFallback(feedUrl, JSON.stringify(entry)); | ||
}; | ||
|
||
const getImageFromStringAsFallback = (feedUrl: string, content: string) => { | ||
const regex = /https?:\/\/\S+?\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff)/i; | ||
const result = regex.exec(content); | ||
|
||
if (result == null) { | ||
return null; | ||
} | ||
|
||
console.debug( | ||
`Falling back to regex image search for '${feedUrl}'. Found ${result.length} matches in content: ${content}`, | ||
); | ||
return result[0]; | ||
}; | ||
|
||
const mediaProperties = [ | ||
{ | ||
path: ["enclosure", "@_url"], | ||
}, | ||
{ | ||
path: ["media:content", "@_url"], | ||
}, | ||
]; | ||
|
||
/** | ||
* The RSS and Atom standards are poorly adhered to in most of the web. | ||
* We want to show pretty background images on the posts and therefore need to extract | ||
* the enclosure (aka. media images). This function uses the dynamic properties defined above | ||
* to search through the possible paths and detect valid image URLs. | ||
* @param feedObject The object to scan for. | ||
* @returns the value of the first path that is found within the object | ||
*/ | ||
const getFirstMediaProperty = (feedObject: object) => { | ||
for (const mediaProperty of mediaProperties) { | ||
let propertyIndex = 0; | ||
let objectAtPath: object = feedObject; | ||
while (propertyIndex < mediaProperty.path.length) { | ||
const key = mediaProperty.path[propertyIndex]; | ||
if (key === undefined) { | ||
break; | ||
} | ||
const propertyEntries = Object.entries(objectAtPath); | ||
const propertyEntry = propertyEntries.find(([entryKey]) => entryKey === key); | ||
if (!propertyEntry) { | ||
break; | ||
const itemOptions = rssItems.map((item) => SuperJSON.parse<WidgetComponentProps<"rssFeed">["options"]>(item.options)); | ||
|
||
for (const option of itemOptions) { | ||
const maxAmountPosts = typeof option.maximumAmountPosts === "number" ? option.maximumAmountPosts : 100; | ||
for (const url of option.feedUrls) { | ||
try { | ||
const innerHandler = rssFeedsRequestHandler.handler({ | ||
url, | ||
count: maxAmountPosts, | ||
}); | ||
await innerHandler.getCachedOrUpdatedDataAsync({ | ||
forceUpdate: true, | ||
}); | ||
} catch (error) { | ||
logger.error("Failed to update RSS feed", { url, error }); | ||
} | ||
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment | ||
const [_, propertyEntryValue] = propertyEntry; | ||
objectAtPath = propertyEntryValue as object; | ||
propertyIndex++; | ||
} | ||
|
||
const validationResult = z.string().url().safeParse(objectAtPath); | ||
if (!validationResult.success) { | ||
continue; | ||
} | ||
|
||
logger.debug(`Found an image in the feed entry: ${validationResult.data}`); | ||
return validationResult.data; | ||
} | ||
return null; | ||
}; | ||
|
||
/** | ||
* We extend the feed with custom properties. | ||
* This interface adds properties on top of the default ones. | ||
*/ | ||
interface ExtendedFeedEntry extends FeedEntry { | ||
enclosure?: string; | ||
} | ||
|
||
/** | ||
* We extend the feed with custom properties. | ||
* This interface omits the default entries with our custom definition. | ||
*/ | ||
type ExtendedFeedData = Modify< | ||
FeedData, | ||
{ | ||
entries?: ExtendedFeedEntry; | ||
} | ||
>; | ||
|
||
export interface RssFeed { | ||
feedUrl: string; | ||
feed: ExtendedFeedData; | ||
} | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import type { FeedData, FeedEntry } from "@extractus/feed-extractor"; | ||
import { extract } from "@extractus/feed-extractor"; | ||
import dayjs from "dayjs"; | ||
import { z } from "zod"; | ||
|
||
import type { Modify } from "@homarr/common/types"; | ||
import { logger } from "@homarr/log"; | ||
|
||
import { createCachedWidgetRequestHandler } from "./lib/cached-widget-request-handler"; | ||
|
||
/**
 * Cached request handler for a single feed of the "rssFeed" widget.
 *
 * `requestAsync` extracts the feed at `input.url`, attaches an `enclosure`
 * image URL to every entry for which one can be found, and returns the feed
 * with at most `input.count` entries (an empty list when the feed has none).
 */
export const rssFeedsRequestHandler = createCachedWidgetRequestHandler({
  queryKey: "rssFeedList",
  widgetKind: "rssFeed",
  async requestAsync(input: { url: string; count: number }) {
    const { url, count } = input;

    const feed = (await extract(url, {
      // Only add the extra field when an image was actually found.
      getExtraEntryFields: (feedEntry) => {
        const enclosure = attemptGetImageFromEntry(url, feedEntry);
        return enclosure ? { enclosure } : {};
      },
    })) as ExtendedFeedData;

    const limitedEntries = feed.entries?.slice(0, count) ?? [];
    return { ...feed, entries: limitedEntries };
  },
  cacheDuration: dayjs.duration(5, "minutes"),
});
|
||
/**
 * Resolves an image URL for a feed entry.
 * The known media properties are checked first; if none of them yields a URL,
 * the serialized entry is searched for an image link as a fallback.
 * @param feedUrl URL of the feed the entry belongs to (passed on to the fallback search)
 * @param entry the raw feed entry
 * @returns the image URL, or null when neither strategy finds one
 */
const attemptGetImageFromEntry = (feedUrl: string, entry: object) =>
  getFirstMediaProperty(entry) ?? getImageFromStringAsFallback(feedUrl, JSON.stringify(entry));
|
||
/**
 * Searches arbitrary text (typically a JSON-serialized feed entry) for the
 * first http(s) URL that ends in a known image extension.
 * @param feedUrl URL of the feed being processed, only used for the debug log
 * @param content the text to search
 * @returns the first image URL found, or null when there is none
 */
const getImageFromStringAsFallback = (feedUrl: string, content: string) => {
  // The URL body must not contain whitespace, quotes, angle brackets or
  // backslashes. JSON.stringify output contains no whitespace, so a plain
  // `\S+?` would run across string boundaries and glue a non-image link to a
  // later ".png" in a different field.
  const regex = /https?:\/\/[^\s"'<>\\]+?\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff)/i;
  const result = regex.exec(content);

  if (result == null) {
    return null;
  }

  const imageUrl = result[0];
  // `exec` yields a single match, so report the URL itself rather than
  // `result.length`, which only counts the match plus its capture group.
  console.debug(`Falling back to regex image search for '${feedUrl}'. Found image URL: ${imageUrl}`);
  return imageUrl;
};
|
||
/**
 * Property paths inside a feed entry that are checked, in this order,
 * for an image URL.
 */
const mediaProperties = [
  ["enclosure", "@_url"],
  ["media:content", "@_url"],
].map((path) => ({ path }));
|
||
/**
 * Feeds in the wild rarely follow the RSS and Atom standards closely.
 * To show background images on posts we need the enclosure (media image) of an
 * entry, so every path listed in `mediaProperties` is walked through the entry
 * and the first one that ends at a valid URL wins.
 * @param feedObject The object to scan.
 * @returns the URL found at the first matching path, or null if no path yields one
 */
const getFirstMediaProperty = (feedObject: object) => {
  for (const { path } of mediaProperties) {
    let current: object = feedObject;

    // Descend one key at a time; stop as soon as a key is missing.
    for (const segment of path) {
      const match = Object.entries(current).find(([name]) => name === segment);
      if (!match) {
        break;
      }
      current = match[1] as object;
    }

    // Whatever the walk ended on only counts if it is a URL string.
    const parsed = z.string().url().safeParse(current);
    if (parsed.success) {
      logger.debug(`Found an image in the feed entry: ${parsed.data}`);
      return parsed.data;
    }
  }
  return null;
};
|
||
/** | ||
* A feed entry extended with our custom properties. | ||
* This interface adds properties on top of the default `FeedEntry` ones. | ||
*/ | ||
interface ExtendedFeedEntry extends FeedEntry { | ||
// Image URL of the entry; set through `getExtraEntryFields` when an image was found. | ||
enclosure?: string; | ||
} | ||
|
||
/** | ||
* Feed data extended with our custom properties. | ||
* This type swaps the default `entries` of `FeedData` for a list of `ExtendedFeedEntry`. | ||
* NOTE(review): relies on `Modify` from `@homarr/common/types` replacing the listed keys; confirm there. | ||
*/ | ||
type ExtendedFeedData = Modify< | ||
FeedData, | ||
{ | ||
entries?: ExtendedFeedEntry[]; | ||
} | ||
>; | ||
|
||
/**
* Pairs the URL of a feed with the extended data extracted for it.
*/
export interface RssFeed { | ||
feedUrl: string; | ||
feed: ExtendedFeedData; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.