Skip to content

Commit

Permalink
refactor: extract assets into their own database table. #215 (#220)
Browse files Browse the repository at this point in the history
* Allow downloading more content from a webpage and index it #215
Added a new table that contains the information about assets for link bookmarks.
Created migration code that transfers the existing data into the new table.

* Allow downloading more content from a webpage and index it #215
Removed the old asset columns from the database.
Updated the UI to use the data from the linkBookmarkAssets array.

* Generalize the assets table so that it is not tied specifically to link bookmarks

* fix migrations post merge

* fix missing asset ids in the getBookmarks call

---------

Co-authored-by: MohamedBassem <me@mbassem.com>
  • Loading branch information
kamtschatka and MohamedBassem authored Jun 23, 2024
1 parent 0f54a18 commit 9ce6958
Show file tree
Hide file tree
Showing 6 changed files with 1,271 additions and 52 deletions.
100 changes: 71 additions & 29 deletions apps/workers/crawlerWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ import { withTimeout } from "utils";

import type { ZCrawlLinkRequest } from "@hoarder/shared/queues";
import { db } from "@hoarder/db";
import { bookmarkAssets, bookmarkLinks, bookmarks } from "@hoarder/db/schema";
import {
assets,
AssetTypes,
bookmarkAssets,
bookmarkLinks,
bookmarks,
} from "@hoarder/db/schema";
import {
ASSET_TYPES,
deleteAsset,
Expand Down Expand Up @@ -194,7 +200,10 @@ async function changeBookmarkStatus(
async function getBookmarkDetails(bookmarkId: string) {
const bookmark = await db.query.bookmarks.findFirst({
where: eq(bookmarks.id, bookmarkId),
with: { link: true },
with: {
link: true,
assets: true,
},
});

if (!bookmark || !bookmark.link) {
Expand All @@ -203,9 +212,15 @@ async function getBookmarkDetails(bookmarkId: string) {
return {
url: bookmark.link.url,
userId: bookmark.userId,
screenshotAssetId: bookmark.link.screenshotAssetId,
imageAssetId: bookmark.link.imageAssetId,
fullPageArchiveAssetId: bookmark.link.fullPageArchiveAssetId,
screenshotAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_SCREENSHOT,
)?.id,
imageAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_BANNER_IMAGE,
)?.id,
fullPageArchiveAssetId: bookmark.assets.find(
(a) => a.assetType == AssetTypes.LINK_FULL_PAGE_ARCHIVE,
)?.id,
};
}

Expand Down Expand Up @@ -490,9 +505,9 @@ async function crawlAndParseUrl(
userId: string,
jobId: string,
bookmarkId: string,
oldScreenshotAssetId: string | null,
oldImageAssetId: string | null,
oldFullPageArchiveAssetId: string | null,
oldScreenshotAssetId: string | undefined,
oldImageAssetId: string | undefined,
oldFullPageArchiveAssetId: string | undefined,
) {
const {
htmlContent,
Expand All @@ -511,20 +526,42 @@ async function crawlAndParseUrl(
}

// TODO(important): Restrict the size of content to store
await db
.update(bookmarkLinks)
.set({
title: meta.title,
description: meta.description,
imageUrl: meta.image,
favicon: meta.logo,
content: readableContent?.textContent,
htmlContent: readableContent?.content,
screenshotAssetId,
imageAssetId,
crawledAt: new Date(),
})
.where(eq(bookmarkLinks.id, bookmarkId));
await db.transaction(async (txn) => {
await txn
.update(bookmarkLinks)
.set({
title: meta.title,
description: meta.description,
imageUrl: meta.image,
favicon: meta.logo,
content: readableContent?.textContent,
htmlContent: readableContent?.content,
crawledAt: new Date(),
})
.where(eq(bookmarkLinks.id, bookmarkId));

if (screenshotAssetId) {
if (oldScreenshotAssetId) {
await txn.delete(assets).where(eq(assets.id, oldScreenshotAssetId));
}
await txn.insert(assets).values({
id: screenshotAssetId,
assetType: AssetTypes.LINK_SCREENSHOT,
bookmarkId,
});
}

if (imageAssetId) {
if (oldImageAssetId) {
await txn.delete(assets).where(eq(assets.id, oldImageAssetId));
}
await txn.insert(assets).values({
id: imageAssetId,
assetType: AssetTypes.LINK_BANNER_IMAGE,
bookmarkId,
});
}
});

// Delete the old assets if any
await Promise.all([
Expand All @@ -545,13 +582,18 @@ async function crawlAndParseUrl(
jobId,
);

await db
.update(bookmarkLinks)
.set({
fullPageArchiveAssetId,
})
.where(eq(bookmarkLinks.id, bookmarkId));

await db.transaction(async (txn) => {
if (oldFullPageArchiveAssetId) {
await txn
.delete(assets)
.where(eq(assets.id, oldFullPageArchiveAssetId));
}
await txn.insert(assets).values({
id: fullPageArchiveAssetId,
assetType: AssetTypes.LINK_FULL_PAGE_ARCHIVE,
bookmarkId,
});
});
if (oldFullPageArchiveAssetId) {
deleteAsset({ userId, assetId: oldFullPageArchiveAssetId }).catch(
() => ({}),
Expand Down
31 changes: 31 additions & 0 deletions packages/db/drizzle/0024_premium_hammerhead.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Migration: move the per-link asset id columns (`screenshotAssetId`,
-- `fullPageArchiveAssetId`, `imageAssetId`) out of `bookmarkLinks` into a
-- dedicated `assets` table with one row per asset, then drop the old columns.
-- Statement order matters: every backfill INSERT must run before the
-- corresponding DROP COLUMN, otherwise the existing asset ids are lost.
--
-- `assets` rows reference `bookmarks`.`id`; with ON DELETE cascade, deleting a
-- bookmark also removes its asset rows (when foreign-key enforcement is enabled).
CREATE TABLE `assets` (
`id` text PRIMARY KEY NOT NULL,
`assetType` text NOT NULL,
`bookmarkId` text NOT NULL,
FOREIGN KEY (`bookmarkId`) REFERENCES `bookmarks`(`id`) ON UPDATE no action ON DELETE cascade
);
--> statement-breakpoint
-- Index for looking up assets by the bookmark they belong to.
CREATE INDEX `assets_bookmarkId_idx` ON `assets` (`bookmarkId`);
--> statement-breakpoint
-- Index for filtering assets by their type.
CREATE INDEX `assets_assetType_idx` ON `assets` (`assetType`);
--> statement-breakpoint
-- Backfill: copy every non-NULL screenshot asset id, tagged 'linkScreenshot'.
-- The `id` of the `bookmarkLinks` row is used as `bookmarkId`.
-- NOTE(review): this assumes `bookmarkLinks`.`id` is the owning bookmark's id — confirm against the schema.
-- NOTE(review): the type literals here presumably must match the application's asset type values — verify.
INSERT INTO `assets` (`id`, `assetType`, `bookmarkId`)
SELECT `screenshotAssetId`, 'linkScreenshot', `id`
FROM `bookmarkLinks`
WHERE screenshotAssetId IS NOT NULL;
--> statement-breakpoint
-- Backfill: copy every non-NULL full-page archive asset id, tagged 'linkFullPageArchive'.
INSERT INTO `assets` (`id`, `assetType`, `bookmarkId`)
SELECT `fullPageArchiveAssetId`, 'linkFullPageArchive', `id`
FROM `bookmarkLinks`
WHERE `fullPageArchiveAssetId` IS NOT NULL;
--> statement-breakpoint
-- Backfill: copy every non-NULL banner image asset id, tagged 'linkBannerImage'.
INSERT INTO `assets` (`id`, `assetType`, `bookmarkId`)
SELECT `imageAssetId`, 'linkBannerImage', `id`
FROM `bookmarkLinks`
WHERE `imageAssetId` IS NOT NULL;
--> statement-breakpoint
-- The data now lives in `assets`; remove the superseded columns from `bookmarkLinks`.
ALTER TABLE `bookmarkLinks` DROP COLUMN `screenshotAssetId`;
--> statement-breakpoint
ALTER TABLE `bookmarkLinks` DROP COLUMN `fullPageArchiveAssetId`;
--> statement-breakpoint
ALTER TABLE `bookmarkLinks` DROP COLUMN `imageAssetId`;
Loading

0 comments on commit 9ce6958

Please sign in to comment.