From 7042f26b3d0af33ef86451b68b74683da78f1552 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sun, 3 Nov 2024 19:08:00 +0000 Subject: [PATCH] fix: Adopt pocket's new export format. Fixes #570 --- apps/web/components/settings/ImportExport.tsx | 2 +- apps/web/lib/importBookmarkParser.ts | 39 +++++++++---------- apps/web/package.json | 2 + pnpm-lock.yaml | 20 ++++++++++ 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/apps/web/components/settings/ImportExport.tsx b/apps/web/components/settings/ImportExport.tsx index 1145a42d..b6fa6e03 100644 --- a/apps/web/components/settings/ImportExport.tsx +++ b/apps/web/components/settings/ImportExport.tsx @@ -213,7 +213,7 @@ export function ImportExportRow() { diff --git a/apps/web/lib/importBookmarkParser.ts b/apps/web/lib/importBookmarkParser.ts index 45be3004..3262b170 100644 --- a/apps/web/lib/importBookmarkParser.ts +++ b/apps/web/lib/importBookmarkParser.ts @@ -1,5 +1,6 @@ // Copied from https://gist.github.com/devster31/4e8c6548fd16ffb75c02e6f24e27f9b9 import * as cheerio from "cheerio"; +import { parse } from "csv-parse/sync"; import { BookmarkTypes } from "@hoarder/shared/types/bookmarks"; @@ -54,28 +55,24 @@ export async function parsePocketBookmarkFile( ): Promise { const textContent = await file.text(); - const $ = cheerio.load(textContent); + const records = parse(textContent, { + columns: true, + skip_empty_lines: true, + }) as { + title: string; + url: string; + time_added: string; + tags: string; + }[]; - return $("a") - .map(function (_index, a) { - const $a = $(a); - const addDate = $a.attr("time_added"); - let tags: string[] = []; - const tagsStr = $a.attr("tags"); - try { - tags = tagsStr && tagsStr.length > 0 ? tagsStr.split(",") : []; - } catch (e) { - /* empty */ - } - const url = $a.attr("href"); - return { - title: $a.text(), - content: url ? { type: BookmarkTypes.LINK as const, url } : undefined, - tags, - addDate: typeof addDate === "undefined" ? undefined : parseInt(addDate), - }; - }) - .get(); + return records.map((record) => { + return { + title: record.title, + content: { type: BookmarkTypes.LINK as const, url: record.url }, + tags: record.tags.length > 0 ? record.tags.split("|") : [], + addDate: parseInt(record.time_added), + }; + }); } export async function parseHoarderBookmarkFile( diff --git a/apps/web/package.json b/apps/web/package.json index cbc01a50..849f434a 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -47,6 +47,7 @@ "cheerio": "^1.0.0", "class-variance-authority": "^0.7.0", "clsx": "^2.1.0", + "csv-parse": "^5.5.6", "dayjs": "^1.11.10", "drizzle-orm": "^0.33.0", "fastest-levenshtein": "^1.0.16", @@ -80,6 +81,7 @@ "@hoarder/prettier-config": "workspace:^0.1.0", "@hoarder/tailwind-config": "workspace:^0.1.0", "@hoarder/tsconfig": "workspace:^0.1.0", + "@types/csv-parse": "^1.2.5", "@types/emoji-mart": "^3.0.14", "@types/react": "^18.2.55", "@types/react-dom": "^18.2.19", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ab0ac31b..2dac5a29 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -556,6 +556,9 @@ importers: clsx: specifier: ^2.1.0 version: 2.1.0 + csv-parse: + specifier: ^5.5.6 + version: 5.5.6 dayjs: specifier: ^1.11.10 version: 1.11.10 @@ -650,6 +653,9 @@ importers: '@hoarder/tsconfig': specifier: workspace:^0.1.0 version: link:../../tooling/typescript + '@types/csv-parse': + specifier: ^1.2.5 + version: 1.2.5 '@types/emoji-mart': specifier: ^3.0.14 version: 3.0.14 @@ -4237,6 +4243,10 @@ packages: '@types/cookie@0.6.0': resolution: {integrity: sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==} + '@types/csv-parse@1.2.5': + resolution: {integrity: sha512-3PoFyWeuFGqale09vFydLQ6IGdvD+mizcXcB8s6ImWv+830IF0HckvewgcGVfGnTFImqvfvhpYZYod2QqGGGdg==} + deprecated: This is a stub types definition. csv-parse provides its own type definitions, so you do not need this installed. + '@types/debug@4.1.12': resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} @@ -5905,6 +5915,9 @@ packages: csstype@3.1.3: resolution: {integrity: sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==} + csv-parse@5.5.6: + resolution: {integrity: sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==} + dag-map@1.0.2: resolution: {integrity: sha512-+LSAiGFwQ9dRnRdOeaj7g47ZFJcOUPukAP8J3A3fuZ1g9Y44BG+P1sgApjLXTQPOzC4+7S9Wr8kXsfpINM4jpw==} @@ -18604,6 +18617,11 @@ snapshots: '@types/cookie@0.6.0': dev: false + '@types/csv-parse@1.2.5': + dependencies: + csv-parse: 5.5.6 + dev: true + '@types/debug@4.1.12': dependencies: '@types/ms': 0.7.34 @@ -20951,6 +20969,8 @@ snapshots: csstype@3.1.3: {} + csv-parse@5.5.6: {} + dag-map@1.0.2: dev: false