From 51a931d9a3dcfa2b87572fe8a937fb40aabb31a5 Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Fri, 17 May 2024 09:29:38 +0800 Subject: [PATCH 1/2] feat(utils): revise stripHtml to support line break replacement --- package-lock.json | 4 +- package.json | 2 +- .../__snapshots__/utils.test.js.snap | 100 ++++++------------ src/__tests__/makeHomepage.test.ts | 34 +++--- src/utils/index.ts | 32 ++++-- 5 files changed, 78 insertions(+), 94 deletions(-) diff --git a/package-lock.json b/package-lock.json index 47a7068..29f00e1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@matters/ipns-site-generator", - "version": "0.1.5", + "version": "0.1.6", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@matters/ipns-site-generator", - "version": "0.1.4-rc.2", + "version": "0.1.6", "license": "Apache-2.0", "dependencies": { "@matters/slugify": "^0.7.3", diff --git a/package.json b/package.json index 060cc0f..e3d1825 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@matters/ipns-site-generator", - "version": "0.1.5", + "version": "0.1.6", "description": "IPNS site generator for matters.town", "author": "https://github.com/thematters ", "homepage": "https://github.com/thematters/ipns-site-generator", diff --git a/src/__tests__/__snapshots__/utils.test.js.snap b/src/__tests__/__snapshots__/utils.test.js.snap index 31199db..47ce697 100644 --- a/src/__tests__/__snapshots__/utils.test.js.snap +++ b/src/__tests__/__snapshots__/utils.test.js.snap @@ -1,79 +1,39 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP exports[`utils "makeSummary" can produce summary text from HTML 1`] = ` -" - - - Reprehenderit proident sit consectetur id consequat officia. +"Reprehenderit proident sit consectetur id consequat officia. Duis ea voluptate cupidatat ad - Elit consequat labore tempor Lore..." 
+ Elit consequat labore tempor Lorem voluptate …" `; exports[`utils "stripHtml" can generate clean text from HTML 1`] = ` -" - - - Reprehenderit proident sit consectetur id consequat officia. - - - Duis ea voluptate cupidatat ad - Elit consequat labore tempor Lorem voluptate occaecat nostrud laborum minim. Cillum veniam ea cupidatat nulla commodo sunt amet magna amet sit culpa nulla deserunt reprehenderit duis. Et nostrud sunt ad cupidatat laboris. Reprehenderit dolor dolore elit voluptate ex. Fugiat in in officia non eiusmod irure et. Velit ut aliquip ipsum exercitation exercitation nisi voluptate enim amet exercitation. Et consectetur ex nisi anim id consequat eiusmod veniam ipsum ullamco nulla deserunt nostrud. - - - - - - - Nulla in fugiat labore ad. - - - Cupidatat amet fugiat culpa id - Et ut dolore dolore ex. Deserunt adipisicing id dolor eiusmod minim ea. Pariatur veniam velit ad culpa nisi sit. Non nostrud irure nulla pariatur ipsum irure fugiat anim id Lorem duis. Ullamco incididunt ex ullamco elit. Amet voluptate minim laborum anim duis aliquip officia enim Lorem mollit aliquip laboris. Mollit pariatur sunt pariatur occaecat deserunt esse . Est eu ut elit id nisi duis id magna commodo ex et id sint laboris . - - - Enim aliqua est proident commodo dolor incididunt eiusmod. Anim anim eu pariatur aliqua qui. Sit non commodo enim ut aute officia eu. Adipisicing proident eu velit id proident. - - - Voluptate officia adipisicing voluptate amet dolore ad tempor aliquip reprehenderit Lorem. Ad dolor id minim occaecat ea non nulla. Ullamco exercitation consectetur duis tempor incididunt qui id. Sunt voluptate qui ex do Lorem consectetur laborum mollit culpa sunt anim occaecat esse. Velit deserunt eiusmod deserunt. Anim ullamco ad minim velit nulla aliquip culpa consequat laboris quis ad Lorem pariatur. Occaecat sunt irure reprehenderit. 
- - query { - article( - input: { mediaHash: \\"zdpuAxP6uSfum74VS3pYmzBR9xvPbrBcX3J8BPpB3xdRGjVsX\\" } - ) { - id - title - summary - } +"Reprehenderit proident sit consectetur id consequat officia. + Duis ea voluptate cupidatat ad +Elit consequat labore tempor Lorem voluptate occaecat nostrud laborum minim. Cillum veniam ea cupidatat nulla commodo sunt amet magna amet sit culpa nulla deserunt reprehenderit duis. Et nostrud sunt ad cupidatat laboris. Reprehenderit dolor dolore elit voluptate ex. Fugiat in in officia non eiusmod irure et. Velit ut aliquip ipsum exercitation exercitation nisi voluptate enim amet exercitation. Et consectetur ex nisi anim id consequat eiusmod veniam ipsum ullamco nulla deserunt nostrud. + Nulla in fugiat labore ad. + Cupidatat amet fugiat culpa id +Et ut dolore dolore ex. Deserunt adipisicing id dolor eiusmod minim ea. Pariatur veniam velit ad culpa nisi sit. Non nostrud irure nulla pariatur ipsum irure fugiat anim id Lorem duis. Ullamco incididunt ex ullamco elit. Amet voluptate minim laborum anim duis aliquip officia enim Lorem mollit aliquip laboris. Mollit pariatur sunt pariatur occaecat deserunt esse. Est eu ut elit id nisi duis id magna commodo ex et id sint laboris. + Enim aliqua est proident commodo dolor incididunt eiusmod. Anim anim eu pariatur aliqua qui. Sit non commodo enim ut aute officia eu. Adipisicing proident eu velit id proident. +Voluptate officia adipisicing voluptate amet dolore ad tempor aliquip reprehenderit Lorem. Ad dolor id minim occaecat ea non nulla. Ullamco exercitation consectetur duis tempor incididunt qui id. Sunt voluptate qui ex do Lorem consectetur laborum mollit culpa sunt anim occaecat esse. Velit deserunt eiusmod deserunt. Anim ullamco ad minim velit nulla aliquip culpa consequat laboris quis ad Lorem pariatur. Occaecat sunt irure reprehenderit. 
+ query { +  article( +    input: { mediaHash: \\"zdpuAxP6uSfum74VS3pYmzBR9xvPbrBcX3J8BPpB3xdRGjVsX\\" } +  ) { +    id +    title +    summary +  } } - - - Officia amet minim proident labore - Proident fugiat amet - Duis eiusmod mollit ipsum exercitation voluptate sit ullamco. - Labore aute ea irure - - - Adipisicing nisi deserunt velit proident nostrud et ipsum amet mollit. - - - Esse nostrud deserunt Lorem pariatur incididunt. - - - Non minim esse qui mollit consequat. - - - Exercitation dolor fugiat esse officia cupidatat anim. - - - Esse eu anim irure voluptate non laborum laborum dolore dolore. - - - - - Laboris et excepteur est adipisicing magna qui do sit eiusmod. - - - Qui aute voluptate - Labore dolor laboris anim. Laborum ut eiusmod et et minim duis aliquip deserunt laboris. - " + Officia amet minim proident labore + Proident fugiat amet +Duis eiusmod mollit ipsum exercitation voluptate sit ullamco. + Labore aute ea irure +Adipisicing nisi deserunt velit proident nostrud et ipsum amet mollit. +Esse nostrud deserunt Lorem pariatur incididunt. +Non minim esse qui mollit consequat. +Exercitation dolor fugiat esse officia cupidatat anim. +Esse eu anim irure voluptate non laborum laborum dolore dolore. +Laboris et excepteur est adipisicing magna qui do sit eiusmod. + Qui aute voluptate +Labore dolor laboris anim. Laborum ut eiusmod et et minim duis aliquip deserunt laboris." 
`; diff --git a/src/__tests__/makeHomepage.test.ts b/src/__tests__/makeHomepage.test.ts index 04b4a8a..6b2d7d9 100644 --- a/src/__tests__/makeHomepage.test.ts +++ b/src/__tests__/makeHomepage.test.ts @@ -1,5 +1,9 @@ import fetch from 'isomorphic-fetch' -import { makeHomepage, makeHomepageBundles, makeActivityPubBundles } from '../makeHomepage' +import { + makeHomepage, + makeHomepageBundles, + makeActivityPubBundles, +} from '../makeHomepage' import { MOCK_HOMEPAGE } from '../render/mock' jest.mock('isomorphic-fetch') @@ -24,18 +28,22 @@ describe('makeHomepage', () => { arrayBuffer: () => Promise.resolve(new ArrayBuffer(1)), }) - const bundles = await makeHomepageBundles( - MOCK_HOMEPAGE('matters.news') - ) + const bundles = await makeHomepageBundles(MOCK_HOMEPAGE('matters.news')) let html = '' - let xml = '' - let json = '' + let xml = '' + let json = '' for (const { path, content } of bundles) { switch (path) { - case 'index.html': html = content; break; - case 'rss.xml': xml = content; break; - case 'feed.json': json = content; break; + case 'index.html': + html = content + break + case 'rss.xml': + xml = content + break + case 'feed.json': + json = content + break } } expect(html).toMatchSnapshot() @@ -48,14 +56,14 @@ describe('makeHomepage', () => { arrayBuffer: () => Promise.resolve(new ArrayBuffer(1)), }) - const bundles = await makeActivityPubBundles( - MOCK_HOMEPAGE('matters.news') - ) + const bundles = await makeActivityPubBundles(MOCK_HOMEPAGE('matters.news')) let webfinger: string = '' for (const { path, content } of bundles) { switch (path) { - case '.well-known/webfinger': webfinger = content; break; + case '.well-known/webfinger': + webfinger = content + break } } expect(webfinger).toMatchSnapshot() diff --git a/src/utils/index.ts b/src/utils/index.ts index 3f4060f..593c123 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -16,14 +16,30 @@ export const cleanHTML = (html: string) => { } /** - * Strip html tags from html string to get text. 
+ * Strip HTML tags from HTML string to get plain text. * @param html - html string - * @param replacement - string to replace tags + * @param tagReplacement - string to replace tags + * @param lineReplacement - string to replace line-breaking tags (<p>, <blockquote>, <br>) + * + * @see {@link https://github.com/thematters/ipns-site-generator/blob/main/src/utils/index.ts} */ -export const stripHtml = (html: string, replacement = ' ') => - (String(html) || '') - .replace(/(<\/p>

| )/g, ' ') // replace line break and space first - .replace(/(<([^>]+)>)/gi, replacement) +export const stripHtml = ( + html: string, + tagReplacement = '', + lineReplacement = '\n' +) => { + html = String(html) || '' + + html = html.replace(/<(\/?p|\/?blockquote|br\/?)>/gi, lineReplacement) + + // Remove remaining HTML tags + let plainText = html.replace(/<\/?[^>]+(>|$)/g, tagReplacement) + + // Normalize multiple newlines and trim the result + plainText = plainText.replace(/\n\s*\n/g, '\n').trim() + + return plainText +} /** * Return beginning of text in html as summary, split on sentence break within buffer range. @@ -33,7 +49,7 @@ export const stripHtml = (html: string, replacement = ' ') => */ export const makeSummary = (html: string, length = 140, buffer = 20) => { // split on sentence breaks - const sections = stripHtml(html, '') + const sections = stripHtml(html, '', ' ') .replace(/([?!。?!]|(\.\s))\s*/g, '$1|') .split('|') @@ -44,7 +60,7 @@ export const makeSummary = (html: string, length = 140, buffer = 20) => { const addition = el.length + summary.length > length + buffer - ? `${el.substring(0, length - summary.length)}...` + ? `${el.substring(0, length - summary.length)}…` : el summary = summary.concat(addition) From e2a74440e92998461d9f0ed1ae397aa96cc3b689 Mon Sep 17 00:00:00 2001 From: robertu <4065233+robertu7@users.noreply.github.com> Date: Fri, 17 May 2024 09:38:41 +0800 Subject: [PATCH 2/2] feat: handle " " --- src/__tests__/__snapshots__/utils.test.js.snap | 14 +++++++------- src/utils/index.ts | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/__tests__/__snapshots__/utils.test.js.snap b/src/__tests__/__snapshots__/utils.test.js.snap index 47ce697..b520de8 100644 --- a/src/__tests__/__snapshots__/utils.test.js.snap +++ b/src/__tests__/__snapshots__/utils.test.js.snap @@ -16,13 +16,13 @@ Et ut dolore dolore ex. Deserunt adipisicing id dolor eiusmod minim ea. 
Pariatur Enim aliqua est proident commodo dolor incididunt eiusmod. Anim anim eu pariatur aliqua qui. Sit non commodo enim ut aute officia eu. Adipisicing proident eu velit id proident. Voluptate officia adipisicing voluptate amet dolore ad tempor aliquip reprehenderit Lorem. Ad dolor id minim occaecat ea non nulla. Ullamco exercitation consectetur duis tempor incididunt qui id. Sunt voluptate qui ex do Lorem consectetur laborum mollit culpa sunt anim occaecat esse. Velit deserunt eiusmod deserunt. Anim ullamco ad minim velit nulla aliquip culpa consequat laboris quis ad Lorem pariatur. Occaecat sunt irure reprehenderit. query { -  article( -    input: { mediaHash: \\"zdpuAxP6uSfum74VS3pYmzBR9xvPbrBcX3J8BPpB3xdRGjVsX\\" } -  ) { -    id -    title -    summary -  } + article( + input: { mediaHash: \\"zdpuAxP6uSfum74VS3pYmzBR9xvPbrBcX3J8BPpB3xdRGjVsX\\" } + ) { + id + title + summary + } } Officia amet minim proident labore Proident fugiat amet diff --git a/src/utils/index.ts b/src/utils/index.ts index 593c123..649aded 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -30,6 +30,9 @@ export const stripHtml = ( ) => { html = String(html) || '' + html = html.replace(/\ \;/g, ' ') + + // Replace block-level elements with newlines html = html.replace(/<(\/?p|\/?blockquote|br\/?)>/gi, lineReplacement) // Remove remaining HTML tags