diff --git a/src/service/extract-img/apply-replace-list.ts b/src/service/extract-img/apply-replace-list.ts index 2a36add5..1d3df963 100644 --- a/src/service/extract-img/apply-replace-list.ts +++ b/src/service/extract-img/apply-replace-list.ts @@ -1,18 +1,30 @@ import { ImgInfo } from '@/service/extract-img/get-replace-list' import { RsText } from '@/wasm' +import { FILTER_BYTE_OFFSET } from './find-img-link' +import { escapeRegExp } from 'lodash' export function applyReplaceList( text: string, replaceList: [src: ImgInfo, newLink: string][], beforeEach: (newLink: string) => void ) { + const rsCandidate = replaceList.filter(x => x[0].byteOffset !== FILTER_BYTE_OFFSET) + // replace from end - const sorted = replaceList.sort((a, b) => b[0].byteOffset - a[0].byteOffset) + const sorted = rsCandidate.sort((a, b) => b[0].byteOffset - a[0].byteOffset) for (const [src, newLink] of sorted) { beforeEach(newLink) const start = src.byteOffset const end = src.byteOffset + Buffer.from(src.data).length text = RsText.replaceWithByteOffset(text, start, end, newLink) } + + const tsCandidate = replaceList.filter(x => x[0].byteOffset === FILTER_BYTE_OFFSET) + for (const [src, newLink] of tsCandidate) { + const prefix = src.prefix ?? '' + const regex = new RegExp(escapeRegExp(String.raw`${prefix}${src.data}`), 'g') + text = text.replace(regex, prefix + newLink) + } + return text } diff --git a/src/service/extract-img/extract-img.ts b/src/service/extract-img/extract-img.ts index ec4ddf55..dd1aa355 100644 --- a/src/service/extract-img/extract-img.ts +++ b/src/service/extract-img/extract-img.ts @@ -68,6 +68,7 @@ export async function extractImg(text: string, fileDir: string, inputImgSrc?: Im }) ) ) + const replaceListLen = replaceList.length return window.withProgress( diff --git a/src/service/extract-img/find-img-link.ts b/src/service/extract-img/find-img-link.ts index acf3c065..7c2c8412 100644 --- a/src/service/extract-img/find-img-link.ts +++ b/src/service/extract-img/find-img-link.ts @@ -13,7 +13,23 @@ const imgTagDataUrlImgPat = r`(` const mkdUrlImgPat = r`(!\[[^]]*\]\()([^) ]+).*?\)` const imgTagUrlImgPat = r`(]*?>` const mkdDataUrlImgPat = r`(!\[.*?]\()(${dataUrlPat})\)` -const cnbDomain = r`\.cnblogs\.com\/` +const wikilinkImages = /!\[(\[.+?\])\][\s\S]+?(?\1:\s*)(?.*?)\s+/g +const exludeDomains = /\.cnblogs\.com/i +const webUrlPrefix = /^https?:\/\//i + +export const FILTER_BYTE_OFFSET = -9999 + +function getImagesWithTs(text: string) { + return [...text.matchAll(wikilinkImages)].map(m => { + const uri = m.groups?.uri ?? '' + return { + byteOffset: FILTER_BYTE_OFFSET, + data: uri, + src: webUrlPrefix.test(uri) ? ImgSrc.web : ImgSrc.fs, + prefix: m.groups?.prefix, + } + }) +} export function findImgLink(text: string): ImgInfo[] { const imgTagUrlImgMgs = RsRegex.matches(imgTagUrlImgPat, text) as RsMatch[] @@ -24,7 +40,7 @@ export function findImgLink(text: string): ImgInfo[] { const byteOffset = mg.byte_offset + Buffer.from(prefix).length let src - if (/https?:\/\//.test(data)) src = ImgSrc.web + if (webUrlPrefix.test(data)) src = ImgSrc.web else src = ImgSrc.fs return { @@ -48,8 +64,7 @@ export function findImgLink(text: string): ImgInfo[] { } }) - const acc = urlImgInfo.concat(dataUrlImgInfo) - - // keep links while not cnb - return acc.filter(x => !RsRegex.isMatch(cnbDomain, x.data.toLowerCase())) + let images = urlImgInfo.concat(dataUrlImgInfo) + images = images.concat(getImagesWithTs(text)) + return images.filter(x => !exludeDomains.test(x.data)) } diff --git a/src/service/extract-img/get-replace-list.ts b/src/service/extract-img/get-replace-list.ts index 6ee5e7a3..e7ddeb55 100644 --- a/src/service/extract-img/get-replace-list.ts +++ b/src/service/extract-img/get-replace-list.ts @@ -17,6 +17,7 @@ export type ImgInfo = { byteOffset: number data: string src: ImgSrc + prefix: string | undefined } export const enum ImgSrc {