Skip to content

Commit d12304c

Browse files
committed
feat: extract wikilink images
1 parent f4b4209 commit d12304c

File tree

4 files changed

+36
-7
lines changed

4 files changed

+36
-7
lines changed
+13 -1
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,30 @@
11
import { ImgInfo } from '@/service/extract-img/get-replace-list'
22
import { RsText } from '@/wasm'
3+
import { FILTER_BYTE_OFFSET } from './find-img-link'
4+
import { escapeRegExp } from 'lodash'
35

46
export function applyReplaceList(
57
text: string,
68
replaceList: [src: ImgInfo, newLink: string][],
79
beforeEach: (newLink: string) => void
810
) {
11+
const rsCandidate = replaceList.filter(x => x[0].byteOffset !== FILTER_BYTE_OFFSET)
12+
913
// replace from end
10-
const sorted = replaceList.sort((a, b) => b[0].byteOffset - a[0].byteOffset)
14+
const sorted = rsCandidate.sort((a, b) => b[0].byteOffset - a[0].byteOffset)
1115
for (const [src, newLink] of sorted) {
1216
beforeEach(newLink)
1317
const start = src.byteOffset
1418
const end = src.byteOffset + Buffer.from(src.data).length
1519
text = RsText.replaceWithByteOffset(text, start, end, newLink)
1620
}
21+
22+
const tsCandidate = replaceList.filter(x => x[0].byteOffset === FILTER_BYTE_OFFSET)
23+
for (const [src, newLink] of tsCandidate) {
24+
const prefix = src.prefix ?? ''
25+
const regex = new RegExp(escapeRegExp(String.raw`${prefix}${src.data}`), 'g')
26+
text = text.replace(regex, prefix + newLink)
27+
}
28+
1729
return text
1830
}

src/service/extract-img/extract-img.ts

+1
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ export async function extractImg(text: string, fileDir: string, inputImgSrc?: Im
6868
})
6969
)
7070
)
71+
7172
const replaceListLen = replaceList.length
7273

7374
return window.withProgress(

src/service/extract-img/find-img-link.ts

+21 -6
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,23 @@ const imgTagDataUrlImgPat = r`(<img.*?src\s*=\s*")(${dataUrlPat})"[^/]*?\/?>`
1313
const mkdUrlImgPat = r`(!\[[^]]*\]\()([^) ]+).*?\)`
1414
const imgTagUrlImgPat = r`(<img\s*.*?src\s*=\s*["'])(.*?)["'][^>]*?>`
1515
const mkdDataUrlImgPat = r`(!\[.*?]\()(${dataUrlPat})\)`
16-
const cnbDomain = r`\.cnblogs\.com\/`
16+
const wikilinkImages = /!\[(\[.+?\])\][\s\S]+?(?<prefix>\1:\s*)(?<uri>.*?)\s+/g
17+
const exludeDomains = /\.cnblogs\.com/i
18+
const webUrlPrefix = /^https?:\/\//i
19+
20+
export const FILTER_BYTE_OFFSET = -9999
21+
22+
function getImagesWithTs(text: string) {
23+
return [...text.matchAll(wikilinkImages)].map(m => {
24+
const uri = m.groups?.uri ?? ''
25+
return <ImgInfo>{
26+
byteOffset: FILTER_BYTE_OFFSET,
27+
data: uri,
28+
src: webUrlPrefix.test(uri) ? ImgSrc.web : ImgSrc.fs,
29+
prefix: m.groups?.prefix,
30+
}
31+
})
32+
}
1733

1834
export function findImgLink(text: string): ImgInfo[] {
1935
const imgTagUrlImgMgs = RsRegex.matches(imgTagUrlImgPat, text) as RsMatch[]
@@ -24,7 +40,7 @@ export function findImgLink(text: string): ImgInfo[] {
2440
const byteOffset = mg.byte_offset + Buffer.from(prefix).length
2541

2642
let src
27-
if (/https?:\/\//.test(data)) src = ImgSrc.web
43+
if (webUrlPrefix.test(data)) src = ImgSrc.web
2844
else src = ImgSrc.fs
2945

3046
return <ImgInfo>{
@@ -48,8 +64,7 @@ export function findImgLink(text: string): ImgInfo[] {
4864
}
4965
})
5066

51-
const acc = urlImgInfo.concat(dataUrlImgInfo)
52-
53-
// keep links while not cnb
54-
return acc.filter(x => !RsRegex.isMatch(cnbDomain, x.data.toLowerCase()))
67+
let images = urlImgInfo.concat(dataUrlImgInfo)
68+
images = images.concat(getImagesWithTs(text))
69+
return images.filter(x => !exludeDomains.test(x.data))
5570
}

src/service/extract-img/get-replace-list.ts

+1
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ export type ImgInfo = {
1717
byteOffset: number
1818
data: string
1919
src: ImgSrc
20+
prefix: string | undefined
2021
}
2122

2223
export const enum ImgSrc {

0 commit comments

Comments
 (0)