-
-
Notifications
You must be signed in to change notification settings - Fork 74
/
dump.ts
202 lines (178 loc) · 7.09 KB
/
dump.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import urlParser from 'url'
import * as pathParser from 'path'
import async from 'async'
import * as logger from '../Logger.js'
import axios from 'axios'
import Downloader from '../Downloader.js'
import RedisStore from '../RedisStore.js'
import { getFullUrl, jsPath, cssPath } from './index.js'
import { config } from '../config.js'
import MediaWiki from '../MediaWiki.js'
import { ZimCreator, ZimArticle } from '@openzim/libzim'
import { Dump } from '../Dump.js'
import fs from 'fs'
import { DO_PROPAGATION, ALL_READY_FUNCTION, WEBP_HANDLER_URL, LOAD_PHP, RULE_TO_REDIRECT } from './const.js'
import * as path from 'path'
import { fileURLToPath } from 'url'
import urlHelper from './url.helper.js'
// ES modules do not provide __filename / __dirname, so rebuild them from this module's URL.
// __dirname is used below to locate files relative to this source file.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
/**
 * Downloads the given stylesheets, rewrites the `url(...)` references they contain to bare
 * file names and concatenates everything into a single CSS string.
 *
 * Every non-data `url(...)` dependency found in a stylesheet is registered once in
 * `RedisStore.filesToDownloadXPath` (and remembered in `downloader.cssDependenceUrls`).
 *
 * @param downloader - Used to fetch the stylesheets; `downloader.speed` sets the queue concurrency.
 * @param links - Stylesheet URLs, or `link` DOM nodes whose `href` / `media` attributes are read.
 * @returns An object holding the concatenated, rewritten CSS in `finalCss`. Stylesheets that
 *          fail to download are skipped with a warning.
 */
export async function getAndProcessStylesheets(downloader: Downloader, links: Array<string | DominoElement>) {
  let finalCss = ''
  const { filesToDownloadXPath } = RedisStore

  /* Nothing to queue, so there is nothing to wait for */
  if (links.length === 0) {
    return { finalCss }
  }

  /* Only genuine data URIs ('data:...') are skipped; a relative path such as 'database.png' is a real dependency */
  const isDataUri = (candidate: string) => /^data:/i.test(candidate)

  /* Downloads and rewrites a single stylesheet. Failures are logged here, so this never rejects. */
  const processStylesheet = async (link: string | DominoElement): Promise<void> => {
    let cssUrl: string | undefined
    try {
      /* link might be a 'link' DOM node or an URL */
      cssUrl = typeof link === 'object' ? getFullUrl(link.getAttribute('href'), MediaWiki.baseUrl) : link
      const linkMedia = typeof link === 'object' ? link.getAttribute('media') : null
      if (!cssUrl || isDataUri(cssUrl)) {
        return
      }

      const cssUrlRegexp = /url\(['"]{0,1}(.+?)['"]{0,1}\)/gi

      logger.info(`Downloading CSS from ${decodeURI(cssUrl)}`)
      const { content } = await downloader.downloadContent(cssUrl)
      const body = content.toString()

      let rewrittenCss = `\n/* start ${cssUrl} */\n\n`
      rewrittenCss += linkMedia ? `@media ${linkMedia} {\n` : '\n'
      rewrittenCss += `${body}\n`
      rewrittenCss += linkMedia ? `} /* @media ${linkMedia} */\n` : '\n'
      rewrittenCss += `\n/* end ${cssUrl} */\n`

      /* Downloading CSS dependencies */
      let match: RegExpExecArray | null
      // tslint:disable-next-line:no-conditional-assignment
      while ((match = cssUrlRegexp.exec(body)) !== null) {
        let url = match[1]

        /* Avoid 'data', so no URL dependency */
        if (isDataUri(url)) {
          continue
        }

        const filePathname = urlParser.parse(url, false, true).pathname
        if (!filePathname) {
          logger.warn(`Skipping CSS [url(${url})] because the pathname could not be found [${filePathname}]`)
          continue
        }

        const filename = pathParser.basename(filePathname).replace(/-.*x./, '.')

        /* Rewrite the CSS. A replacer function keeps '$' sequences in the file name literal. */
        rewrittenCss = rewrittenCss.replace(url, () => filename)

        /* Need a rewrite if url doesn't include protocol */
        url = getFullUrl(url, cssUrl)
        url = url.indexOf('%') < 0 ? encodeURI(url) : url

        /* Download CSS dependency, but avoid duplicate calls */
        if (!Object.prototype.hasOwnProperty.call(downloader.cssDependenceUrls, url) && filename) {
          downloader.cssDependenceUrls[url] = true
          filesToDownloadXPath.set(config.output.dirs.mediawiki + '/' + filename, { url: urlHelper.serializeUrl(url), namespace: '-' })
        }
      }

      /* Appended only once the whole stylesheet was processed, so a failure never leaves partial CSS */
      finalCss += rewrittenCss
    } catch (err) {
      logger.warn(`Failed to get CSS from [${cssUrl}]`)
    }
  }

  /*
   * The worker is deliberately NOT an async function: the async library only hands a completion
   * callback to plain functions. Calling it on every outcome (success, skipped link, failure)
   * guarantees that the queue drains and the promise below always settles.
   */
  const stylesheetQueue = async.queue((link: string | DominoElement, finished) => {
    processStylesheet(link).then(
      () => finished(),
      () => finished(),
    )
  }, Number(downloader.speed))

  stylesheetQueue.push(links)

  await new Promise<void>((resolve) => {
    stylesheetQueue.drain(() => resolve())
  })

  return {
    finalCss,
  }
}
/**
 * Downloads one module (script or style sheet), patches it where needed and adds it to the ZIM
 * file as an article.
 *
 * Modules whose name contains `javascript/mobile` or `css/mobile` are fetched from
 * `https:${module}`; every other module is requested through `MediaWiki.modulePath`.
 *
 * @param zimCreator - ZIM creator the resulting article is added to.
 * @param downloader - Used to fetch the module content.
 * @param dump - Not used by this function; kept so the signature stays unchanged for callers.
 * @param module - Module name, or protocol-relative URL for the mobile bundles.
 * @param type - Whether the module is requested and stored as JavaScript or CSS.
 * @throws Rethrows any error raised while creating or adding the article, or by the download;
 *         also throws when the startup module does not match the expected patterns.
 */
export async function downloadAndSaveModule(zimCreator: ZimCreator, downloader: Downloader, dump: Dump, module: string, type: 'js' | 'css') {
  // Applies every replacement of the map in insertion order, each one on the result of the
  // previous one, so that all of them end up in the returned text.
  const replaceCodeByRegex = (sourceText: string, replaceMap: Map<RegExp, string>): string => {
    let text = sourceText
    replaceMap.forEach((textToReplace, regEx) => {
      text = text.replace(regEx, textToReplace)
    })
    return text
  }

  // hackStartUpModule changes the startup script by returning true for all modules so that load.php is not called.
  // It also removes requestIdleCallback: in our case the window is idle after all script tags are called, but those
  // script tags require the functions which would have been loaded by doPropagation.
  function hackStartUpModule(jsCode: string) {
    // NOTE(review): this only throws when LOAD_PHP is absent AND one of the two other patterns is
    // absent as well - confirm that this is the intended guard.
    if ((!ALL_READY_FUNCTION.test(jsCode) || !DO_PROPAGATION.test(jsCode)) && !LOAD_PHP.test(jsCode)) {
      throw new Error('unable to hack startup module')
    }

    return replaceCodeByRegex(
      jsCode,
      new Map([
        [DO_PROPAGATION, 'doPropagation();'],
        [ALL_READY_FUNCTION, 'function allReady( modules ) { return true;'],
        [LOAD_PHP, 'script.src ="";'],
      ]),
    )
  }

  // Value of the `only` request parameter for each module type.
  const onlyParameterByType = {
    js: 'scripts',
    css: 'styles',
  }
  const apiParameterOnly = onlyParameterByType[type]

  let moduleApiUrl: string
  if (!module.includes('javascript/mobile') && !module.includes('css/mobile')) {
    moduleApiUrl = encodeURI(`${MediaWiki.modulePath}debug=true&lang=en&modules=${module}&only=${apiParameterOnly}&skin=vector&version=&*`)
  } else {
    moduleApiUrl = encodeURI(`https:${module}`)
  }

  logger.info(`Getting [${type}] module [${moduleApiUrl}]`)

  const { content } = await downloader.downloadContent(moduleApiUrl)
  let text = content.toString()

  if (type === 'js') {
    switch (module) {
      case 'startup':
        text = hackStartUpModule(text)
        break
      case 'mediawiki.page.ready':
        text = replaceCodeByRegex(text, new Map([[RULE_TO_REDIRECT, 'false']]))
        break
    }
  }

  try {
    let articleId
    const pathFunctions = {
      js: jsPath,
      css: cssPath,
    }

    const pathFunction = pathFunctions[type]
    if (pathFunction) {
      articleId = pathFunction(module, config.output.dirs.mediawiki)
    }

    const article = new ZimArticle({ url: articleId, data: text, ns: '-' })
    zimCreator.addArticle(article)
    logger.info(`Saved module [${module}]`)
  } catch (e) {
    logger.error(`Failed to get module with url [${moduleApiUrl}]\nYou may need to specify a custom --mwModulePath`, e)
    throw e
  }
}
// These URLs must stay unchanged because Kiwix JS relies on them.
/**
 * Adds the WebP polyfill scripts to the ZIM file: the two webp-hero files read from
 * node_modules, then the handler script downloaded from WEBP_HANDLER_URL.
 *
 * @param zimCreator - ZIM creator the three script articles are added to.
 * @throws Error when the handler script cannot be downloaded; file-system errors from reading
 *         the local polyfills propagate unchanged.
 */
export async function importPolyfillModules(zimCreator: ZimCreator) {
  const bundledPolyfills = [
    { name: 'webpHeroPolyfill', file: path.join(__dirname, '../../node_modules/webp-hero/dist-cjs/polyfills.js') },
    { name: 'webpHeroBundle', file: path.join(__dirname, '../../node_modules/webp-hero/dist-cjs/webp-hero.bundle.js') },
  ]

  for (const { name, file } of bundledPolyfills) {
    const url = jsPath(name)
    const data = fs.readFileSync(file, 'utf8').toString()
    zimCreator.addArticle(new ZimArticle({ url, data, ns: '-' }))
  }

  let handlerSource
  try {
    const response = await axios.get(WEBP_HANDLER_URL, {
      responseType: 'arraybuffer',
      timeout: 60000,
      // Only these status codes count as a successful download
      validateStatus: (status) => [200, 302, 304].includes(status),
    })
    handlerSource = response.data
  } catch (err) {
    throw new Error(`Failed to download webpHandler from [${WEBP_HANDLER_URL}]: ${err}`)
  }

  const handlerArticle = new ZimArticle({
    url: jsPath('webpHandler'),
    data: handlerSource,
    ns: '-',
  })
  zimCreator.addArticle(handlerArticle)
}