-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
executable file
·80 lines (65 loc) · 2.67 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
const fs = require('fs');
const exec = require('util').promisify(require('child_process').exec);
// Reads a JSON file (path resolved against the current working directory)
// and returns the parsed value.
const loadJson = (file) => JSON.parse(fs.readFileSync(file));

// Runtime settings; the rest of this file reads host, port, username,
// password, addr and maxFee from it.
const config = loadJson('config.json');

// Passed as the key when creating and signing transactions
// (presumably the Arweave wallet JWK — confirm against key.json).
const key = loadJson('key.json');

// Arweave client for the configured gateway; the protocol is always HTTPS.
const arweave = require('arweave').init({
  host: config.host,
  port: config.port,
  protocol: 'https',
});
/**
 * Posts a job action to the Heritrix REST endpoint of the "Permaloom" job
 * by running curl.
 *
 * curl is spawned with an argument array (no shell), so characters such as
 * spaces, quotes, `$` or `;` in the credentials or address are passed through
 * literally instead of being interpreted by a shell.
 *
 * @param {string} action - Value sent as the `action` form field.
 * @param {string} username - User name for curl's `-u` option.
 * @param {string} password - Password for curl's `-u` option.
 * @param {string} addr - Base address; `engine/job/Permaloom` is appended as-is.
 * @returns {Promise<{stdout: string, stderr: string}>} Output captured from curl.
 */
async function action(action, username, password, addr) {
  const { execFile } = require('child_process');
  const { promisify } = require('util');
  return promisify(execFile)('curl', [
    '-v',
    '-d', `action=${action}`,
    '-k',
    '-u', `${username}:${password}`,
    '--anyauth',
    '--location',
    `${addr}engine/job/Permaloom`,
  ]);
}
/**
 * Builds an Arweave transaction for one WARC file and adds it to the batch.
 *
 * The fee limit is checked before anything is mutated or deleted, so when the
 * limit is exceeded the file stays on disk and `txs` is left untouched.
 *
 * NOTE(review): the WARC is removed as soon as its transaction is drafted,
 * i.e. before it is uploaded; a failed upload therefore loses the file.
 *
 * @param {string} filePath - Full path of the WARC file to read.
 * @param {Array} txs - Transactions drafted so far; the new one is appended.
 * @param {number} fee - Sum of the rewards of the transactions in `txs`.
 * @returns {Promise<[Array, number]>} The batch and the updated fee total.
 * @throws {Error} 'Fee exceeded limit' when the total would exceed config.maxFee.
 */
async function draftTxs(filePath, txs, fee) {
  const tx = await arweave.createTransaction({ data: fs.readFileSync(filePath) }, key);
  // arweave-js exposes the reward as a string; convert it so the sum is
  // numeric addition rather than string concatenation.
  const total = fee + Number(tx.reward);
  if (total > config.maxFee) throw new Error('Fee exceeded limit');
  txs.push(tx);
  fs.unlinkSync(filePath);
  return [txs, total];
}

/**
 * Entry point: starts Heritrix, creates and launches the "Permaloom" job,
 * waits for the crawl to report FINISHED, drafts one Arweave transaction per
 * WARC file and uploads them.
 */
(async () => {
  const path = require('path');
  const execFile = require('util').promisify(require('child_process').execFile);

  const STATUS_POLL_MS = 5000;
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const credentials = `${config.username}:${config.password}`;
  // Runs curl with the options shared by every request. Arguments are passed
  // as an array (no shell), so credentials need no quoting.
  const curl = (...args) =>
    execFile('curl', ['-v', '-k', '-u', credentials, '--anyauth', '--location', ...args]);

  const heritrixHome = process.env.HERITRIX_HOME;
  if (!heritrixHome) throw new Error('HERITRIX_HOME is not set');

  // <parent of this script's directory>/jobs/Permaloom/latest/warcs
  const warcPath = path.join(path.dirname(__dirname), 'jobs', 'Permaloom', 'latest', 'warcs');
  // Start from an empty directory. `recursive` is required to remove a
  // directory; `force` makes a missing path a no-op.
  fs.rmSync(warcPath, { recursive: true, force: true });

  await execFile(path.join(heritrixHome, 'bin', 'heritrix'), ['-a', credentials]);
  await curl('-d', 'createpath=Permaloom&action=create', `${config.addr}engine`);
  await curl('-T', 'crawler-beans.cxml', `${config.addr}engine/job/Permaloom/jobdir/crawler-beans.cxml`);

  for (const step of ['teardown', 'build', 'launch', 'unpause']) {
    const { stdout } = await action(step, config.username, config.password, config.addr);
    if (step !== 'build') continue;
    // A response without a <jobLogTail> element is treated as an empty log.
    const logTail = stdout.split('<jobLogTail>')[1]?.split('</jobLogTail>')[0] ?? '';
    if (logTail.includes('SEVERE')) {
      // A global regex is needed: String.prototype.replace with a string
      // pattern only removes the first occurrence of each tag.
      throw new Error(`Heritrix Error: \n${logTail.replace(/<\/?value>/g, '')}`);
    }
  }
  console.log('Job started');

  // Returns the text of <crawlControllerState>, or undefined when the
  // response does not contain that element.
  const crawlState = async () => {
    const { stdout } = await curl('-H', 'Accept: application/xml', `${config.addr}engine/job/Permaloom`);
    return stdout.split('<crawlControllerState>')[1]?.split('</crawlControllerState>')[0];
  };

  // Poll the job instead of watching the directory: WARCs are only read once
  // the crawl reports FINISHED, so no file is drafted while it is still being
  // written (presumably Heritrix has closed its WARC writers by then — TODO
  // confirm).
  while ((await crawlState()) !== 'FINISHED') {
    await sleep(STATUS_POLL_MS);
  }

  let txs = [];
  let fee = 0;
  // The directory may be absent if the crawl wrote nothing.
  const entries = fs.existsSync(warcPath) ? fs.readdirSync(warcPath, { withFileTypes: true }) : [];
  for (const entry of entries) {
    if (entry.isFile()) {
      [txs, fee] = await draftTxs(path.join(warcPath, entry.name), txs, fee);
    }
  }

  console.log('Uploading transactions');
  for (const tx of txs) {
    // sign() signs the transaction in place and resolves to undefined, so the
    // transaction itself (not sign's result) is handed to getUploader.
    await arweave.transactions.sign(tx, key);
    const uploader = await arweave.transactions.getUploader(tx);
    while (!uploader.isComplete) {
      await uploader.uploadChunk();
    }
  }
  console.log('WARC archived');
})().catch((err) => {
  // Report the failure and exit non-zero instead of leaving the promise floating.
  console.error(err);
  process.exitCode = 1;
});