diff --git a/.gitignore b/.gitignore index c6bba59..c3b695d 100644 --- a/.gitignore +++ b/.gitignore @@ -128,3 +128,6 @@ dist .yarn/build-state.yml .yarn/install-state.gz .pnp.* + +StateMarketDeals.json +generated/deals.json diff --git a/README.md b/README.md index fe997ab..e3242b3 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,17 @@ node scripts/fetch-ldn-clients.js ``` The output is committed to git, see [./generated/ldn-clients.csv](./generated/ldn-clients.csv) + +### Parse storage deals from StateMarketDeals.json + +1. Download the snapshot of StateMarketDeals from Glif: https://marketdeals.s3.amazonaws.com/StateMarketDeals.json.zst + +2. Decompress the file and save it to the project's root dir as `StateMarketDeals.json` + +3. Run + + ```sh + node scripts/parse-market-deals.js + ``` + +The output is NOT committed to git, you can find it in `./generated/deals.json` diff --git a/package-lock.json b/package-lock.json index 49788b1..3250679 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,38 @@ "name": "fil-deal-ingester", "version": "0.0.1", "license": "(Apache-2.0 AND MIT)", + "dependencies": { + "JSONStream": "^1.3.5" + }, "devDependencies": {} + }, + "node_modules/jsonparse": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/jsonparse/-/jsonparse-1.3.1.tgz", + "integrity": "sha512-POQXvpdL69+CluYsillJ7SUhKvytYjW9vG/GKpnf+xP8UWgYEM/RaMzHHofbALDiKbbP1W8UEYmgGl39WkPZsg==", + "engines": [ + "node >= 0.2.0" + ] + }, + "node_modules/JSONStream": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/JSONStream/-/JSONStream-1.3.5.tgz", + "integrity": "sha512-E+iruNOY8VV9s4JEbe1aNEm6MiszPRr/UfcHMz0TQh1BXSxHK+ASV1R6W4HpjBhSeS+54PIsAMCBmwD06LLsqQ==", + "dependencies": { + "jsonparse": "^1.2.0", + "through": ">=2.2.7 <3" + }, + "bin": { + "JSONStream": "bin.js" + }, + "engines": { + "node": "*" + } + }, + "node_modules/through": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", + 
"integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" } } } diff --git a/package.json b/package.json index 66e311a..15c9219 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,6 @@ }, "homepage": "https://github.com/filecoin-station/fil-deal-ingester#readme", "dependencies": { - }, - "devDependencies": { + "JSONStream": "^1.3.5" } } diff --git a/scripts/parse-market-deals.js b/scripts/parse-market-deals.js index 35e2633..e7d21de 100644 --- a/scripts/parse-market-deals.js +++ b/scripts/parse-market-deals.js @@ -6,10 +6,15 @@ import { fileURLToPath } from 'node:url' import JSONStream from 'JSONStream' import { once } from 'node:events' +// See https://docs.filecoin.io/networks/mainnet#genesis +const GENESIS_TS = new Date('2020-08-24T22:00:00Z').getTime() +const BLOCK_TIME = 30_000; // 30 seconds + const ldnClients = await loadLdnClients() -const outfile = resolve(dirname(fileURLToPath(import.meta.url)), '../generated/deals.ndjson') -const outstream = createWriteStream(outfile, 'utf-8') +const outfile = resolve(dirname(fileURLToPath(import.meta.url)), '../generated/deals.json') +const outstream = JSONStream.stringify('[\n ', ',\n ', '\n]\n') +outstream.pipe(createWriteStream(outfile, 'utf-8')) const infile = resolve(dirname(fileURLToPath(import.meta.url)), '../StateMarketDeals.json') await pipeline( @@ -18,13 +23,12 @@ await pipeline( async function * (source, { signal }) { for await (const deal of source) { signal.throwIfAborted() - parseDeal(deal) + await processDeal(deal) } } ) outstream.end() -await once(outstream, 'end') console.log('LDN deals were written to %s', relative(process.cwd(), outfile)) /** @param {{ @@ -43,11 +47,29 @@ console.log('LDN deals were written to %s', relative(process.cwd(), outfile)) ClientCollateral: string; }} deal */ -function parseDeal (deal) { +async function processDeal (deal) { if (!deal.VerifiedDeal) return - if (!deal.Label || !deal.Label.match(/^(bafy|Qm)/)) return + 
+ // Skip deals that expire in the next 6 weeks + const expires = deal.EndEpoch * BLOCK_TIME + GENESIS_TS + const afterSixWeeks = Date.now() + 6 * 7 /* days/week */ * 24 /* hours/day */ * 3600_000 + if (expires < afterSixWeeks) return + + // Skip deals that are not part of FIL+ LDN if (!ldnClients.has(deal.Client)) return - console.log(deal) + + // Skip deals that don't have payload CID metadata + // TODO: handle other CID formats + if (!deal.Label || !deal.Label.match(/^(bafy|Qm)/)) return + + const entry = { + provider: deal.Provider, + pieceCID: deal.PieceCID['/'], + payloadCID: deal.Label, + } + if (!outstream.write(entry)) { + await once(outstream, 'drain') + } } async function loadLdnClients () {