This repository has been archived by the owner on Jun 3, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add back stats, remove spark specific code (#9)
* add back stats, remove spark specific code * fix db name
- Loading branch information
1 parent
53179b3
commit e33e814
Showing
18 changed files
with
511 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
// One-shot launcher: run all pending database schema migrations against
// the Postgres instance configured via DATABASE_URL, then exit.
import { DATABASE_URL } from '../lib/config.js'
import { migrateWithPgConfig } from '../lib/migrate.js'

await migrateWithPgConfig({ connectionString: DATABASE_URL })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import pg from 'pg' | ||
import Postgrator from 'postgrator' | ||
import { fileURLToPath } from 'node:url' | ||
import { dirname, join } from 'node:path' | ||
|
||
// Absolute path to the SQL migration files. Resolved relative to this
// module (not the process working directory) so migrations are found no
// matter where the process was started from.
const migrationsDirectory = join(
  dirname(fileURLToPath(import.meta.url)),
  '..',
  'migrations'
)
|
||
/**
 * Connect to Postgres using the given client configuration, run all pending
 * schema migrations, and close the connection again.
 *
 * @param {pg.ClientConfig} pgConfig - node-postgres client configuration,
 *   e.g. `{ connectionString: DATABASE_URL }`
 */
export const migrateWithPgConfig = async (pgConfig) => {
  const client = new pg.Client(pgConfig)
  await client.connect()
  try {
    await migrateWithPgClient(client)
  } finally {
    // Always release the connection, even when the migration fails
    await client.end()
  }
}
|
||
/**
 * Run all pending schema migrations using an already-connected client.
 * The caller remains responsible for closing the client.
 *
 * @param {pg.Client} client - an open node-postgres client
 */
export const migrateWithPgClient = async (client) => {
  const postgrator = new Postgrator({
    migrationPattern: join(migrationsDirectory, '*'),
    driver: 'pg',
    // Postgrator does not manage connections itself; route every query
    // through the caller-provided client.
    execQuery: (query) => client.query(query)
  })
  console.log(
    'Migrating DB schema from version %s to version %s',
    await postgrator.getDatabaseVersion(),
    await postgrator.getMaxVersion()
  )

  await postgrator.migrate()

  console.log('Migrated DB schema to version', await postgrator.getDatabaseVersion())
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import assert from 'node:assert' | ||
import createDebug from 'debug' | ||
|
||
const debug = createDebug('voyager:public-stats') | ||
|
||
/**
 * Aggregate the accepted ("honest") measurements of one round into public
 * statistics and persist them: retrieval success counts and the set of
 * participants seen today.
 *
 * @param {object} args
 * @param {import('./typings').CreatePgClient} args.createPgClient
 * @param {import('./preprocess').Measurement[]} args.honestMeasurements
 */
export const updatePublicStats = async ({ createPgClient, honestMeasurements }) => {
  // Tally retrieval outcomes and collect unique participant addresses
  // in a single pass over the measurements.
  const retrievalStats = { total: 0, successful: 0 }
  const participants = new Set()
  for (const m of honestMeasurements) {
    retrievalStats.total++
    if (m.retrievalResult === 'OK') retrievalStats.successful++

    participants.add(m.participantAddress)
  }

  const pgClient = await createPgClient()
  try {
    await updateRetrievalStats(pgClient, retrievalStats)
    await updateDailyParticipants(pgClient, participants)
  } finally {
    // Always close the connection, even when one of the updates fails
    await pgClient.end()
  }
}
|
||
/**
 * Add this round's retrieval counts to today's row in `retrieval_stats`,
 * creating the row on first write of the day.
 *
 * @param {import('pg').Client} pgClient
 * @param {object} stats
 * @param {number} stats.total - number of measurements in this round
 * @param {number} stats.successful - measurements with retrievalResult 'OK'
 */
const updateRetrievalStats = async (pgClient, { total, successful }) => {
  debug('Updating public retrieval stats: total += %s successful += %s', total, successful)
  // Upsert keyed on the day: the first round of the day inserts the row,
  // later rounds accumulate into it.
  await pgClient.query(`
    INSERT INTO retrieval_stats
      (day, total, successful)
    VALUES
      (now(), $1, $2)
    ON CONFLICT(day) DO UPDATE SET
      total = retrieval_stats.total + $1,
      successful = retrieval_stats.successful + $2
  `, [
    total,
    successful
  ])
}
|
||
/**
 * Record which participants were seen today. Addresses are first mapped to
 * numeric ids via the `participants` table, then upserted into
 * `daily_participants`; duplicates within the same day are ignored.
 *
 * @param {import('pg').Client} pgClient
 * @param {Set<string>} participants - participant addresses seen this round
 */
export const updateDailyParticipants = async (pgClient, participants) => {
  debug('Updating daily participants, count=%s', participants.size)
  const ids = await mapParticipantsToIds(pgClient, participants)
  await pgClient.query(`
    INSERT INTO daily_participants (day, participant_id)
    SELECT now() as day, UNNEST($1::INT[]) AS participant_id
    ON CONFLICT DO NOTHING
  `, [
    ids
  ])
}
|
||
/**
 * Map participant addresses to their numeric ids in the `participants`
 * table, registering addresses that are not known yet.
 *
 * The caller's Set is NOT modified.
 *
 * @param {import('pg').Client} pgClient
 * @param {Set<string>} participantsSet
 * @returns {Promise<string[]>} A list of participant ids. The order of ids is not defined.
 */
export const mapParticipantsToIds = async (pgClient, participantsSet) => {
  debug('Mapping participants to id, count=%s', participantsSet.size)

  /** @type {string[]} */
  const ids = []

  // Work on a local copy so we never mutate the caller-owned Set.
  const remaining = new Set(participantsSet)

  // TODO: We can further optimise performance of this function by using
  // an in-memory LRU cache. Our network has currently ~2k participants,
  // we need ~50 bytes for each (address, id) pair, that's only ~100KB of data.

  // TODO: passing the entire list of participants as a single query parameter
  // will probably not scale beyond several thousands of addresses. We will
  // need to rework the queries to split large arrays into smaller batches.

  // In most rounds, we have already seen most of the participant addresses
  // If we use "INSERT...ON CONFLICT", then PG increments id counter even for
  // existing addresses where we end up skipping the insert. This could quickly
  // exhaust the space of all 32bit integers.
  // Solution: query the table for known records before running the insert.
  //
  // Caveat: In my testing, this query was not able to leverage the (unique)
  // index on participants.participant_address and performed a full table scan
  // after the array grew past ~10 items. If this becomes a problem, we can
  // introduce the LRU cache mentioned above.
  const { rows: found } = await pgClient.query(
    'SELECT * FROM participants WHERE participant_address = ANY($1::TEXT[])',
    [Array.from(remaining.values())]
  )
  debug('Known participants count=%s', found.length)

  // eslint-disable-next-line camelcase
  for (const { id, participant_address } of found) {
    ids.push(id)
    remaining.delete(participant_address)
  }

  debug('New participant addresses count=%s', remaining.size)

  // Register the new addresses. Use "INSERT...ON CONFLICT" to handle the race condition
  // where another client may have registered these addresses between our previous
  // SELECT query and the next INSERT query.
  const newAddresses = Array.from(remaining.values())
  debug('Registering new participant addresses, count=%s', newAddresses.length)
  const { rows: created } = await pgClient.query(`
    INSERT INTO participants (participant_address)
    SELECT UNNEST($1::TEXT[]) AS participant_address
    ON CONFLICT(participant_address) DO UPDATE
      -- this no-op update is needed to populate "RETURNING id"
      SET participant_address = EXCLUDED.participant_address
    RETURNING id
  `, [
    newAddresses
  ])

  assert.strictEqual(created.length, newAddresses.length)
  return ids.concat(created.map(r => r.id))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.