-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat: Dns Reconciler queue * fix: init * fix: comments * improvements * small fixes * fix: error during testing Credit: @humphd
- Loading branch information
dadolhay
authored
Mar 27, 2023
1 parent
b612baa
commit ca739e0
Showing
12 changed files
with
274 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import { StateEnumType } from '@prisma/client'; | ||
|
||
import { prisma } from '~/db.server'; | ||
|
||
import type { SystemState } from '@prisma/client'; | ||
|
||
/** | ||
* The SystemState represents global (i.e., cross-instance), shared settings among Starchart instance(s). | ||
* Initially, this includes info on whether or not the DNS Reconciler needs to be run at the next opportunity. | ||
* We use this global SystemState to make sure that we stop hitting the Route53 API when no changes were made | ||
* to our Records table | ||
*/ | ||
|
||
/**
 * Creates the SystemState row keyed by `StateEnumType.unique`, with
 * `reconciliationNeeded` set to `true`.
 *
 * @returns the Prisma promise for the created row
 */
function initialize() {
  const seed = {
    unique: StateEnumType.unique,
    reconciliationNeeded: true,
  };

  return prisma.systemState.create({ data: seed });
}
|
||
/**
 * Reads the `reconciliationNeeded` flag of the SystemState row keyed by
 * `StateEnumType.unique`.
 *
 * @returns the stored flag; `true` when the row is not found or the flag is nullish
 */
export async function getIsReconciliationNeeded(): Promise<SystemState['reconciliationNeeded']> {
  const state = await prisma.systemState.findUnique({
    select: { reconciliationNeeded: true },
    where: { unique: StateEnumType.unique },
  });

  // Missing row => behave as if a reconciliation was requested
  if (state == null) {
    return true;
  }

  return state.reconciliationNeeded ?? true;
}
|
||
/**
 * Persists the `reconciliationNeeded` flag on the SystemState row keyed by
 * `StateEnumType.unique`.
 *
 * If the update rejects, the row is created through `initialize()` instead.
 * Note that `initialize()` always stores `reconciliationNeeded: true`,
 * regardless of the value passed in here.
 *
 * @param reconciliationNeeded - the flag value to store
 * @returns the updated row, or the freshly created row when the update failed
 */
export async function setIsReconciliationNeeded(
  reconciliationNeeded: SystemState['reconciliationNeeded']
): Promise<SystemState> {
  try {
    // `return await` is required here: a bare `return` hands the pending
    // promise to the caller, so a rejection would bypass this try/catch.
    return await prisma.systemState.update({
      data: { reconciliationNeeded },
      where: { unique: StateEnumType.unique },
    });
  } catch {
    /**
     * This should never happen, as the table should always be seeded.
     * In case it isn't, let's seed it here. The next queue run will set
     * the correct reconciliationNeeded.
     */
    return initialize();
  }
}
2 changes: 1 addition & 1 deletion
2
.../DnsDbCompareStructureGenerator.server.ts → .../DnsDbCompareStructureGenerator.server.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import logger from '~/lib/logger.server'; | ||
import { reconcilerQueue } from './reconciler-worker.server'; | ||
|
||
declare global {
  // Flag set by addReconcilerJob once it has run in this process
  var __reconciler_queue_init__: boolean;
}

// Log every `error` event emitted by the reconciler queue
reconcilerQueue.on('error', (error) => {
  logger.error('Reconciler encountered an error', error);
});
|
||
/**
 * Registers the repeating job that triggers the DNS reconciler.
 *
 * Outside of production the setup runs at most once per process, tracked
 * through `global.__reconciler_queue_init__`. Any failure while talking to
 * the queue is logged and not rethrown.
 */
export const addReconcilerJob = async () => {
  const isProduction = process.env.NODE_ENV === 'production';
  if (!isProduction && global.__reconciler_queue_init__) {
    // Only do this setup once if in dev
    return;
  }

  global.__reconciler_queue_init__ = true;

  logger.info('Starting DNS reconciler queue');

  const jobName = 'reconciler-scheduler';
  const repeatEveryMs = 2 * 60 * 1000;

  try {
    // Repeatable jobs persist within redis and several of them can exist
    // (even with the same key), so drop all existing ones before adding ours.
    const existingJobs = await reconcilerQueue.getRepeatableJobs();
    const removals = existingJobs.map((job) => reconcilerQueue.removeRepeatableByKey(job.key));
    await Promise.all(removals);

    await reconcilerQueue.add(
      jobName,
      {},
      {
        repeatJobKey: jobName,
        repeat: { every: repeatEveryMs },
      }
    );
  } catch (err) {
    logger.error(`Failed to start reconciler queue: ${err}`);
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
import { Worker, Queue } from 'bullmq'; | ||
import { redis } from '~/lib/redis.server'; | ||
import { executeChangeSet } from '~/lib/dns.server'; | ||
import logger from '~/lib/logger.server'; | ||
import DnsDbCompareStructureGenerator from './DnsDbCompareStructureGenerator.server'; | ||
import Route53CompareStructureGenerator from './Route53CompareStructureGenerator.server'; | ||
import { | ||
createRemovedChangeSetFromCompareStructures, | ||
createUpsertedChangeSetFromCompareStructures, | ||
} from './createChangeSetFromCompareStructures.server'; | ||
import { getIsReconciliationNeeded, setIsReconciliationNeeded } from '~/models/system-state.server'; | ||
import type { Change } from '@aws-sdk/client-route-53'; | ||
|
||
// Route53 limit for a ChangeSet | ||
const CHANGE_SET_MAX_SIZE = 1000;
// Queue name, used both for the queue below and for the worker consuming it
const reconcilerQueueName = 'reconciler';

// Queue initialization
export const reconcilerQueue = new Queue(reconcilerQueueName, { connection: redis });
|
||
/**
 * Generates the DB and Route53 compare structures concurrently and derives
 * the change set from them.
 *
 * @returns the "removed" changes followed by the "upserted" changes
 */
const createChangeSet = async (): Promise<Change[]> => {
  const [fromDb, fromRoute53] = await Promise.all([
    new DnsDbCompareStructureGenerator().generate(),
    new Route53CompareStructureGenerator().generate(),
  ]);

  const structures = { dbStructure: fromDb, route53Structure: fromRoute53 };

  const removals = createRemovedChangeSetFromCompareStructures(structures);
  const upserts = createUpsertedChangeSetFromCompareStructures(structures);

  return [...removals, ...upserts];
};
|
||
/**
 * NORMAL MODE
 *
 * Pushes the change set with a single `executeChangeSet` call. Only the
 * first CHANGE_SET_MAX_SIZE changes are sent; anything beyond that is left
 * for a later run.
 *
 * @param changeSet - all pending changes
 * @returns `true` when the change set exceeded CHANGE_SET_MAX_SIZE, i.e.
 *          additional reconciliation is needed
 */
const pushChangesBulk = async (changeSet: Change[]): Promise<boolean> => {
  const batch = changeSet.slice(0, CHANGE_SET_MAX_SIZE);

  // The count in the message is the batch size; the logged payload is the full set
  logger.debug(
    `Reconciler NORMAL MODE - Reconciler intends to push the following ${batch.length} changes`,
    { changeSet }
  );

  await executeChangeSet(batch);

  const isMoreWorkLeft = changeSet.length > CHANGE_SET_MAX_SIZE;
  return isMoreWorkLeft;
};
|
||
/**
 * LIMP MODE
 *
 * Pushes the changes sequentially, one `executeChangeSet` call per change,
 * so that a failing change can be isolated. A failure is logged together
 * with the offending change and does not stop the remaining changes.
 *
 * @param changeSet - the changes to push one by one
 */
const pushChangesIndividually = async (changeSet: Change[]) => {
  const pushSingle = async (change: Change) => {
    logger.debug(`Reconciler LIMP MODE - Reconciler intends to push the following change`, {
      change,
    });

    await executeChangeSet([change]);
  };

  for (const change of changeSet) {
    try {
      await pushSingle(change);
    } catch (error) {
      logger.error(`Reconciler LIMP MODE - the following single change failed`, {
        change,
        error,
      });
    }
  }
};
|
||
/**
 * Processor executed by the worker for every job taken from the reconciler queue.
 *
 * Steps, in order:
 *  1. exit if another reconciler job is already active,
 *  2. exit if no reconciliation has been requested,
 *  3. build the change set; if it is empty, clear the flag and exit,
 *  4. push the changes in bulk, falling back to one-by-one pushes on failure,
 *  5. store whether another reconciliation run is needed.
 */
const processReconcilerJob = async (): Promise<void> => {
  /**
   * A single BullMQ worker only executes one job at a time. With several
   * instances (i.e. our docker swarm) there is one worker per instance,
   * all taking jobs from the same queue.
   *
   * A repeat job is not a regular job: it makes the queue system keep
   * adding delayed `regular` jobs whenever the repeat pattern/integer
   * dictates it.
   *
   * So, in theory, if a job runs longer than our repeat interval, another
   * swarm node could pick up the next scheduled job and two reconcilers
   * would run in parallel.
   *
   * To prevent that, we ask BullMQ how many jobs are currently active
   * (this count includes the job we are in right now). More than one
   * means a job was already running when we started ==> we must exit to
   * inhibit concurrency.
   */
  const reportedActiveJobs = await reconcilerQueue.getJobs('active');
  // BullMQ bug, sometimes an array element is `undefined`, that should not be possible
  const activeJobs = reportedActiveJobs.filter((job) => Boolean(job));

  if (activeJobs.length > 1) {
    logger.debug('Reconciler - Inhibiting concurrent run');
    return;
  }

  // Only run if reconciler was explicitly requested
  const isReconciliationNeeded = await getIsReconciliationNeeded();
  if (!isReconciliationNeeded) {
    logger.debug('Reconciler - skipping current job, reconciler not needed.');
    return;
  }

  const changeSet = await createChangeSet();

  if (changeSet.length === 0) {
    logger.debug('Reconciler - found no changes to be pushed');
    await setIsReconciliationNeeded(false);
    return;
  }

  // We are defaulting to true, if everything fails, the queue will retry in 2 mins
  let isAdditionalReconciliationNeeded = true;
  try {
    // First, we try to bulk push all the changes at once.
    isAdditionalReconciliationNeeded = await pushChangesBulk(changeSet);
  } catch (error) {
    // If that fails, we switch to limp mode, which pushes changes one by one.
    // This way we can pinpoint the offending change in the set.
    logger.error('Reconciler - Change set failed, switching to limp mode', { error });

    await pushChangesIndividually(changeSet);
  }

  /**
   * Update system state
   *
   * If the change set had no more than CHANGE_SET_MAX_SIZE elements and the
   * bulk push succeeded, the altered dns data has now been reconciled.
   */
  await setIsReconciliationNeeded(isAdditionalReconciliationNeeded);
  logger.debug('Reconciler - job complete', { isAdditionalReconciliationNeeded });
};

const reconcilerWorker = new Worker(reconcilerQueueName, processReconcilerJob, {
  connection: redis,
});

// Close the worker when the process receives SIGINT
process.on('SIGINT', () => reconcilerWorker.close());
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters