Skip to content

Commit a24b2a7

Browse files
authored
More v1 run engine fixes (#1644)
* Remove the socket count from the shared queue consumer because it can take up to 5s to complete
* Add support for re-using queue snapshots across dequeues
* Only consider the top N orgs when dequeuing, to help mitigate large spikes in queues (like around the hour and half-hour marks)
1 parent 6da5e7a commit a24b2a7

File tree

5 files changed

+336
-18
lines changed

5 files changed

+336
-18
lines changed

Diff for: apps/webapp/app/env.server.ts

+2
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ const EnvironmentSchema = z.object({
232232
MARQS_CONCURRENCY_LIMIT_BIAS: z.coerce.number().default(0.75),
233233
MARQS_AVAILABLE_CAPACITY_BIAS: z.coerce.number().default(0.3),
234234
MARQS_QUEUE_AGE_RANDOMIZATION_BIAS: z.coerce.number().default(0.25),
235+
MARQS_REUSE_SNAPSHOT_COUNT: z.coerce.number().int().default(0),
236+
MARQS_MAXIMUM_ORG_COUNT: z.coerce.number().int().optional(),
235237

236238
PROD_TASK_HEARTBEAT_INTERVAL_MS: z.coerce.number().int().optional(),
237239

Diff for: apps/webapp/app/v3/marqs/fairDequeuingStrategy.server.ts

+111-9
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ export type FairDequeuingStrategyOptions = {
4444
* If not provided, no biasing will be applied (completely random shuffling)
4545
*/
4646
biases?: FairDequeuingStrategyBiases;
47+
reuseSnapshotCount?: number;
48+
maximumOrgCount?: number;
4749
};
4850

4951
type FairQueueConcurrency = {
@@ -90,6 +92,10 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy {
9092
}>;
9193

9294
private _rng: seedrandom.PRNG;
95+
private _reusedSnapshotForConsumer: Map<
96+
string,
97+
{ snapshot: FairQueueSnapshot; reuseCount: number }
98+
> = new Map();
9399

94100
constructor(private options: FairDequeuingStrategyOptions) {
95101
const ctx = new DefaultStatefulContext();
@@ -310,16 +316,53 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy {
310316
span.setAttribute("consumer_id", consumerId);
311317
span.setAttribute("parent_queue", parentQueue);
312318

319+
if (
320+
typeof this.options.reuseSnapshotCount === "number" &&
321+
this.options.reuseSnapshotCount > 0
322+
) {
323+
const key = `${parentQueue}:${consumerId}`;
324+
const reusedSnapshot = this._reusedSnapshotForConsumer.get(key);
325+
326+
if (reusedSnapshot) {
327+
if (reusedSnapshot.reuseCount < this.options.reuseSnapshotCount) {
328+
span.setAttribute("reused_snapshot", true);
329+
330+
this._reusedSnapshotForConsumer.set(key, {
331+
snapshot: reusedSnapshot.snapshot,
332+
reuseCount: reusedSnapshot.reuseCount + 1,
333+
});
334+
335+
return reusedSnapshot.snapshot;
336+
} else {
337+
this._reusedSnapshotForConsumer.delete(key);
338+
}
339+
}
340+
}
341+
342+
span.setAttribute("reused_snapshot", false);
343+
313344
const now = Date.now();
314345

315-
const queues = await this.#allChildQueuesByScore(parentQueue, consumerId, now);
346+
let queues = await this.#allChildQueuesByScore(parentQueue, consumerId, now);
316347

317348
span.setAttribute("parent_queue_count", queues.length);
318349

319350
if (queues.length === 0) {
320351
return emptyFairQueueSnapshot;
321352
}
322353

354+
// Apply org selection if maximumOrgCount is specified
355+
let selectedOrgIds: Set<string>;
356+
if (this.options.maximumOrgCount && this.options.maximumOrgCount > 0) {
357+
selectedOrgIds = this.#selectTopOrgs(queues, this.options.maximumOrgCount);
358+
// Filter queues to only include selected orgs
359+
queues = queues.filter((queue) => selectedOrgIds.has(queue.org));
360+
361+
span.setAttribute("selected_org_count", selectedOrgIds.size);
362+
}
363+
364+
span.setAttribute("selected_queue_count", queues.length);
365+
323366
const orgIds = new Set<string>();
324367
const envIds = new Set<string>();
325368
const envIdToOrgId = new Map<string, string>();
@@ -341,10 +384,6 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy {
341384
(org) => org.concurrency.current >= org.concurrency.limit
342385
);
343386

344-
span.setAttributes({
345-
...flattenAttributes(orgsAtFullConcurrency, "orgs_at_full_concurrency"),
346-
});
347-
348387
const orgIdsAtFullConcurrency = new Set(orgsAtFullConcurrency.map((org) => org.id));
349388

350389
const orgsSnapshot = orgs.reduce((acc, org) => {
@@ -355,6 +394,12 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy {
355394
return acc;
356395
}, {} as Record<string, { concurrency: FairQueueConcurrency }>);
357396

397+
span.setAttributes({
398+
org_count: orgs.length,
399+
orgs_at_full_concurrency_count: orgsAtFullConcurrency.length,
400+
orgs_snapshot_count: Object.keys(orgsSnapshot).length,
401+
});
402+
358403
if (Object.keys(orgsSnapshot).length === 0) {
359404
return emptyFairQueueSnapshot;
360405
}
@@ -376,10 +421,6 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy {
376421
(env) => env.concurrency.current >= env.concurrency.limit
377422
);
378423

379-
span.setAttributes({
380-
...flattenAttributes(envsAtFullConcurrency, "envs_at_full_concurrency"),
381-
});
382-
383424
const envIdsAtFullConcurrency = new Set(envsAtFullConcurrency.map((env) => env.id));
384425

385426
const envsSnapshot = envs.reduce((acc, env) => {
@@ -390,6 +431,11 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy {
390431
return acc;
391432
}, {} as Record<string, { concurrency: FairQueueConcurrency }>);
392433

434+
span.setAttributes({
435+
env_count: envs.length,
436+
envs_at_full_concurrency_count: envsAtFullConcurrency.length,
437+
});
438+
393439
const queuesSnapshot = queues.filter(
394440
(queue) =>
395441
!orgIdsAtFullConcurrency.has(queue.org) && !envIdsAtFullConcurrency.has(queue.env)
@@ -402,10 +448,66 @@ export class FairDequeuingStrategy implements MarQSFairDequeueStrategy {
402448
queues: queuesSnapshot,
403449
};
404450

451+
if (
452+
typeof this.options.reuseSnapshotCount === "number" &&
453+
this.options.reuseSnapshotCount > 0
454+
) {
455+
this._reusedSnapshotForConsumer.set(`${parentQueue}:${consumerId}`, {
456+
snapshot,
457+
reuseCount: 0,
458+
});
459+
}
460+
405461
return snapshot;
406462
});
407463
}
408464

465+
/**
 * Picks up to `maximumOrgCount` distinct org IDs from `queues` using a
 * weighted random draw without replacement.
 *
 * Each org's weight is the average `age` of its queues, normalized by the
 * largest average across all orgs, so orgs with older queues are more likely
 * to be drawn. Randomness comes from `this._rng`.
 *
 * @param queues - Candidate queues; each contributes its `org` and `age`.
 * @param maximumOrgCount - Upper bound on the number of orgs to select.
 * @returns The selected org IDs. Contains every org when there are fewer
 *   distinct orgs than `maximumOrgCount`, and is empty when `queues` is empty.
 */
#selectTopOrgs(queues: FairQueue[], maximumOrgCount: number): Set<string> {
  // Group queues by org
  const queuesByOrg = queues.reduce((acc, queue) => {
    if (!acc[queue.org]) {
      acc[queue.org] = [];
    }
    acc[queue.org].push(queue);
    return acc;
  }, {} as Record<string, FairQueue[]>);

  // Calculate average age for each org
  const orgAverageAges = Object.entries(queuesByOrg).map(([orgId, orgQueues]) => {
    const averageAge = orgQueues.reduce((sum, q) => sum + q.age, 0) / orgQueues.length;
    return { orgId, averageAge };
  });

  // Fold instead of `Math.max(...values)` so the org count is not bounded by
  // the engine's maximum argument count.
  const maxAge = orgAverageAges.reduce((max, o) => Math.max(max, o.averageAge), -Infinity);

  // Normalizing by a zero (or otherwise unusable) maximum would yield NaN
  // weights, which makes every draw below pick index 0 and silently turns the
  // shuffle into "first N orgs". Fall back to uniform weights in that case.
  const canNormalize = Number.isFinite(maxAge) && maxAge > 0;

  const weightedOrgs = orgAverageAges.map((org) => ({
    orgId: org.orgId,
    // Clamp at 0: the cumulative-subtraction draw below assumes non-negative weights.
    weight: canNormalize ? Math.max(org.averageAge, 0) / maxAge : 1,
  }));

  // Select top N orgs using weighted shuffle
  const selectedOrgs = new Set<string>();
  const remainingOrgs = [...weightedOrgs];
  let totalWeight = remainingOrgs.reduce((sum, org) => sum + org.weight, 0);

  while (selectedOrgs.size < maximumOrgCount && remainingOrgs.length > 0) {
    let random = this._rng() * totalWeight;
    let index = 0;

    // Walk the cumulative weights until the random point is consumed.
    while (random > 0 && index < remainingOrgs.length) {
      random -= remainingOrgs[index].weight;
      index++;
    }
    // The loop overshoots by one; the clamp also covers `random <= 0` up front
    // (e.g. only zero-weight orgs remain), which selects the first remaining org.
    index = Math.max(0, index - 1);

    selectedOrgs.add(remainingOrgs[index].orgId);
    totalWeight -= remainingOrgs[index].weight;
    remainingOrgs.splice(index, 1);
  }

  return selectedOrgs;
}
510+
409511
async #getOrgConcurrency(orgId: string): Promise<FairQueueConcurrency> {
410512
return await startSpan(this.options.tracer, "getOrgConcurrency", async (span) => {
411513
span.setAttribute("org_id", orgId);

Diff for: apps/webapp/app/v3/marqs/index.server.ts

+2
Original file line numberDiff line numberDiff line change
@@ -1625,6 +1625,8 @@ function getMarQSClient() {
16251625
availableCapacityBias: env.MARQS_AVAILABLE_CAPACITY_BIAS,
16261626
queueAgeRandomization: env.MARQS_QUEUE_AGE_RANDOMIZATION_BIAS,
16271627
},
1628+
reuseSnapshotCount: env.MARQS_REUSE_SNAPSHOT_COUNT,
1629+
maximumOrgCount: env.MARQS_MAXIMUM_ORG_COUNT,
16281630
}),
16291631
envQueuePriorityStrategy: new FairDequeuingStrategy({
16301632
tracer: tracer,

Diff for: apps/webapp/app/v3/marqs/sharedQueueConsumer.server.ts

-9
Original file line numberDiff line numberDiff line change
@@ -1122,15 +1122,6 @@ export class SharedQueueConsumer {
11221122
"emitResumeAfterDependencyWithAck",
11231123
async (span) => {
11241124
try {
1125-
const sockets = await this.#startActiveSpan("getCoordinatorSockets", async (span) => {
1126-
const sockets = await socketIo.coordinatorNamespace.fetchSockets();
1127-
1128-
span.setAttribute("socket_count", sockets.length);
1129-
1130-
return sockets;
1131-
});
1132-
1133-
span.setAttribute("socket_count", sockets.length);
11341125
span.setAttribute("attempt_id", resumableAttempt.id);
11351126
span.setAttribute(
11361127
"timeout_in_ms",

0 commit comments

Comments
 (0)