Skip to content

Commit

Permalink
fail healthcheck when batch processing gets stuck
Browse files Browse the repository at this point in the history
Issue: BB-526
  • Loading branch information
Kerkesni committed Oct 14, 2024
1 parent 52d39d7 commit 9d1905c
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const constants = {
statusUndefined: 'UNDEFINED',
statusNotReady: 'NOT_READY',
statusNotConnected: 'NOT_CONNECTED',
statusTimedOut: 'TIMED_OUT',
authTypeAssumeRole: 'assumeRole',
authTypeAccount: 'account',
authTypeService: 'service',
Expand Down
8 changes: 8 additions & 0 deletions lib/queuePopulator/LogReader.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class LogReader {
this._zkMetricsHandler = params.zkMetricsHandler;

this._batchTimeoutSeconds = parseInt(process.env.BATCH_TIMEOUT_SECONDS, 10) || 300;
this._batchTimedOut = false;
}

_setEntryBatch(entryBatch) {
Expand Down Expand Up @@ -300,6 +301,9 @@ class LogReader {
this.log.error('queue populator batch timeout', {
logStats: batchState.logStats,
});
this._batchTimedOut = true;
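// S3C doesn't currently support restarts on healthcheck failure,
// so for now we also emit 'SIGTERM' on the process object, which runs
// any registered SIGTERM listeners (presumably the shutdown path;
// TODO confirm one is registered, otherwise this is a no-op).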
process.emit('SIGTERM');
}, this._batchTimeoutSeconds * 1000);
async.waterfall([
Expand Down Expand Up @@ -738,6 +742,10 @@ class LogReader {
});
return statuses;
}

batchProcessTimedOut() {
return this._batchTimedOut;
}
}

module.exports = LogReader;
6 changes: 6 additions & 0 deletions lib/queuePopulator/QueuePopulator.js
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,12 @@ class QueuePopulator {
});
}
});
if (reader.batchProcessTimedOut()) {
responses.push({
component: 'log reader',
status: constants.statusTimedOut,
});
}
});

log.debug('verbose liveness', verboseLiveness);
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/QueuePopulator.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ describe('QueuePopulator', () => {
const mockLogReader = sinon.spy();
mockLogReader.getProducerStatus = sinon.fake(() => prodStatus);
mockLogReader.getLogInfo = sinon.fake(() => logInfo);
mockLogReader.batchProcessTimedOut = sinon.fake(() => false);
qp.logReaders = [
mockLogReader,
];
Expand All @@ -72,6 +73,7 @@ describe('QueuePopulator', () => {
};
mockLogReader.getProducerStatus = sinon.fake(() => prodStatus);
mockLogReader.getLogInfo = sinon.fake(() => logInfo);
mockLogReader.batchProcessTimedOut = sinon.fake(() => false);
qp.logReaders = [
mockLogReader,
];
Expand All @@ -91,5 +93,31 @@ describe('QueuePopulator', () => {
])
);
});

it('returns proper details when batch process timed out', () => {
    // Log reader stub that reports `topicA: true` as its producer status
    // but flags its batch processing as timed out.
    const timedOutReader = Object.assign(sinon.spy(), {
        getProducerStatus: sinon.fake(() => ({ topicA: true })),
        getLogInfo: sinon.fake(() => {}),
        batchProcessTimedOut: sinon.fake(() => true),
    });
    qp.logReaders = [timedOutReader];
    qp.zkClient = {
        getState: () => zookeeper.State.SYNC_CONNECTED,
    };

    qp.handleLiveness(mockRes, mockLog);

    // The liveness response is expected to be a 500 whose body lists
    // only the timed-out log reader.
    sinon.assert.calledOnceWithExactly(mockRes.writeHead, 500);
    const expectedBody = JSON.stringify([
        {
            component: 'log reader',
            status: constants.statusTimedOut,
        },
    ]);
    sinon.assert.calledOnceWithExactly(mockRes.end, expectedBody);
});
});
});
2 changes: 2 additions & 0 deletions tests/unit/lib/queuePopulator/LogReader.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ describe('LogReader', () => {
logReader.processLogEntries({}, () => {});
setTimeout(() => {
assert.strictEqual(emmitted, true);
assert.strictEqual(logReader.batchProcessTimedOut(), true);
done();
}, 2000);
}).timeout(4000);
Expand All @@ -325,6 +326,7 @@ describe('LogReader', () => {
});
logReader.processLogEntries({}, () => {
assert.strictEqual(emmitted, false);
assert.strictEqual(logReader.batchProcessTimedOut(), false);
done();
});
});
Expand Down

0 comments on commit 9d1905c

Please sign in to comment.