@@ -11,7 +11,9 @@ const {
1111 CompatibilityErrorMessages,
1212 severityToLogLevel,
1313 checkAllowedKeys,
14- logLevel
14+ logLevel,
15+ Lock,
16+ acquireOrLog
1517} = require ( './_common' ) ;
1618const { Buffer } = require ( 'buffer' ) ;
1719
@@ -91,6 +93,15 @@ class Consumer {
9193 */
9294 #logger = new DefaultLogger ( ) ;
9395
96+ /**
97+ * A lock for consuming and disconnecting.
98+ * This lock should be held whenever we want to change the state from CONNECTED to any state other than CONNECTED.
99+ * In practical terms, this lock is held whenever we're consuming a message, or disconnecting.
100+ * We set the timeout to 5 seconds, after which we log an error, but keep trying to acquire the lock.
101+ * @type {Lock }
102+ */
103+ #lock = new Lock ( { timeout : 5000 } ) ;
104+
94105 /**
95106 * @constructor
96107 * @param {import("../../types/kafkajs").ConsumerConfig } kJSConfig
@@ -575,21 +586,21 @@ class Consumer {
575586 /* Internal polling loop. It accepts the same config object that `run` accepts. */
576587 async #runInternal( config ) {
577588 while ( this . #state === ConsumerState . CONNECTED ) {
589+
590+ /* We need to acquire a lock here, because we need to ensure that we don't
591+ * disconnect while in the middle of processing a message. */
592+ if ( ! ( await acquireOrLog ( this . #lock, this . #logger) ) )
593+ continue ;
594+
578595 const m = await this . #consumeSingle( ) . catch ( e => {
579596 /* Since this error cannot be exposed to the user in the current situation, just log and retry.
580597 * This is due to restartOnFailure being set to always true. */
581598 if ( this . #logger)
582599 this . #logger. error ( `Consumer encountered error while consuming. Retrying. Error details: ${ JSON . stringify ( e ) } ` ) ;
583600 } ) ;
584601
585- /* It's possible for the state to change as we await something.
586- * Unfortunately, we have no alternative but to break and drop the message, if it exists.
587- * TODO: fix this, possibly with a flag in disconnect that waits until the run loop winds down.
588- */
589- if ( this . #state !== ConsumerState . CONNECTED )
590- break ;
591-
592602 if ( ! m ) {
603+ this . #lock. release ( ) ;
593604 continue ;
594605 }
595606
@@ -608,27 +619,15 @@ class Consumer {
608619 const invalidateMessage = await this . #seekInternal( { topic : m . topic , partition : m . partition } ) ;
609620 if ( invalidateMessage ) {
610621 /* Don't pass this message on to the user if this topic partition was seeked to. */
622+ this . #lock. release ( ) ;
611623 continue ;
612624 }
613-
614- /* It's possible for the state to change as we await something.
615- * Unfortunately, we have no alternative but to break and drop the message.
616- * TODO: fix this, possibly with a flag in disconnect that waits until the run loop winds down.
617- */
618- if ( this . #state !== ConsumerState . CONNECTED )
619- break ;
620625 }
621626
622627 try {
623628 await config . eachMessage (
624629 this . #createPayload( m )
625630 )
626-
627- /* It's possible for the state to change as we await something.
628- * Unfortunately, we have no alternative but to break without taking any action that the user might need.
629- */
630- if ( this . #state !== ConsumerState . CONNECTED )
631- break ;
632631 } catch ( e ) {
633632 /* It's not only possible, but expected that an error will be thrown by eachMessage.
634633 * This is especially true since the pattern of pause() followed by throwing an error
@@ -645,7 +644,7 @@ class Consumer {
645644 /* Force an immediate seek here. It's possible that there are no more messages to be passed to the user,
646645 * but the user seeked in the call to eachMessage, or else we encountered the error catch block.
647646 * In that case, the results of that seek will never be reflected unless we do this. */
648- if ( this . #checkPendingSeeks && this . #state === ConsumerState . CONNECTED )
647+ if ( this . #checkPendingSeeks)
649648 await this . #seekInternal( ) ;
650649
651650 /* TODO: another check we need to do here is to see how kafkaJS is handling
@@ -654,13 +653,8 @@ class Consumer {
654653 * inside this function.
655654 */
656655
657- /* Yield for a bit to allow other scheduled tasks on the event loop to run.
658- * For instance, if disconnect() is called during/after we await eachMessage, and
659- * we don't await anything else after that, this loop will run despite needing to
660- * disconnect.
661- * It's better than any pending tasks be processed here, while we've processed one
662- * message completely, rather than between message processing. */
663- await new Promise ( ( resolve ) => setTimeout ( resolve , 0 ) ) ;
656+ /* Release the lock so that any pending disconnect can go through. */
657+ await this . #lock. release ( ) ;
664658 }
665659 }
666660
@@ -952,7 +946,13 @@ class Consumer {
952946 return ;
953947 }
954948
949+ while ( ! ( await acquireOrLog ( this . #lock, this . #logger) ) ) ; /* Just retry... */
950+
955951 this . #state = ConsumerState . DISCONNECTING ;
952+
953+ /* Since there are state-checks before everything, we are safe to proceed without the lock. */
954+ await this . #lock. release ( ) ;
955+
956956 await new Promise ( ( resolve , reject ) => {
957957 const cb = ( err ) => {
958958 if ( err ) {
0 commit comments