@@ -418,7 +418,8 @@ impl HypervLinuxDriver {
418418
419419 let interrupt_handle = Arc :: new ( LinuxInterruptHandle {
420420 running : AtomicU64 :: new ( 0 ) ,
421- cancel_requested : AtomicBool :: new ( false ) ,
421+ cancel_requested : AtomicU64 :: new ( 0 ) ,
422+ call_active : AtomicBool :: new ( false ) ,
422423 #[ cfg( gdb) ]
423424 debug_interrupt : AtomicBool :: new ( false ) ,
424425 #[ cfg( all(
@@ -761,16 +762,13 @@ impl Hypervisor for HypervLinuxDriver {
761762 self . interrupt_handle
762763 . tid
763764 . store ( unsafe { libc:: pthread_self ( ) as u64 } , Ordering :: Relaxed ) ;
764- // Note: if a `InterruptHandle::kill()` called while this thread is **here**
765- // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
766- self . interrupt_handle
767- . set_running_and_increment_generation ( )
768- . map_err ( |e| {
769- new_error ! (
770- "Error setting running state and incrementing generation: {}" ,
771- e
772- )
773- } ) ?;
765+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
766+ // (after set_running_bit but before checking cancel_requested):
767+ // - kill() will stamp cancel_requested with the current generation
768+ // - We will check cancel_requested below and skip the VcpuFd::run() call
769+ // - This is the desired behavior - the kill takes effect immediately
770+ let generation = self . interrupt_handle . set_running_bit ( ) ;
771+
774772 #[ cfg( not( gdb) ) ]
775773 let debug_interrupt = false ;
776774 #[ cfg( gdb) ]
@@ -779,14 +777,16 @@ impl Hypervisor for HypervLinuxDriver {
779777 . debug_interrupt
780778 . load ( Ordering :: Relaxed ) ;
781779
782- // Don't run the vcpu if `cancel_requested` is true
780+ // Don't run the vcpu if `cancel_requested` is set for our generation
783781 //
784- // Note: if a `InterruptHandle::kill()` called while this thread is **here**
785- // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
782+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
783+ // (after checking cancel_requested but before vcpu.run()):
784+ // - kill() will stamp cancel_requested with the current generation
785+ // - We will proceed with vcpu.run(), but signals will be sent to interrupt it
786+ // - The vcpu will be interrupted and return EINTR (handled below)
786787 let exit_reason = if self
787788 . interrupt_handle
788- . cancel_requested
789- . load ( Ordering :: Relaxed )
789+ . is_cancel_requested_for_generation ( generation)
790790 || debug_interrupt
791791 {
792792 Err ( mshv_ioctls:: MshvError :: from ( libc:: EINTR ) )
@@ -799,11 +799,12 @@ impl Hypervisor for HypervLinuxDriver {
799799 Some ( hyperlight_guest_tracing:: invariant_tsc:: read_tsc ( ) ) ;
800800 self . trace_info . guest_start_epoch = Some ( std:: time:: Instant :: now ( ) ) ;
801801 }
802- // Note: if a `InterruptHandle::kill()` called while this thread is **here**
803- // Then the vcpu will run, but we will keep sending signals to this thread
804- // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will
805- // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error,
806- // both of which are fine.
802+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
803+ // (during vcpu.run() execution):
804+ // - kill() stamps cancel_requested with the current generation
805+ // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly
806+ // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR
807+ // - We check cancel_requested below and return Cancelled if generation matches
807808 #[ cfg( mshv2) ]
808809 {
809810 let hv_message: hv_message = Default :: default ( ) ;
@@ -812,27 +813,32 @@ impl Hypervisor for HypervLinuxDriver {
812813 #[ cfg( mshv3) ]
813814 self . vcpu_fd . run ( )
814815 } ;
815- // Note: if a `InterruptHandle::kill()` called while this thread is **here**
816- // Then signals will be sent to this thread until `running` is set to false.
817- // This is fine since the signal handler is a no-op.
816+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
817+ // (after vcpu.run() returns but before clear_running_bit):
818+ // - kill() continues sending signals to this thread (running bit is still set)
819+ // - The signals are harmless (no-op handler), we just need to check cancel_requested
820+ // - We load cancel_requested below to determine if this run was cancelled
818821 let cancel_requested = self
819822 . interrupt_handle
820- . cancel_requested
821- . load ( Ordering :: Relaxed ) ;
823+ . is_cancel_requested_for_generation ( generation) ;
822824 #[ cfg( gdb) ]
823825 let debug_interrupt = self
824826 . interrupt_handle
825827 . debug_interrupt
826828 . load ( Ordering :: Relaxed ) ;
827- // Note: if a `InterruptHandle::kill()` called while this thread is **here**
828- // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
829- // Additionally signals will be sent to this thread until `running` is set to false.
830- // This is fine since the signal handler is a no-op.
829+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
830+ // (after loading cancel_requested but before clear_running_bit):
831+ // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded)
832+ // - kill() continues sending signals until running bit is cleared
833+ // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call
834+ // - Signals sent now are harmless (no-op handler)
831835 self . interrupt_handle . clear_running_bit ( ) ;
832- // At this point, `running` is false so no more signals will be sent to this thread,
833- // but we may still receive async signals that were sent before this point.
834- // To prevent those signals from interrupting subsequent calls to `run()`,
835- // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below).
836+ // At this point, running bit is clear so kill() will stop sending signals.
837+ // However, we may still receive delayed signals that were sent before clear_running_bit.
838+ // These stale signals are harmless because:
839+ // - The signal handler is a no-op
840+ // - We check generation matching in cancel_requested before treating EINTR as cancellation
841+ // - If generation doesn't match, we return Retry instead of Cancelled
836842 let result = match exit_reason {
837843 Ok ( m) => match m. header . message_type {
838844 HALT_MESSAGE => {
@@ -912,14 +918,16 @@ impl Hypervisor for HypervLinuxDriver {
912918 }
913919 } ,
914920 Err ( e) => match e. errno ( ) {
915- // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
921+ // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR
916922 libc:: EINTR => {
917- // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or
918- // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it
923+ // Check if cancellation was requested for THIS specific generation.
924+ // If not, the EINTR came from:
925+ // - A debug interrupt (if GDB is enabled)
926+ // - A stale signal from a previous guest call (generation mismatch)
927+ // - A signal meant for a different sandbox on the same thread
928+ // In these cases, we return Retry to continue execution.
919929 if cancel_requested {
920- self . interrupt_handle
921- . cancel_requested
922- . store ( false , Ordering :: Relaxed ) ;
930+ self . interrupt_handle . clear_cancel_requested ( ) ;
923931 HyperlightExit :: Cancelled ( )
924932 } else {
925933 #[ cfg( gdb) ]
0 commit comments