Skip to content

Commit 4ae7dc9

Browse files
Frederic Weisbecker authored and Ingo Molnar committed
entry/kvm: Explicitly flush pending rcuog wakeup before last rescheduling point
Following the idle loop model, cleanly check for pending rcuog wakeup before the last rescheduling point upon resuming to guest mode. This way we can avoid doing it from rcu_user_enter() with the last resort self-IPI hack that enforces rescheduling.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210131230548.32970-6-frederic@kernel.org
1 parent 47b8ff1 commit 4ae7dc9

File tree

4 files changed

+50
-10
lines changed

4 files changed

+50
-10
lines changed

arch/x86/kvm/x86.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1782,6 +1782,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
17821782

17831783
bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
17841784
{
1785+
xfer_to_guest_mode_prepare();
17851786
return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
17861787
xfer_to_guest_mode_work_pending();
17871788
}

include/linux/entry-kvm.h

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,20 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu,
4646
*/
4747
int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu);
4848

49+
/**
50+
* xfer_to_guest_mode_prepare - Perform last minute preparation work that
51+
* need to be handled while IRQs are disabled
52+
* upon entering to guest.
53+
*
54+
* Has to be invoked with interrupts disabled before the last call
55+
* to xfer_to_guest_mode_work_pending().
56+
*/
57+
static inline void xfer_to_guest_mode_prepare(void)
58+
{
59+
lockdep_assert_irqs_disabled();
60+
rcu_nocb_flush_deferred_wakeup();
61+
}
62+
4963
/**
5064
* __xfer_to_guest_mode_work_pending - Check if work is pending
5165
*

kernel/rcu/tree.c

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -678,9 +678,10 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
678678

679679
#ifdef CONFIG_NO_HZ_FULL
680680

681+
#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)
681682
/*
682683
* An empty function that will trigger a reschedule on
683-
* IRQ tail once IRQs get re-enabled on userspace resume.
684+
* IRQ tail once IRQs get re-enabled on userspace/guest resume.
684685
*/
685686
static void late_wakeup_func(struct irq_work *work)
686687
{
@@ -689,6 +690,37 @@ static void late_wakeup_func(struct irq_work *work)
689690
static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
690691
IRQ_WORK_INIT(late_wakeup_func);
691692

693+
/*
694+
* If either:
695+
*
696+
* 1) the task is about to enter in guest mode and $ARCH doesn't support KVM generic work
697+
* 2) the task is about to enter in user mode and $ARCH doesn't support generic entry.
698+
*
699+
* In these cases the late RCU wake ups aren't supported in the resched loops and our
700+
* last resort is to fire a local irq_work that will trigger a reschedule once IRQs
701+
* get re-enabled again.
702+
*/
703+
noinstr static void rcu_irq_work_resched(void)
704+
{
705+
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
706+
707+
if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU))
708+
return;
709+
710+
if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU))
711+
return;
712+
713+
instrumentation_begin();
714+
if (do_nocb_deferred_wakeup(rdp) && need_resched()) {
715+
irq_work_queue(this_cpu_ptr(&late_wakeup_work));
716+
}
717+
instrumentation_end();
718+
}
719+
720+
#else
721+
static inline void rcu_irq_work_resched(void) { }
722+
#endif
723+
692724
/**
693725
* rcu_user_enter - inform RCU that we are resuming userspace.
694726
*
@@ -702,22 +734,14 @@ static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
702734
*/
703735
noinstr void rcu_user_enter(void)
704736
{
705-
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
706-
707737
lockdep_assert_irqs_disabled();
708738

709739
/*
710740
* Other than generic entry implementation, we may be past the last
711741
* rescheduling opportunity in the entry code. Trigger a self IPI
712742
* that will fire and reschedule once we resume in user/guest mode.
713743
*/
714-
instrumentation_begin();
715-
if (!IS_ENABLED(CONFIG_GENERIC_ENTRY) || (current->flags & PF_VCPU)) {
716-
if (do_nocb_deferred_wakeup(rdp) && need_resched())
717-
irq_work_queue(this_cpu_ptr(&late_wakeup_work));
718-
}
719-
instrumentation_end();
720-
744+
rcu_irq_work_resched();
721745
rcu_eqs_enter(true);
722746
}
723747

kernel/rcu/tree_plugin.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2197,6 +2197,7 @@ void rcu_nocb_flush_deferred_wakeup(void)
21972197
{
21982198
do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
21992199
}
2200+
EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
22002201

22012202
void __init rcu_init_nohz(void)
22022203
{

0 commit comments

Comments
 (0)