rcu: Let non-offloaded idle CPUs with callbacks defer tick
When a CPU goes idle, rcu_needs_cpu() is invoked to determine whether or
not RCU needs the scheduler-clock tick to keep interrupting.  Right now,
RCU keeps the tick on for a given idle CPU if there are any non-offloaded
callbacks queued on that CPU.

But if all of these callbacks are waiting for a grace period to finish,
there is no point in scheduling a tick before that grace period has any
reasonable chance of completing.  This commit therefore delays the tick
in the case where all the callbacks are waiting for a specific grace
period to elapse.  In theory, this should result in a 50-70% reduction in
RCU-induced scheduling-clock ticks on mostly-idle CPUs.  In practice, TBD.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
paulmckrcu committed Oct 18, 2022
1 parent 1d7d256 commit 00c153b
Showing 4 changed files with 43 additions and 15 deletions.
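
Before the per-file diffs, here is a minimal standalone sketch of the deferral arithmetic that the new rcu_needs_cpu() in kernel/rcu/tree.c applies. It is not kernel code: the HZ, jiffies, and force-quiescent-state interval values are illustrative assumptions, and the plain comparison stands in for the kernel's wraparound-safe time_after().

#include <stdint.h>
#include <stdio.h>

#define HZ 1000
#define TICK_NSEC (1000000000ULL / HZ)

int main(void)
{
        uint64_t basemono = 0;          /* "now" in nanoseconds (assumed) */
        unsigned long j = 1002;         /* current jiffies (assumed) */
        unsigned long jlast = 1000;     /* jiffies at the last scheduler tick (assumed) */
        unsigned long first_fqs = 1;    /* stand-in for jiffies_till_first_fqs */
        unsigned long next_fqs = 1;     /* stand-in for jiffies_till_next_fqs */
        unsigned long jwait = first_fqs + next_fqs + 1;

        /* time_after() in the kernel; a plain compare is fine for this sketch. */
        if (j > jlast + jwait) {
                /* Roughly a grace period or two has passed since the last tick,
                 * so the tick is still needed (the kernel would return 1). */
                printf("tick still needed\n");
        } else {
                /* Otherwise report a deferred deadline, as done via *nextevt. */
                uint64_t nextevt = basemono + TICK_NSEC * (jlast + jwait - j);
                printf("RCU can wait: next event deferred %llu ns\n",
                       (unsigned long long)(nextevt - basemono));
        }
        return 0;
}

With these assumed values jwait is 3 jiffies, so the idle CPU's next event can be pushed out by one more jiffy (1 ms at HZ=1000) instead of re-arming the tick every period.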
2 changes: 1 addition & 1 deletion include/linux/rcutiny.h
@@ -133,7 +133,7 @@ static inline void rcu_softirq_qs(void)
                 rcu_tasks_qs(current, (preempt)); \
         } while (0)
 
-static inline int rcu_needs_cpu(void)
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
         return 0;
 }
2 changes: 1 addition & 1 deletion include/linux/rcutree.h
@@ -19,7 +19,7 @@
 
 void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
-int rcu_needs_cpu(void);
+int rcu_needs_cpu(u64 basemono, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
 
 /*
44 changes: 35 additions & 9 deletions kernel/rcu/tree.c
@@ -670,12 +670,40 @@ void __rcu_irq_enter_check_tick(void)
  * scheduler-clock interrupt.
  *
  * Just check whether or not this CPU has non-offloaded RCU callbacks
- * queued.
+ * queued that need immediate attention.
  */
-int rcu_needs_cpu(void)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
-        return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
-               !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
+        unsigned long j;
+        unsigned long jlast;
+        unsigned long jwait;
+        struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+        struct rcu_segcblist *rsclp = &rdp->cblist;
+
+        // Disabled, empty, or offloaded means nothing to do.
+        if (!rcu_segcblist_is_enabled(rsclp) ||
+            rcu_segcblist_empty(rsclp) || rcu_rdp_is_offloaded(rdp)) {
+                *nextevt = KTIME_MAX;
+                return 0;
+        }
+
+        // Callbacks ready to invoke or that have not already been
+        // assigned a grace period need immediate attention.
+        if (!rcu_segcblist_segempty(rsclp, RCU_DONE_TAIL) ||
+            !rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL))
+                return 1;
+
+        // There are callbacks waiting for some later grace period.
+        // Wait for about a grace period or two since the last tick, at which
+        // point there is high probability that this CPU will need to do some
+        // work for RCU.
+        j = jiffies;
+        jlast = __this_cpu_read(rcu_data.last_sched_clock);
+        jwait = READ_ONCE(jiffies_till_first_fqs) + READ_ONCE(jiffies_till_next_fqs) + 1;
+        if (time_after(j, jlast + jwait))
+                return 1;
+        *nextevt = basemono + TICK_NSEC * (jlast + jwait - j);
+        return 0;
 }
 
 /*
@@ -2318,11 +2346,9 @@ void rcu_sched_clock_irq(int user)
 {
         unsigned long j;
 
-        if (IS_ENABLED(CONFIG_PROVE_RCU)) {
-                j = jiffies;
-                WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
-                __this_cpu_write(rcu_data.last_sched_clock, j);
-        }
+        j = jiffies;
+        WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+        __this_cpu_write(rcu_data.last_sched_clock, j);
         trace_rcu_utilization(TPS("Start scheduler-tick"));
         lockdep_assert_irqs_disabled();
         raw_cpu_inc(rcu_data.ticks_this_gp);
10 changes: 6 additions & 4 deletions kernel/time/tick-sched.c
@@ -784,7 +784,7 @@ static inline bool local_timer_softirq_pending(void)
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-        u64 basemono, next_tick, delta, expires;
+        u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
         unsigned long basejiff;
         unsigned int seq;
 
@@ -807,7 +807,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
          * minimal delta which brings us back to this place
          * immediately. Lather, rinse and repeat...
          */
-        if (rcu_needs_cpu() || arch_needs_cpu() ||
+        if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
            irq_work_needs_cpu() || local_timer_softirq_pending()) {
                next_tick = basemono + TICK_NSEC;
        } else {
@@ -818,8 +818,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
                 * disabled this also looks at the next expiring
                 * hrtimer.
                 */
-               next_tick = get_next_timer_interrupt(basejiff, basemono);
-               ts->next_timer = next_tick;
+               next_tmr = get_next_timer_interrupt(basejiff, basemono);
+               ts->next_timer = next_tmr;
+               /* Take the next rcu event into account */
+               next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
        }
 
        /*
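
When rcu_needs_cpu() returns 0 and nothing else forces the tick, the hunk above simply takes the earlier of the timer-wheel deadline and the RCU deadline returned through *nextevt. A tiny standalone model of that selection, with assumed deadline values rather than real kernel state:

#include <stdint.h>
#include <stdio.h>

/* The earlier of the next timer expiry and the next RCU deadline wins. */
static uint64_t pick_next_tick(uint64_t next_rcu, uint64_t next_tmr)
{
        return next_rcu < next_tmr ? next_rcu : next_tmr;
}

int main(void)
{
        uint64_t basemono = 0;                   /* "now" in nanoseconds (assumed) */
        uint64_t next_rcu = basemono + 3000000;  /* RCU deadline in 3 ms (assumed) */
        uint64_t next_tmr = basemono + 10000000; /* timer-wheel deadline in 10 ms (assumed) */

        printf("next tick at +%llu ns\n",
               (unsigned long long)(pick_next_tick(next_rcu, next_tmr) - basemono));
        return 0;
}

Here the assumed 3 ms RCU deadline is earlier than the 10 ms timer-wheel deadline, so the CPU would re-arm its next event 3 ms out.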
