Skip to content

Commit

Permalink
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linu…
Browse files Browse the repository at this point in the history
…x/kernel/git/tip/tip

Pull x86/pti updates from Thomas Gleixner:
 "A mixed bag of fixes and updates for the ghosts which are hunting us.

  The scheduler fixes have been pulled into that branch to avoid
  conflicts.

   - A set of fixes to address a khread_parkme() race which caused lost
     wakeups and loss of state.

   - A deadlock fix for stop_machine() solved by moving the wakeups
     outside of the stopper_lock held region.

   - A set of Spectre V1 array access restrictions. The possible
     problematic spots were discuvered by Dan Carpenters new checks in
     smatch.

   - Removal of an unused file which was forgotten when the rest of that
     functionality was removed"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Remove unused file
  perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr
  perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver
  perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map()
  perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_*
  perf/core: Fix possible Spectre-v1 indexing for ->aux_pages[]
  sched/autogroup: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
  sched/core: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
  sched/core: Introduce set_special_state()
  kthread, sched/wait: Fix kthread_parkme() completion issue
  kthread, sched/wait: Fix kthread_parkme() wait-loop
  sched/fair: Fix the update of blocked load when newly idle
  stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
  • Loading branch information
torvalds committed May 13, 2018
2 parents 86a4ac4 + e0f6d1a commit 66e1c94
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 78 deletions.
1 change: 0 additions & 1 deletion arch/x86/entry/vdso/vdso32/vdso-fakesections.c

This file was deleted.

8 changes: 7 additions & 1 deletion arch/x86/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/nospec.h>

#include <asm/apic.h>
#include <asm/stacktrace.h>
Expand Down Expand Up @@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)

config = attr->config;

cache_type = (config >> 0) & 0xff;
cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);

cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);

cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);

val = hw_cache_event_ids[cache_type][cache_op][cache_result];

Expand Down Expand Up @@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
if (attr->config >= x86_pmu.max_events)
return -EINVAL;

attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);

/*
* The generic map:
*/
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/events/intel/cstate.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
Expand Down Expand Up @@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
if (!pkg_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
Expand Down
9 changes: 6 additions & 3 deletions arch/x86/events/msr.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/nospec.h>
#include <asm/intel-family.h>

enum perf_msr_id {
Expand Down Expand Up @@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;

if (cfg >= PERF_MSR_EVENT_MAX)
return -EINVAL;

/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
Expand All @@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
event->attr.sample_period) /* no sampling */
return -EINVAL;

if (cfg >= PERF_MSR_EVENT_MAX)
return -EINVAL;

cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);

if (!msr[cfg].attr)
return -EINVAL;

Expand Down
1 change: 1 addition & 0 deletions include/linux/kthread.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k);
void kthread_parkme(void);
void kthread_park_complete(struct task_struct *k);

int kthreadd(void *unused);
extern struct task_struct *kthreadd_task;
Expand Down
50 changes: 45 additions & 5 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,36 @@ struct task_group;

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP

/*
* Special states are those that do not use the normal wait-loop pattern. See
* the comment with set_special_state().
*/
#define is_special_task_state(state) \
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))

#define __set_current_state(state_value) \
do { \
WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \
current->state = (state_value); \
} while (0)

#define set_current_state(state_value) \
do { \
WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \
smp_store_mb(current->state, (state_value)); \
} while (0)

#define set_special_state(state_value) \
do { \
unsigned long flags; /* may shadow */ \
WARN_ON_ONCE(!is_special_task_state(state_value)); \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->task_state_change = _THIS_IP_; \
current->state = (state_value); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
#else
/*
* set_current_state() includes a barrier so that the write of current->state
Expand All @@ -144,8 +163,8 @@ struct task_group;
*
* The above is typically ordered against the wakeup, which does:
*
* need_sleep = false;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
* need_sleep = false;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
* Where wake_up_state() (and all other wakeup primitives) imply enough
* barriers to order the store of the variable against wakeup.
Expand All @@ -154,12 +173,33 @@ struct task_group;
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
*
* This is obviously fine, since they both store the exact same value.
* However, with slightly different timing the wakeup TASK_RUNNING store can
* also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
* a problem either because that will result in one extra go around the loop
* and our @cond test will save the day.
*
* Also see the comments of try_to_wake_up().
*/
#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
#define set_current_state(state_value) smp_store_mb(current->state, (state_value))
#define __set_current_state(state_value) \
current->state = (state_value)

#define set_current_state(state_value) \
smp_store_mb(current->state, (state_value))

/*
* set_special_state() should be used for those states when the blocking task
* can not use the regular condition based wait-loop. In that case we must
* serialize against wakeups such that any possible in-flight TASK_RUNNING stores
* will not collide with our state change.
*/
#define set_special_state(state_value) \
do { \
unsigned long flags; /* may shadow */ \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->state = (state_value); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)

#endif

/* Task command name length: */
Expand Down
2 changes: 1 addition & 1 deletion include/linux/sched/signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
{
spin_lock_irq(&current->sighand->siglock);
if (current->jobctl & JOBCTL_STOP_DEQUEUED)
__set_current_state(TASK_STOPPED);
set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);

schedule();
Expand Down
7 changes: 5 additions & 2 deletions kernel/events/ring_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/circ_buf.h>
#include <linux/poll.h>
#include <linux/nospec.h>

#include "internal.h"

Expand Down Expand Up @@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
return NULL;

/* AUX space */
if (pgoff >= rb->aux_pgoff)
return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
if (pgoff >= rb->aux_pgoff) {
int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
return virt_to_page(rb->aux_pages[aux_pgoff]);
}
}

return __perf_mmap_to_page(rb, pgoff);
Expand Down
50 changes: 23 additions & 27 deletions kernel/kthread.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ enum KTHREAD_BITS {
KTHREAD_IS_PER_CPU = 0,
KTHREAD_SHOULD_STOP,
KTHREAD_SHOULD_PARK,
KTHREAD_IS_PARKED,
};

static inline void set_kthread_struct(void *kthread)
Expand Down Expand Up @@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)

static void __kthread_parkme(struct kthread *self)
{
__set_current_state(TASK_PARKED);
while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
complete(&self->parked);
for (;;) {
set_current_state(TASK_PARKED);
if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
break;
schedule();
__set_current_state(TASK_PARKED);
}
clear_bit(KTHREAD_IS_PARKED, &self->flags);
__set_current_state(TASK_RUNNING);
}

Expand All @@ -194,6 +191,11 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);

void kthread_park_complete(struct task_struct *k)
{
complete(&to_kthread(k)->parked);
}

static int kthread(void *_create)
{
/* Copy data: it's on kthread's stack */
Expand Down Expand Up @@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_kthread(k);

clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
* We clear the IS_PARKED bit here as we don't wait
* until the task has left the park code. So if we'd
* park before that happens we'd see the IS_PARKED bit
* which might be about to be cleared.
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again.
*/
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
/*
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again.
*/
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED);
wake_up_state(k, TASK_PARKED);
}
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED);

clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
wake_up_state(k, TASK_PARKED);
}
EXPORT_SYMBOL_GPL(kthread_unpark);

Expand All @@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS;

if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
wake_up_process(k);
wait_for_completion(&kthread->parked);
}
if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
return -EBUSY;

set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
wake_up_process(k);
wait_for_completion(&kthread->parked);
}

return 0;
Expand Down
7 changes: 5 additions & 2 deletions kernel/sched/autogroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/*
* Auto-group scheduling implementation:
*/
#include <linux/nospec.h>
#include "sched.h"

unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
Expand Down Expand Up @@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
static unsigned long next = INITIAL_JIFFIES;
struct autogroup *ag;
unsigned long shares;
int err;
int err, idx;

if (nice < MIN_NICE || nice > MAX_NICE)
return -EINVAL;
Expand All @@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)

next = HZ / 10 + jiffies;
ag = autogroup_task_get(p);
shares = scale_load(sched_prio_to_weight[nice + 20]);

idx = array_index_nospec(nice + 20, 40);
shares = scale_load(sched_prio_to_weight[idx]);

down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, shares);
Expand Down
Loading

0 comments on commit 66e1c94

Please sign in to comment.