Skip to content

Commit

Permalink
Merge branch 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Browse files Browse the repository at this point in the history

Pull x86 hyperv updates from Ingo Molnar:
 "Misc updates to the hyperv guest code:

   - Rework clockevents initialization to better support hibernation

   - Allow guests to enable InvariantTSC

   - Micro-optimize send_ipi_one"

* 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hyperv: Initialize clockevents earlier in CPU onlining
  x86/hyperv: Allow guests to enable InvariantTSC
  x86/hyperv: Micro-optimize send_ipi_one()
  • Loading branch information
torvalds committed Nov 26, 2019
2 parents cd4771f + 4df4cb9 commit 64d6a12
Show file tree
Hide file tree
Showing 10 changed files with 190 additions and 55 deletions.
16 changes: 13 additions & 3 deletions arch/x86/hyperv/hv_apic.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,20 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)

static bool __send_ipi_one(int cpu, int vector)
{
struct cpumask mask = CPU_MASK_NONE;
int vp = hv_cpu_number_to_vp_number(cpu);

cpumask_set_cpu(cpu, &mask);
return __send_ipi_mask(&mask, vector);
trace_hyperv_send_ipi_one(cpu, vector);

if (!hv_hypercall_pg || (vp == VP_INVAL))
return false;

if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;

if (vp >= 64)
return __send_ipi_mask_ex(cpumask_of(cpu), vector);

return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
}

static void hv_send_ipi(int cpu, int vector)
Expand Down
6 changes: 6 additions & 0 deletions arch/x86/hyperv/hv_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,12 @@ void __init hyperv_init(void)
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

/*
* Ignore any errors in setting up stimer clockevents
* as we can run with the LAPIC timer as a fallback.
*/
(void)hv_stimer_alloc();

hv_apic_init();

x86_init.pci.arch_init = hv_pci_init;
Expand Down
5 changes: 5 additions & 0 deletions arch/x86/include/asm/hyperv-tlfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
/* AccessReenlightenmentControls privilege */
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
/* AccessTscInvariantControls privilege */
#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)

/*
* Feature identification: indicates which flags were specified at partition
Expand Down Expand Up @@ -278,6 +280,9 @@
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108

/* TSC invariant control */
#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118

/*
* Declare the MSR used to setup pages used to communicate with the hypervisor.
*/
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/include/asm/trace/hyperv.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
__entry->ncpus, __entry->vector)
);

/*
 * Tracepoint fired once per __send_ipi_one() call; records the target
 * CPU and the interrupt vector, and renders them as "cpu %d vector %x".
 */
TRACE_EVENT(hyperv_send_ipi_one,
	    TP_PROTO(int cpu,
		     int vector),
	    TP_ARGS(cpu, vector),
	    TP_STRUCT__entry(
		    __field(int, cpu)
		    __field(int, vector)
		    ),
	    TP_fast_assign(__entry->cpu = cpu;
		    __entry->vector = vector;
		    ),
	    TP_printk("cpu %d vector %x",
		      __entry->cpu, __entry->vector)
	);

#endif /* CONFIG_HYPERV */

#undef TRACE_INCLUDE_PATH
Expand Down
7 changes: 6 additions & 1 deletion arch/x86/kernel/cpu/mshyperv.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
mark_tsc_unstable("running on Hyper-V");
if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
} else {
mark_tsc_unstable("running on Hyper-V");
}

/*
* Generation 2 instances don't support reading the NMI status from
Expand Down
154 changes: 124 additions & 30 deletions drivers/clocksource/hyperv_timer.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <linux/clocksource.h>
#include <linux/sched_clock.h>
#include <linux/mm.h>
#include <linux/cpuhotplug.h>
#include <clocksource/hyperv_timer.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
Expand All @@ -30,6 +31,15 @@ static u64 hv_sched_clock_offset __ro_after_init;
* mechanism is used when running on older versions of Hyper-V
* that don't support Direct Mode. While Hyper-V provides
* four stimer's per CPU, Linux uses only stimer0.
*
* Because Direct Mode does not require processing a VMbus
* message, stimer interrupts can be enabled earlier in the
* process of booting a CPU, and consistent with when timer
* interrupts are enabled for other clocksource drivers.
* However, for legacy versions of Hyper-V when Direct Mode
* is not enabled, setting up stimer interrupts must be
* delayed until VMbus is initialized and can process the
* interrupt message.
*/
static bool direct_mode_enabled;

Expand Down Expand Up @@ -102,17 +112,12 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
/*
* hv_stimer_init - Per-cpu initialization of the clockevent
*/
void hv_stimer_init(unsigned int cpu)
static int hv_stimer_init(unsigned int cpu)
{
struct clock_event_device *ce;

/*
* Synthetic timers are always available except on old versions of
* Hyper-V on x86. In that case, just return as Linux will use a
* clocksource based on emulated PIT or LAPIC timer hardware.
*/
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
return;
if (!hv_clock_event)
return 0;

ce = per_cpu_ptr(hv_clock_event, cpu);
ce->name = "Hyper-V clockevent";
Expand All @@ -127,28 +132,55 @@ void hv_stimer_init(unsigned int cpu)
HV_CLOCK_HZ,
HV_MIN_DELTA_TICKS,
HV_MAX_MAX_DELTA_TICKS);
return 0;
}
EXPORT_SYMBOL_GPL(hv_stimer_init);

/*
* hv_stimer_cleanup - Per-cpu cleanup of the clockevent
*/
void hv_stimer_cleanup(unsigned int cpu)
int hv_stimer_cleanup(unsigned int cpu)
{
struct clock_event_device *ce;

/* Turn off clockevent device */
if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
ce = per_cpu_ptr(hv_clock_event, cpu);
if (!hv_clock_event)
return 0;

/*
* In the legacy case where Direct Mode is not enabled
* (which can only be on x86/64), stimer cleanup happens
* relatively early in the CPU offlining process. We
* must unbind the stimer-based clockevent device so
* that the LAPIC timer can take over until clockevents
* are no longer needed in the offlining process. Note
* that clockevents_unbind_device() eventually calls
* hv_ce_shutdown().
*
* The unbind should not be done when Direct Mode is
* enabled because we may be on an architecture where
* there are no other clockevent devices to fallback to.
*/
ce = per_cpu_ptr(hv_clock_event, cpu);
if (direct_mode_enabled)
hv_ce_shutdown(ce);
}
else
clockevents_unbind_device(ce, cpu);

return 0;
}
EXPORT_SYMBOL_GPL(hv_stimer_cleanup);

/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
int hv_stimer_alloc(int sint)
int hv_stimer_alloc(void)
{
int ret;
int ret = 0;

/*
* Synthetic timers are always available except on old versions of
* Hyper-V on x86. In that case, return as error as Linux will use a
* clockevent based on emulated LAPIC timer hardware.
*/
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
return -EINVAL;

hv_clock_event = alloc_percpu(struct clock_event_device);
if (!hv_clock_event)
Expand All @@ -159,22 +191,78 @@ int hv_stimer_alloc(int sint)
if (direct_mode_enabled) {
ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
hv_stimer0_isr);
if (ret) {
free_percpu(hv_clock_event);
hv_clock_event = NULL;
return ret;
}
if (ret)
goto free_percpu;

/*
* Since we are in Direct Mode, stimer initialization
* can be done now with a CPUHP value in the same range
* as other clockevent devices.
*/
ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
"clockevents/hyperv/stimer:starting",
hv_stimer_init, hv_stimer_cleanup);
if (ret < 0)
goto free_stimer0_irq;
}
return ret;

stimer0_message_sint = sint;
return 0;
free_stimer0_irq:
hv_remove_stimer0_irq(stimer0_irq);
stimer0_irq = 0;
free_percpu:
free_percpu(hv_clock_event);
hv_clock_event = NULL;
return ret;
}
EXPORT_SYMBOL_GPL(hv_stimer_alloc);

/*
 * hv_stimer_legacy_init -- Called from the VMbus driver to handle
 * the case when Direct Mode is not enabled, and the stimer
 * must be initialized late in the CPU onlining process.
 *
 * @cpu:  CPU being onlined
 * @sint: synthetic interrupt source to deliver stimer0 messages on
 *
 * No-op when Direct Mode is enabled: in that case the stimer was
 * already set up via the CPU hotplug callback registered in
 * hv_stimer_alloc(). The sint must be recorded in the global
 * stimer0_message_sint *before* hv_stimer_init() runs, since the
 * per-CPU init programs the stimer using that value.
 */
void hv_stimer_legacy_init(unsigned int cpu, int sint)
{
	if (direct_mode_enabled)
		return;

	/*
	 * This function gets called by each vCPU, so setting the
	 * global stimer_message_sint value each time is conceptually
	 * not ideal, but the value passed in is always the same and
	 * it avoids introducing yet another interface into this
	 * clocksource driver just to set the sint in the legacy case.
	 */
	stimer0_message_sint = sint;
	(void)hv_stimer_init(cpu);
}
EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);

/*
 * hv_stimer_legacy_cleanup -- Called from the VMbus driver to
 * handle the case when Direct Mode is not enabled, and the
 * stimer must be cleaned up early in the CPU offlining
 * process.
 *
 * @cpu: CPU being offlined
 *
 * No-op when Direct Mode is enabled: cleanup is then driven by the
 * CPU hotplug teardown callback instead. The hv_stimer_cleanup()
 * return value is intentionally discarded; there is no way to act
 * on a failure this late in offlining.
 */
void hv_stimer_legacy_cleanup(unsigned int cpu)
{
	if (direct_mode_enabled)
		return;
	(void)hv_stimer_cleanup(cpu);
}
EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);


/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
void hv_stimer_free(void)
{
if (direct_mode_enabled && (stimer0_irq != 0)) {
if (!hv_clock_event)
return;

if (direct_mode_enabled) {
cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
hv_remove_stimer0_irq(stimer0_irq);
stimer0_irq = 0;
}
Expand All @@ -190,14 +278,20 @@ EXPORT_SYMBOL_GPL(hv_stimer_free);
void hv_stimer_global_cleanup(void)
{
int cpu;
struct clock_event_device *ce;

if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
for_each_present_cpu(cpu) {
ce = per_cpu_ptr(hv_clock_event, cpu);
clockevents_unbind_device(ce, cpu);
}
/*
 * hv_stimer_legacy_cleanup() will stop the stimer if Direct
* Mode is not enabled, and fallback to the LAPIC timer.
*/
for_each_present_cpu(cpu) {
hv_stimer_legacy_cleanup(cpu);
}

/*
* If Direct Mode is enabled, the cpuhp teardown callback
* (hv_stimer_cleanup) will be run on all CPUs to stop the
* stimers.
*/
hv_stimer_free();
}
EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
Expand Down
4 changes: 2 additions & 2 deletions drivers/hv/hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ int hv_synic_init(unsigned int cpu)
{
hv_synic_enable_regs(cpu);

hv_stimer_init(cpu);
hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

return 0;
}
Expand Down Expand Up @@ -277,7 +277,7 @@ int hv_synic_cleanup(unsigned int cpu)
if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY;

hv_stimer_cleanup(cpu);
hv_stimer_legacy_cleanup(cpu);

hv_synic_disable_regs(cpu);

Expand Down
30 changes: 14 additions & 16 deletions drivers/hv/vmbus_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1340,10 +1340,6 @@ static int vmbus_bus_init(void)
if (ret)
goto err_alloc;

ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
if (ret < 0)
goto err_alloc;

/*
* Initialize the per-cpu interrupt state and stimer state.
* Then connect to the host.
Expand Down Expand Up @@ -1400,9 +1396,8 @@ static int vmbus_bus_init(void)
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
err_cpuhp:
hv_stimer_free();
err_alloc:
hv_synic_free();
err_alloc:
hv_remove_vmbus_irq();

bus_unregister(&hv_bus);
Expand Down Expand Up @@ -2315,20 +2310,23 @@ static void hv_crash_handler(struct pt_regs *regs)
static int hv_synic_suspend(void)
{
/*
* When we reach here, all the non-boot CPUs have been offlined, and
* the stimers on them have been unbound in hv_synic_cleanup() ->
* When we reach here, all the non-boot CPUs have been offlined.
* If we're in a legacy configuration where stimer Direct Mode is
* not enabled, the stimers on the non-boot CPUs have been unbound
* in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
* hv_stimer_cleanup() -> clockevents_unbind_device().
*
* hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
* we do not unbind the stimer on CPU0 because: 1) it's unnecessary
* because the interrupts remain disabled between syscore_suspend()
* and syscore_resume(): see create_image() and resume_target_kernel();
* hv_synic_suspend() only runs on CPU0 with interrupts disabled.
* Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
* 1) it's unnecessary as interrupts remain disabled between
* syscore_suspend() and syscore_resume(): see create_image() and
* resume_target_kernel()
* 2) the stimer on CPU0 is automatically disabled later by
* syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
* -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
* would be triggered if we call clockevents_unbind_device(), which
* may sleep, in an interrupts-disabled context. So, we intentionally
* don't call hv_stimer_cleanup(0) here.
* -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
* 3) a warning would be triggered if we call
* clockevents_unbind_device(), which may sleep, in an
* interrupts-disabled context.
*/

hv_synic_disable_regs(0);
Expand Down
Loading

0 comments on commit 64d6a12

Please sign in to comment.