From c14d52cf8a3ba5e1ebe1854c0d19f608a0fb1c67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Mon, 4 Apr 2016 20:54:30 +0200 Subject: [PATCH 1/9] KVM: x86: add kvm_apic_map_get_dest_lapic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_irq_delivery_to_apic_fast and kvm_intr_is_single_vcpu_fast both compute the interrupt destination. Factor the code. 'struct kvm_lapic **dst = NULL' had to be added to silence GCC. GCC might complain about potential NULL access in the future, because it missed conditions that avoided uninitialized uses of dst. Signed-off-by: Radim Krčmář --- arch/x86/kvm/lapic.c | 219 ++++++++++++++++++------------------------- 1 file changed, 90 insertions(+), 129 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1a2da0e5a37328..6812e61c12d4a9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -694,102 +694,115 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm) } } -bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) +/* If kvm_apic_map_get_dest_lapic returns true, then *bitmap encodes accessible + * elements in the *dst array. Those are destinations for the interrupt. + */ +static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, struct kvm_apic_map *map, + struct kvm_lapic ***dst, unsigned long *bitmap) { - struct kvm_apic_map *map; - unsigned long bitmap = 1; - struct kvm_lapic **dst; - int i; - bool ret, x2apic_ipi; - - *r = -1; + int i, lowest; + bool x2apic_ipi; + u16 cid; if (irq->shorthand == APIC_DEST_SELF) { - *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); + *dst = &src; + *bitmap = 1; return true; - } - - if (irq->shorthand) + } else if (irq->shorthand) return false; x2apic_ipi = src && apic_x2apic_mode(src); if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) return false; - ret = true; - rcu_read_lock(); - map = rcu_dereference(kvm->arch.apic_map); + if (!map) + return false; - if (!map) { - ret = false; - goto out; + if (irq->dest_mode == APIC_DEST_PHYSICAL) { + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { + *bitmap = 0; + } else { + *dst = &map->phys_map[irq->dest_id]; + *bitmap = 1; + } + return true; } - if (irq->dest_mode == APIC_DEST_PHYSICAL) { - if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) - goto out; + if (!kvm_apic_logical_map_valid(map)) + return false; - dst = &map->phys_map[irq->dest_id]; - } else { - u16 cid; + apic_logical_id(map, irq->dest_id, &cid, (u16 *)bitmap); + + if (cid >= ARRAY_SIZE(map->logical_map)) { + *bitmap = 0; + return true; + } + + *dst = map->logical_map[cid]; + + if (!kvm_lowest_prio_delivery(irq)) + return true; - if (!kvm_apic_logical_map_valid(map)) { - ret = false; - goto out; + if (!kvm_vector_hashing_enabled()) { + lowest = -1; + for_each_set_bit(i, bitmap, 16) { + if (!(*dst)[i]) + continue; + if (lowest < 0) + lowest = i; + else if (kvm_apic_compare_prio((*dst)[i]->vcpu, + (*dst)[lowest]->vcpu) < 0) + lowest = i; } + } else { + if (!*bitmap) + return true; - apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); + lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), + bitmap, 16); - if (cid >= ARRAY_SIZE(map->logical_map)) - goto out; + if (!(*dst)[lowest]) { + kvm_apic_disabled_lapic_found(kvm); + *bitmap = 0; + return true; + } + } - dst = map->logical_map[cid]; + *bitmap = (lowest >= 0) ? 1 << lowest : 0; - if (!kvm_lowest_prio_delivery(irq)) - goto set_irq; + return true; +} - if (!kvm_vector_hashing_enabled()) { - int l = -1; - for_each_set_bit(i, &bitmap, 16) { - if (!dst[i]) - continue; - if (l < 0) - l = i; - else if (kvm_apic_compare_prio(dst[i]->vcpu, - dst[l]->vcpu) < 0) - l = i; - } - bitmap = (l >= 0) ? 1 << l : 0; - } else { - int idx; - unsigned int dest_vcpus; +bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) +{ + struct kvm_apic_map *map; + unsigned long bitmap; + struct kvm_lapic **dst = NULL; + int i; + bool ret; - dest_vcpus = hweight16(bitmap); - if (dest_vcpus == 0) - goto out; + *r = -1; - idx = kvm_vector_to_index(irq->vector, - dest_vcpus, &bitmap, 16); + if (irq->shorthand == APIC_DEST_SELF) { + *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); + return true; + } - if (!dst[idx]) { - kvm_apic_disabled_lapic_found(kvm); - goto out; - } + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); - bitmap = (idx >= 0) ? 1 << idx : 0; + ret = kvm_apic_map_get_dest_lapic(kvm, src, irq, map, &dst, &bitmap); + if (ret) + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + if (*r < 0) + *r = 0; + *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } - } -set_irq: - for_each_set_bit(i, &bitmap, 16) { - if (!dst[i]) - continue; - if (*r < 0) - *r = 0; - *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); - } -out: rcu_read_unlock(); return ret; } @@ -812,8 +825,9 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { struct kvm_apic_map *map; + unsigned long bitmap; + struct kvm_lapic **dst = NULL; bool ret = false; - struct kvm_lapic *dst = NULL; if (irq->shorthand) return false; @@ -821,69 +835,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); - if (!map) - goto out; - - if (irq->dest_mode == APIC_DEST_PHYSICAL) { - if (irq->dest_id == 0xFF) - goto out; - - if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) - goto out; - - dst = map->phys_map[irq->dest_id]; - if (dst && kvm_apic_present(dst->vcpu)) - *dest_vcpu = dst->vcpu; - else - goto out; - } else { - u16 cid; - unsigned long bitmap = 1; - int i, r = 0; - - if (!kvm_apic_logical_map_valid(map)) - goto out; - - apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); - - if (cid >= ARRAY_SIZE(map->logical_map)) - goto out; - - if (kvm_vector_hashing_enabled() && - kvm_lowest_prio_delivery(irq)) { - int idx; - unsigned int dest_vcpus; + if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && + hweight16(bitmap) == 1) { + unsigned long i = find_first_bit(&bitmap, 16); - dest_vcpus = hweight16(bitmap); - if (dest_vcpus == 0) - goto out; - - idx = kvm_vector_to_index(irq->vector, dest_vcpus, - &bitmap, 16); - - dst = map->logical_map[cid][idx]; - if (!dst) { - kvm_apic_disabled_lapic_found(kvm); - goto out; - } - - *dest_vcpu = dst->vcpu; - } else { - for_each_set_bit(i, &bitmap, 16) { - dst = map->logical_map[cid][i]; - if (++r == 2) - goto out; - } - - if (dst && kvm_apic_present(dst->vcpu)) - *dest_vcpu = dst->vcpu; - else - goto out; + if (dst[i]) { + *dest_vcpu = dst[i]->vcpu; + ret = true; } } - ret = true; -out: rcu_read_unlock(); return ret; } From e084288ce8bfc474dc2999fab189d107b531458a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 6 Apr 2016 20:33:33 +0200 Subject: [PATCH 2/9] KVM: x86: dynamic kvm_apic_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit x2APIC supports up to 2^32-1 LAPICs, but most guest in coming years will have slighly less VCPUs. Dynamic size saves memory at the cost of turning one constant into a variable. apic_map mutex had to be moved before allocation to avoid races with cpu hotplug. Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 9 ++-- arch/x86/kvm/lapic.c | 87 ++++++++++++++++++--------------- arch/x86/kvm/lapic.h | 2 +- 3 files changed, 54 insertions(+), 44 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b7e394485a5f2a..c93c4a7877ed72 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -682,9 +682,12 @@ struct kvm_arch_memory_slot { struct kvm_apic_map { struct rcu_head rcu; u8 mode; - struct kvm_lapic *phys_map[256]; - /* first index is cluster id second is cpu id in a cluster */ - struct kvm_lapic *logical_map[16][16]; + u32 size; + union { + struct kvm_lapic *xapic_flat_map[8]; + struct kvm_lapic *xapic_cluster_map[16][4]; + }; + struct kvm_lapic *phys_map[]; }; /* Hyper-V emulation context */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6812e61c12d4a9..90e07c109dfe3e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -129,26 +129,32 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -/* The logical map is definitely wrong if we have multiple - * modes at the same time. (Physical map is always right.) - */ -static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) -{ - return !(map->mode & (map->mode - 1)); -} - -static inline void -apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) -{ - unsigned lid_bits; - - BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4); - BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8); - BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16); - lid_bits = map->mode; +static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, + u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { + switch (map->mode) { + case KVM_APIC_MODE_X2APIC: { + u32 offset = (dest_id >> 16) * 16; + // XXX: phys_map must be allocated as multiples of 16 + if (offset < map->size) { + *cluster = &map->phys_map[offset]; + *mask = dest_id & 0xffff; + } else + *mask = 0; - *cid = dest_id >> lid_bits; - *lid = dest_id & ((1 << lid_bits) - 1); + return true; + } + case KVM_APIC_MODE_XAPIC_FLAT: + *cluster = map->xapic_flat_map; + *mask = dest_id & 0xff; + return true; + case KVM_APIC_MODE_XAPIC_CLUSTER: + *cluster = map->xapic_cluster_map[dest_id >> 4]; + *mask = dest_id & 0xf; + return true; + default: + /* Not optimized. */ + return false; + } } static void recalculate_apic_map(struct kvm *kvm) @@ -156,17 +162,28 @@ static void recalculate_apic_map(struct kvm *kvm) struct kvm_apic_map *new, *old = NULL; struct kvm_vcpu *vcpu; int i; - - new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL); + u32 size = 255; mutex_lock(&kvm->arch.apic_map_lock); + kvm_for_each_vcpu(i, vcpu, kvm) + if (kvm_apic_present(vcpu)) + size = max(size, kvm_apic_id(vcpu->arch.apic)); + + size++; + size += (16 - size) & 16; + new = kzalloc(sizeof(struct kvm_apic_map) + + sizeof(struct kvm_lapic) * size, GFP_KERNEL); + if (!new) goto out; + new->size = size; + kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; - u16 cid, lid; + struct kvm_lapic **cluster; + u16 mask; u32 ldr, aid; if (!kvm_apic_present(vcpu)) @@ -175,7 +192,7 @@ static void recalculate_apic_map(struct kvm *kvm) aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); - if (aid < ARRAY_SIZE(new->phys_map)) + if (aid < size) new->phys_map[aid] = apic; if (apic_x2apic_mode(apic)) { @@ -188,13 +205,11 @@ static void recalculate_apic_map(struct kvm *kvm) new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; } - if (!kvm_apic_logical_map_valid(new)) + if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask)) continue; - apic_logical_id(new, ldr, &cid, &lid); - - if (lid && cid < ARRAY_SIZE(new->logical_map)) - new->logical_map[cid][ffs(lid) - 1] = apic; + if (mask) + cluster[ffs(mask) - 1] = apic; } out: old = rcu_dereference_protected(kvm->arch.apic_map, @@ -703,7 +718,6 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic { int i, lowest; bool x2apic_ipi; - u16 cid; if (irq->shorthand == APIC_DEST_SELF) { *dst = &src; @@ -720,7 +734,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic return false; if (irq->dest_mode == APIC_DEST_PHYSICAL) { - if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { + if (irq->dest_id >= map->size) { *bitmap = 0; } else { *dst = &map->phys_map[irq->dest_id]; @@ -729,18 +743,11 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic return true; } - if (!kvm_apic_logical_map_valid(map)) + *bitmap = 0; + if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, + (u16 *)bitmap)) return false; - apic_logical_id(map, irq->dest_id, &cid, (u16 *)bitmap); - - if (cid >= ARRAY_SIZE(map->logical_map)) { - *bitmap = 0; - return true; - } - - *dst = map->logical_map[cid]; - if (!kvm_lowest_prio_delivery(irq)) return true; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f71183e502ee9a..d818a36ce24e8f 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -167,7 +167,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } -static inline int kvm_apic_id(struct kvm_lapic *apic) +static inline u32 kvm_apic_id(struct kvm_lapic *apic) { return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } From 955fd4a8e8aaf421e7b5f6b0b06da8ea02ba2673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 6 Apr 2016 20:34:28 +0200 Subject: [PATCH 3/9] KVM: x86: use u16 for logical VCPU mask in lapic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit for_all_bits() never were type safe, so we don't lose safety when casting u16 * to unsigned long * and u16 is the size we want to avoid mishaps when sending the pointer to functions that operate only on 16 bits. Signed-off-by: Radim Krčmář --- arch/x86/kvm/lapic.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 90e07c109dfe3e..6ae5e74031d6fb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -714,7 +714,7 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm) */ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, struct kvm_apic_map *map, - struct kvm_lapic ***dst, unsigned long *bitmap) + struct kvm_lapic ***dst, u16 *bitmap) { int i, lowest; bool x2apic_ipi; @@ -743,9 +743,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic return true; } - *bitmap = 0; - if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, - (u16 *)bitmap)) + if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, bitmap)) return false; if (!kvm_lowest_prio_delivery(irq)) @@ -753,7 +751,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic if (!kvm_vector_hashing_enabled()) { lowest = -1; - for_each_set_bit(i, bitmap, 16) { + for_each_set_bit(i, (unsigned long *)bitmap, 16) { if (!(*dst)[i]) continue; if (lowest < 0) @@ -767,7 +765,7 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, struct kvm_lapic return true; lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), - bitmap, 16); + (unsigned long *)bitmap, 16); if (!(*dst)[lowest]) { kvm_apic_disabled_lapic_found(kvm); @@ -785,7 +783,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) { struct kvm_apic_map *map; - unsigned long bitmap; + u16 bitmap; struct kvm_lapic **dst = NULL; int i; bool ret; @@ -802,7 +800,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, ret = kvm_apic_map_get_dest_lapic(kvm, src, irq, map, &dst, &bitmap); if (ret) - for_each_set_bit(i, &bitmap, 16) { + for_each_set_bit(i, (unsigned long *)&bitmap, 16) { if (!dst[i]) continue; if (*r < 0) @@ -832,7 +830,7 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { struct kvm_apic_map *map; - unsigned long bitmap; + u16 bitmap; struct kvm_lapic **dst = NULL; bool ret = false; @@ -844,7 +842,7 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && hweight16(bitmap) == 1) { - unsigned long i = find_first_bit(&bitmap, 16); + unsigned long i = find_first_bit((unsigned long *)&bitmap, 16); if (dst[i]) { *dest_vcpu = dst[i]->vcpu; From f10f124f5901073b48ad3e647b53264ed7d3839c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Mon, 2 May 2016 13:15:16 +0200 Subject: [PATCH 4/9] KVM: x86: use generic function for MSI parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Radim Krčmář --- arch/x86/kvm/irq_comm.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 54ead79e444b55..3d17eee0987bcd 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -391,21 +391,16 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm->arch.nr_reserved_ioapic_pins); for (i = 0; i < nr_ioapic_pins; ++i) { hlist_for_each_entry(entry, &table->map[i], link) { - u32 dest_id, dest_mode; - bool level; + struct kvm_lapic_irq irq; if (entry->type != KVM_IRQ_ROUTING_MSI) continue; - dest_id = (entry->msi.address_lo >> 12) & 0xff; - dest_mode = (entry->msi.address_lo >> 2) & 0x1; - level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL; - if (level && kvm_apic_match_dest(vcpu, NULL, 0, - dest_id, dest_mode)) { - u32 vector = entry->msi.data & 0xff; - - __set_bit(vector, - ioapic_handled_vectors); - } + + kvm_set_msi_irq(entry, &irq); + + if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + irq.dest_id, irq.dest_mode)) + __set_bit(irq.vector, ioapic_handled_vectors); } } srcu_read_unlock(&kvm->irq_srcu, idx); From dd9e852e392d84a07e1761d49f4b3b4babefe853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 3 May 2016 18:04:51 +0200 Subject: [PATCH 5/9] KVM: support x2APIC ID in userspace routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need a way to pass 32 bit APIC ID. Intel® Virtualization Technology for Directed I/O, 5.1.8 Programming in Intel® 64 x2APIC Mode, specifies that Remapping Hardware is configured with APIC ID 31:8 in Upper Address Register 31:8. This patch adopts that method and applies it to KVM_SIGNAL_MSI and GSI routes. Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 11 ++++++++++- arch/x86/kvm/irq_comm.c | 9 +++++++-- arch/x86/kvm/vmx.c | 3 ++- arch/x86/kvm/x86.c | 1 + include/uapi/linux/kvm.h | 5 +++++ virt/kvm/irqchip.c | 6 +++++- 6 files changed, 30 insertions(+), 5 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4d0542c5206b8f..07bcedc0ba0996 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1461,6 +1461,7 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 +#define KVM_IRQ_ROUTING_MSI_X2APIC 5 No flags are specified so far, the corresponding field must be set to zero. @@ -1489,6 +1490,11 @@ struct kvm_irq_routing_hv_sint { __u32 sint; }; +KVM_IRQ_ROUTING_MSI_X2APIC can be used if KVM_CAP_MSI_X2APIC is present. +The entry uses struct kvm_irq_routing_msi and stores APIC ID bits 8-31 in +address_hi bits 8-31. + + 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) Capability: none @@ -2166,7 +2172,10 @@ struct kvm_msi { __u8 pad[16]; }; -No flags are defined so far. The corresponding field must be 0. +Valid flags are 0 and bitwise OR of any following: +* KVM_SIGNAL_MSI_X2APIC + - valid with capability KVM_CAP_MSI_X2APIC + - address_hi bits 8-31 contain APIC ID bits 8-31 4.71 KVM_CREATE_PIT2 diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 3d17eee0987bcd..b9595251834b3e 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -117,6 +117,8 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, irq->dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + if (e->type == KVM_IRQ_ROUTING_MSI_X2APIC) + irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi); irq->vector = (e->msi.data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; @@ -150,7 +152,8 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq irq; int r; - if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) + if (unlikely(e->type != KVM_IRQ_ROUTING_MSI && + e->type != KVM_IRQ_ROUTING_MSI_X2APIC)) return -EWOULDBLOCK; kvm_set_msi_irq(e, &irq); @@ -281,6 +284,7 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, goto out; break; case KVM_IRQ_ROUTING_MSI: + case KVM_IRQ_ROUTING_MSI_X2APIC: e->set = kvm_set_msi; e->msi.address_lo = ue->u.msi.address_lo; e->msi.address_hi = ue->u.msi.address_hi; @@ -393,7 +397,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, hlist_for_each_entry(entry, &table->map[i], link) { struct kvm_lapic_irq irq; - if (entry->type != KVM_IRQ_ROUTING_MSI) + if (entry->type != KVM_IRQ_ROUTING_MSI && + entry->type != KVM_IRQ_ROUTING_MSI_X2APIC) continue; kvm_set_msi_irq(entry, &irq); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 133679d520afee..35df8d757d1dbe 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10829,7 +10829,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, BUG_ON(guest_irq >= irq_rt->nr_rt_entries); hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { - if (e->type != KVM_IRQ_ROUTING_MSI) + if (e->type != KVM_IRQ_ROUTING_MSI && + e->type != KVM_IRQ_ROUTING_MSI_X2APIC) continue; /* * VT-d PI cannot support posting multicast/broadcast diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b7798c7b210e7..86d1eb1c9d8b62 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2567,6 +2567,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_DISABLE_QUIRKS: case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: + case KVM_CAP_MSI_X2APIC: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a7f1f8032ec1fb..af1abec5f41d40 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -865,6 +865,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_SPAPR_TCE_64 125 #define KVM_CAP_ARM_PMU_V3 126 #define KVM_CAP_VCPU_ATTRIBUTES 127 +#define KVM_CAP_MSI_X2APIC 128 #ifdef KVM_CAP_IRQ_ROUTING @@ -898,6 +899,7 @@ struct kvm_irq_routing_hv_sint { #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 +#define KVM_IRQ_ROUTING_MSI_X2APIC 5 /* KVM_CAP_MSI_X2APIC */ struct kvm_irq_routing_entry { __u32 gsi; @@ -1023,6 +1025,9 @@ struct kvm_one_reg { __u64 addr; }; +#define KVM_SIGNAL_MSI_X2APIC (1 << 0) /* KVM_CAP_X2APIC */ +#define KVM_SIGNAL_MSI_FLAGS KVM_SIGNAL_MSI_X2APIC + struct kvm_msi { __u32 address_lo; __u32 address_hi; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index fe84e1a95dd55e..39b38b1a21569e 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -62,9 +62,13 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; - if (!irqchip_in_kernel(kvm) || msi->flags != 0) + if (!irqchip_in_kernel(kvm) || + (msi->flags & ~KVM_SIGNAL_MSI_FLAGS) != 0) return -EINVAL; + /* assignment must copy kvm_set_routing_entry() */ + route.type = msi->flags & KVM_SIGNAL_MSI_X2APIC ? + KVM_IRQ_ROUTING_MSI_X2APIC : KVM_IRQ_ROUTING_MSI; route.msi.address_lo = msi->address_lo; route.msi.address_hi = msi->address_hi; route.msi.data = msi->data; From a35230a22bd9ad88ee9c6b5be9a3ac011e2552b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 May 2016 18:50:20 +0200 Subject: [PATCH 6/9] KVM: x86: directly call recalculate_apic_map on lapic restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The get/set dance was just for that. Signed-off-by: Radim Krčmář --- arch/x86/kvm/lapic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6ae5e74031d6fb..615067b41dde49 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1933,8 +1933,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, /* set SPIV separately to get count of SW disabled APICs right */ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); - /* call kvm_apic_set_id() to put apic into apic_map */ - kvm_apic_set_id(apic, kvm_apic_id(apic)); + /* put apic into apic_map */ + recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); apic_update_ppr(apic); From 23820d8cd981b59fb6823133ccaa17c67fc288b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 27 Apr 2016 18:10:08 +0200 Subject: [PATCH 7/9] KVM: x86: use proper format of APIC ID register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently always shift APIC ID as if APIC was in xAPIC mode. x2APIC mode wants to use more bits and storing a hardware-compabible value is the the sanest option. VMX can stop intercepting the APIC ID register then. KVM API to set the lapic expects that bottom 8 bits of APIC ID are in top 8 bits of APIC_ID register. Definite that x2APIC IDs are byte swapped to keep compatibility without new toggles. Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 6 +++++ arch/x86/kvm/lapic.c | 44 ++++++++++++++++++++++--------- arch/x86/kvm/lapic.h | 7 ++++- arch/x86/kvm/vmx.c | 4 --- arch/x86/kvm/x86.c | 2 ++ 5 files changed, 46 insertions(+), 17 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 07bcedc0ba0996..b1ddea38e6b973 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1583,6 +1583,11 @@ struct kvm_lapic_state { Reads the Local APIC registers and copies them into the input argument. The data format and layout are the same as documented in the architecture manual. +Note that the APIC ID is stored in APIC_ID register in big endian format. +This makes no difference for xAPIC APIC ID, which is still in the top 8 bits, +but x2APIC ID needs to be byteswapped. The reason is compatibility with KVM's +definition of x2APIC. (The hardware stores x2APIC ID as little endian.) + 4.58 KVM_SET_LAPIC @@ -1600,6 +1605,7 @@ struct kvm_lapic_state { Copies the input argument into the Local APIC registers. The data format and layout are the same as documented in the architecture manual. +See the note about APIC_ID register in KVM_GET_LAPIC. 4.59 KVM_IOEVENTFD diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 615067b41dde49..25702584d65c4f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -239,7 +239,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) } } -static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) +static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) { apic_set_reg(apic, APIC_ID, id << 24); recalculate_apic_map(apic->vcpu->kvm); @@ -251,11 +251,11 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } -static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id) +static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); - apic_set_reg(apic, APIC_ID, id << 24); + apic_set_reg(apic, APIC_ID, id); apic_set_reg(apic, APIC_LDR, ldr); recalculate_apic_map(apic->vcpu->kvm); } @@ -1116,12 +1116,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) return 0; switch (offset) { - case APIC_ID: - if (apic_x2apic_mode(apic)) - val = kvm_apic_id(apic); - else - val = kvm_apic_id(apic) << 24; - break; case APIC_ARBPRI: apic_debug("Access APIC ARBPRI register which is for P6\n"); break; @@ -1400,7 +1394,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ if (!apic_x2apic_mode(apic)) - kvm_apic_set_id(apic, val >> 24); + kvm_apic_set_xapic_id(apic, val >> 24); else ret = 1; break; @@ -1671,8 +1665,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) if (value & X2APIC_ENABLE) { kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); - } else + } else { + kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); + } } apic->base_address = apic->vcpu->arch.apic_base & @@ -1703,7 +1699,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) hrtimer_cancel(&apic->lapic_timer.timer); if (!init_event) - kvm_apic_set_id(apic, vcpu->vcpu_id); + kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); kvm_apic_set_version(apic->vcpu); for (i = 0; i < APIC_LVT_NUM; i++) @@ -1924,6 +1920,30 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) return vector; } +static void __kvm_apic_state_fixup(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s, bool restore) +{ + if (apic_x2apic_mode(vcpu->arch.apic)) { + u32 *id = (u32 *)(s->regs + APIC_ID); + if (restore) + *id = be32_to_cpu(*id); + else + *id = cpu_to_be32(*id); + } +} + +void kvm_apic_state_get_fixup(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) +{ + __kvm_apic_state_fixup(vcpu, s, false); +} + +void kvm_apic_state_set_fixup(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) +{ + __kvm_apic_state_fixup(vcpu, s, true); +} + void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index d818a36ce24e8f..435606dd916f6d 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -71,6 +71,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +void kvm_apic_state_get_fixup(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s); +void kvm_apic_state_set_fixup(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); @@ -169,7 +173,8 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) static inline u32 kvm_apic_id(struct kvm_lapic *apic) { - return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; + u32 id = kvm_apic_get_reg(apic, APIC_ID); + return apic_x2apic_mode(apic) ? id : id >> 24; } bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 35df8d757d1dbe..12beb83c61d696 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6337,10 +6337,6 @@ static __init int hardware_setup(void) for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); - /* According SDM, in x2apic mode, the whole id reg is used. - * But in KVM, it only use the highest eight bits. Need to - * intercept it */ - vmx_enable_intercept_msr_read_x2apic(0x802); /* TMCCT */ vmx_enable_intercept_msr_read_x2apic(0x839); /* TPR */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 86d1eb1c9d8b62..2a244624032050 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2769,6 +2769,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, kvm_x86_ops->sync_pir_to_irr(vcpu); memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); + kvm_apic_state_get_fixup(vcpu, s); return 0; } @@ -2776,6 +2777,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { + kvm_apic_state_set_fixup(vcpu, s); kvm_apic_post_state_restore(vcpu, s); update_cr8_intercept(vcpu); From 629551dafb04e758d45ad8270e62082ac472d79c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 4 May 2016 18:48:28 +0200 Subject: [PATCH 8/9] KVM: x86: reset lapic base in kvm_lapic_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LAPIC is reset in xAPIC mode and the surrounding code expects that. KVM never resets after initialization, so this patch is just for sanity. Signed-off-by: Radim Krčmář --- arch/x86/kvm/lapic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 25702584d65c4f..3669ed8a79d359 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1698,8 +1698,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - if (!init_event) + if (!init_event) { + kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE); kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); + } kvm_apic_set_version(apic->vcpu); for (i = 0; i < APIC_LVT_NUM; i++) @@ -1838,9 +1841,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) * thinking that APIC satet has changed. */ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; - kvm_lapic_set_base(vcpu, - APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); - static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_lapic_reset(vcpu, false); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); From 39c6ec13dced7ad2d3dec3cea5442ddb3af6de80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 27 Apr 2016 18:10:32 +0200 Subject: [PATCH 9/9] KVM: bump MAX_VCPUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 288 is in high demand because of Knights Landing CPU. We cannot set the limit to 640k, because that would be wasting space. Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c93c4a7877ed72..c776b741b0db6f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #include #include -#define KVM_MAX_VCPUS 255 +#define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 160 #define KVM_USER_MEM_SLOTS 509 /* memory slots that are not exposed to userspace */