diff --git a/9998-fix-directed-eoi.patch b/9998-fix-directed-eoi.patch new file mode 100644 index 0000000..de8fd4b --- /dev/null +++ b/9998-fix-directed-eoi.patch @@ -0,0 +1,189 @@ +From: Roger Pau Monne +To: xen-devel@lists.xenproject.org +Cc: Roger Pau Monne , + Jan Beulich , + Andrew Cooper , + Willi Junga , + David Woodhouse +Subject: [PATCH v4] x86/io-apic: fix directed EOI when using AMD-Vi interrupt remapping +Date: Thu, 31 Oct 2024 09:57:13 +0100 +Message-ID: <20241031085713.6156-1-roger.pau@citrix.com> +X-Mailer: git-send-email 2.46.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Status: RO +Content-Length: 7061 + +When using AMD-Vi interrupt remapping the vector field in the IO-APIC RTE is +repurposed to contain part of the offset into the remapping table. Previous to +2ca9fbd739b8 Xen had logic so that the offset into the interrupt remapping +table would match the vector. Such logic was mandatory for end of interrupt to +work, since the vector field (even when not containing a vector) is used by the +IO-APIC to find for which pin the EOI must be performed. + +A simple solution wold be to read the IO-APIC RTE each time an EOI is to be +performed, so the raw value of the vector field can be obtained. However +that's likely to perform poorly. Instead introduce a cache to store the +EOI handles when using interrupt remapping, so that the IO-APIC driver can +translate pins into EOI handles without having to read the IO-APIC RTE entry. +Note that to simplify the logic such cache is used unconditionally when +interrupt remapping is enabled, even if strictly it would only be required +for AMD-Vi. + +Reported-by: Willi Junga +Suggested-by: David Woodhouse +Fixes: 2ca9fbd739b8 ('AMD IOMMU: allocate IRTE entries instead of using a static mapping') +Signed-off-by: Roger Pau Monné +--- +Changes since v3: + - Remove the usage of a sentinel value (again). + - Allocate an initialize the cache in enable_IO_APIC(). + - Fix MISRA compliance. + - Use xvmalloc. + +Changes since v2: + - Restore sentinel value. + +Changes since v1: + - s/apic_pin_eoi/io_apic_pin_eoi/. + - Expand comment about io_apic_pin_eoi usage and layout. + - Use uint8_t instead of unsigned int as array type. + - Do not use a sentinel value. +--- + xen/arch/x86/io_apic.c | 76 +++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 71 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c +index e40d2f7dbd75..4e5ee2b890a0 100644 +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -71,6 +72,24 @@ static int apic_pin_2_gsi_irq(int apic, int pin); + + static vmask_t *__read_mostly vector_map[MAX_IO_APICS]; + ++/* ++ * Store the EOI handle when using interrupt remapping. ++ * ++ * If using AMD-Vi interrupt remapping the IO-APIC redirection entry remapped ++ * format repurposes the vector field to store the offset into the Interrupt ++ * Remap table. This breaks directed EOI, as the CPU vector no longer matches ++ * the contents of the RTE vector field. Add a translation cache so that ++ * directed EOI uses the value in the RTE vector field when interrupt remapping ++ * is enabled. ++ * ++ * Intel VT-d Xen code still stores the CPU vector in the RTE vector field when ++ * using the remapped format, but use the translation cache uniformly in order ++ * to avoid extra logic to differentiate between VT-d and AMD-Vi. ++ * ++ * The matrix is accessed as [#io-apic][#pin]. ++ */ ++static uint8_t **io_apic_pin_eoi; ++ + static void share_vector_maps(unsigned int src, unsigned int dst) + { + unsigned int pin; +@@ -273,6 +292,17 @@ void __ioapic_write_entry( + { + __io_apic_write(apic, 0x11 + 2 * pin, eu.w2); + __io_apic_write(apic, 0x10 + 2 * pin, eu.w1); ++ /* ++ * Might be called before io_apic_pin_eoi is allocated. Entry will be ++ * initialized to the RTE value once the cache is allocated. ++ * ++ * The vector field is only cached for raw RTE writes when using IR. ++ * In that case the vector field might have been repurposed to store ++ * something different than the CPU vector, and hence need to be cached ++ * for performing EOI. ++ */ ++ if ( io_apic_pin_eoi ) ++ io_apic_pin_eoi[apic][pin] = e.vector; + } + else + iommu_update_ire_from_apic(apic, pin, e.raw); +@@ -288,18 +318,36 @@ static void ioapic_write_entry( + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +-/* EOI an IO-APIC entry. Vector may be -1, indicating that it should be ++/* ++ * EOI an IO-APIC entry. Vector may be -1, indicating that it should be + * worked out using the pin. This function expects that the ioapic_lock is + * being held, and interrupts are disabled (or there is a good reason not + * to), and that if both pin and vector are passed, that they refer to the +- * same redirection entry in the IO-APIC. */ ++ * same redirection entry in the IO-APIC. ++ * ++ * If using Interrupt Remapping the vector is always ignored because the RTE ++ * remapping format might have repurposed the vector field and a cached value ++ * of the EOI handle to use is obtained based on the provided apic and pin ++ * values. ++ */ + static void __io_apic_eoi(unsigned int apic, unsigned int vector, unsigned int pin) + { + /* Prefer the use of the EOI register if available */ + if ( ioapic_has_eoi_reg(apic) ) + { +- /* If vector is unknown, read it from the IO-APIC */ +- if ( vector == IRQ_VECTOR_UNASSIGNED ) ++ if ( io_apic_pin_eoi ) ++ /* ++ * If the EOI handle is cached use it. When using AMD-Vi IR the CPU ++ * vector no longer matches the vector field in the RTE, because ++ * the RTE remapping format repurposes the field. ++ * ++ * The value in the RTE vector field must always be used to signal ++ * which RTE to EOI, hence use the cached value which always ++ * mirrors the contents of the raw RTE vector field. ++ */ ++ vector = io_apic_pin_eoi[apic][pin]; ++ else if ( vector == IRQ_VECTOR_UNASSIGNED ) ++ /* If vector is unknown, read it from the IO-APIC */ + vector = __ioapic_read_entry(apic, pin, true).vector; + + *(IO_APIC_BASE(apic)+16) = vector; +@@ -1317,12 +1365,30 @@ void __init enable_IO_APIC(void) + vector_map[apic] = vector_map[0]; + } + ++ if ( iommu_intremap != iommu_intremap_off ) ++ { ++ io_apic_pin_eoi = xmalloc_array(typeof(*io_apic_pin_eoi), nr_ioapics); ++ BUG_ON(!io_apic_pin_eoi); ++ } ++ + for(apic = 0; apic < nr_ioapics; apic++) { + int pin; +- /* See if any of the pins is in ExtINT mode */ ++ ++ if ( io_apic_pin_eoi ) ++ { ++ io_apic_pin_eoi[apic] = xmalloc_array(typeof(**io_apic_pin_eoi), ++ nr_ioapic_entries[apic]); ++ BUG_ON(!io_apic_pin_eoi[apic]); ++ } ++ ++ /* See if any of the pins is in ExtINT mode and cache EOI handle */ + for (pin = 0; pin < nr_ioapic_entries[apic]; pin++) { + struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin, false); + ++ if ( io_apic_pin_eoi ) ++ io_apic_pin_eoi[apic][pin] = ++ ioapic_read_entry(apic, pin, true).vector; ++ + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ +-- +2.46.0 + + + diff --git a/9999-amd-iommu-test.patch b/9999-amd-iommu-test.patch new file mode 100644 index 0000000..5aed0a6 --- /dev/null +++ b/9999-amd-iommu-test.patch @@ -0,0 +1,38 @@ +diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c +index 3f5508eba049..edccb814921b 100644 +--- a/xen/drivers/passthrough/amd/iommu_acpi.c ++++ b/xen/drivers/passthrough/amd/iommu_acpi.c +@@ -248,6 +248,33 @@ static int __init register_range_for_device( + iommu = find_iommu_for_device(seg, bdf); + if ( !iommu ) + { ++ const pci_sbdf_t sbdf = PCI_SBDF(seg, bdf); ++ ++ if ( !pci_device_detect(sbdf.seg, sbdf.bus, sbdf.dev, sbdf.fn) ) ++ { ++ AMD_IOMMU_WARN("IVMD: non present %pp device not covered by IVRS\n", ++ &sbdf); ++ return 0; ++ } ++ ++ for_each_amd_iommu(iommu) ++ { ++ if ( iommu->seg == seg && iommu->bdf == bdf ) ++ { ++ AMD_IOMMU_WARN("IVMD: IOMMU %pp not covered by IVRS\n", ++ &sbdf); ++ return 0; ++ } ++ } ++ ++ if ( pdev_type(sbdf.seg, sbdf.bus, sbdf.devfn) == ++ DEV_TYPE_PCI_HOST_BRIDGE ) ++ { ++ AMD_IOMMU_WARN("IVMD: PCI Host bridge %pp not covered by IVRS\n", ++ &sbdf); ++ return 0; ++ } ++ + AMD_IOMMU_ERROR("IVMD: no IOMMU for Dev_Id %#x\n", bdf); + return -ENODEV; + } diff --git a/config b/config index 4bebb45..fd668b1 100644 --- a/config +++ b/config @@ -137,7 +137,7 @@ CONFIG_ARCH_SUPPORTS_INT128=y # # Debugging Options # -# CONFIG_DEBUG is not set +CONFIG_DEBUG=y # CONFIG_CRASH_DEBUG is not set CONFIG_GDBSX=y CONFIG_DEBUG_INFO=y @@ -146,7 +146,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_LOCK_PROFILE is not set # CONFIG_DEBUG_LOCKS is not set # CONFIG_PERF_COUNTERS is not set -# CONFIG_VERBOSE_DEBUG is not set +CONFIG_VERBOSE_DEBUG=y CONFIG_SCRUB_DEBUG=y # CONFIG_UBSAN is not set # CONFIG_DEBUG_TRACE is not set diff --git a/xen.spec.in b/xen.spec.in index 7e575a5..e77ed2f 100644 --- a/xen.spec.in +++ b/xen.spec.in @@ -170,6 +170,9 @@ Patch1200: 1200-hypercall-XENMEM_get_mfn_from_pfn.patch Patch1201: 1201-patch-gvt-hvmloader.patch.patch Patch1202: 1202-libxl-Add-partially-Intel-GVT-g-support-xengt-device.patch +Patch9998: 9998-fix-directed-eoi.patch +Patch9999: 9999-amd-iommu-test.patch + %if %build_qemutrad BuildRequires: libidn-devel zlib-devel SDL-devel curl-devel BuildRequires: libX11-devel gtk2-devel libaio-devel