From 611e9709f1ea26653d8d3fbb39365d5bbaf91ae3 Mon Sep 17 00:00:00 2001
From: Greg Rose
Date: Wed, 10 Jul 2024 11:35:01 -0700
Subject: [PATCH 1/6] locking/atomic: Make test_and_*_bit() ordered on failure

jira LE-1482
commit 415d832497098030241605c52ea83d4e2cfa7879
upstream-diff - Conflicts around use of arch_atomic vs atomic - there
  are no arch_atomic* ops in Rocky 8.x

These operations are documented as always ordered in
include/asm-generic/bitops/instrumented-atomic.h, and producer-consumer
type use cases where one side needs to ensure a flag is left pending
after some shared data was updated rely on this ordering, even in the
failure case. This is the case with the workqueue code, which currently
suffers from a reproducible ordering violation on Apple M1 platforms
(which are notoriously out-of-order) that ends up causing the TTY layer
to fail to deliver data to userspace properly under the right
conditions. This change fixes that bug.

Change the documentation to restrict the "no order on failure" story to
the _lock() variant (for which it makes sense), and remove the
early-exit from the generic implementation, which is what causes the
missing barrier semantics in that case. Without this, the remaining
atomic op is fully ordered (including on ARM64 LSE, as of recent
versions of the architecture spec).

Suggested-by: Linus Torvalds
Cc: stable@vger.kernel.org
Fixes: e986a0d6cb36 ("locking/atomics, asm-generic/bitops/atomic.h: Rewrite using atomic_*() APIs")
Fixes: 61e02392d3c7 ("locking/atomic/bitops: Document and clarify ordering semantics for failed test_and_{}_bit()")
Signed-off-by: Hector Martin
Acked-by: Will Deacon
Reviewed-by: Arnd Bergmann
Signed-off-by: Linus Torvalds
(cherry picked from commit 415d832497098030241605c52ea83d4e2cfa7879)
Signed-off-by: Greg Rose
Signed-off-by: Jonathan Maple
---
 Documentation/atomic_bitops.txt     | 2 +-
 include/asm-generic/bitops/atomic.h | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
index be70b32c95d91..bc3fac8e1db3a 100644
--- a/Documentation/atomic_bitops.txt
+++ b/Documentation/atomic_bitops.txt
@@ -59,7 +59,7 @@ Like with atomic_t, the rule of thumb is:
 - RMW operations that have a return value are fully ordered.
 
 - RMW operations that are conditional are unordered on FAILURE,
-  otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+  otherwise the above rules apply. In the case of test_and_set_bit_lock(),
   if the bit in memory is unchanged by the operation then it is deemed
   to have failed.
 
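For illustration, the producer/consumer pattern that the commit message
says depends on this ordering looks roughly like the sketch below. This
is not code from the patch; kick_consumer() and process() are
hypothetical stand-ins for the real workqueue/TTY paths.

        static unsigned long pending;
        static int shared_data;

        void producer(int v)
        {
                /* Publish the data before raising the flag. */
                WRITE_ONCE(shared_data, v);
                /*
                 * Must be fully ordered even when the bit was already
                 * set (the "failure" case), so a consumer that clears
                 * the flag is guaranteed to observe shared_data.
                 */
                if (!test_and_set_bit(0, &pending))
                        kick_consumer();        /* hypothetical wakeup */
        }

        void consumer(void)
        {
                if (test_and_clear_bit(0, &pending))
                        process(READ_ONCE(shared_data)); /* hypothetical */
        }

With the early-exit removed, even a failed test_and_set_bit() executes
the fully ordered atomic RMW, which is what makes this pattern safe on
weakly ordered CPUs such as the Apple M1.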
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index dd90c9792909d..18399e3759a24 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -35,9 +35,6 @@ static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p)
         unsigned long mask = BIT_MASK(nr);
 
         p += BIT_WORD(nr);
-        if (READ_ONCE(*p) & mask)
-                return 1;
-
         old = atomic_long_fetch_or(mask, (atomic_long_t *)p);
         return !!(old & mask);
 }
@@ -48,9 +45,6 @@ static inline int test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
         unsigned long mask = BIT_MASK(nr);
 
         p += BIT_WORD(nr);
-        if (!(READ_ONCE(*p) & mask))
-                return 0;
-
         old = atomic_long_fetch_andnot(mask, (atomic_long_t *)p);
         return !!(old & mask);
 }

From 8d988b91fcb5f5af80112d4daf731e4e2eea4d29 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg
Date: Wed, 11 Oct 2023 04:01:10 +0000
Subject: [PATCH 2/6] x86/sev-es: Set x86_virt_bits to the correct value
 straight away, instead of a two-phase approach

jira roc-2673
commit fbf6449f84bf5e4ad09f2c09ee70ed7d629b5ff6

Instead of setting x86_virt_bits to a possibly-correct value and then
correcting it later, do all the necessary checks before setting it.

At this point, the #VC handler references boot_cpu_data.x86_virt_bits,
and in the previous version, it would be triggered by the CPUIDs
between the point at which it is set to 48 and when it is set to the
correct value.

Suggested-by: Dave Hansen
Signed-off-by: Adam Dunlap
Signed-off-by: Ingo Molnar
Tested-by: Jacob Xu
Link: https://lore.kernel.org/r/20230912002703.3924521-3-acdunlap@google.com
Signed-off-by: Ronnie Sahlberg
Signed-off-by: Jonathan Maple
---
 arch/x86/kernel/cpu/common.c | 37 +++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dbb5dee28ca27..40b4861cf9c79 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1020,17 +1020,32 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 {
         u32 eax, ebx, ecx, edx;
+        bool vp_bits_from_cpuid = true;
 
-        if (c->extended_cpuid_level >= 0x80000008) {
+        if (!cpu_has(c, X86_FEATURE_CPUID) ||
+            (c->extended_cpuid_level < 0x80000008))
+                vp_bits_from_cpuid = false;
+
+        if (vp_bits_from_cpuid) {
                 cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
 
                 c->x86_virt_bits = (eax >> 8) & 0xff;
                 c->x86_phys_bits = eax & 0xff;
+        } else {
+                if (IS_ENABLED(CONFIG_X86_64)) {
+                        c->x86_clflush_size = 64;
+                        c->x86_phys_bits = 36;
+                        c->x86_virt_bits = 48;
+                } else {
+                        c->x86_clflush_size = 32;
+                        c->x86_virt_bits = 32;
+                        c->x86_phys_bits = 32;
+
+                        if (cpu_has(c, X86_FEATURE_PAE) ||
+                            cpu_has(c, X86_FEATURE_PSE36))
+                                c->x86_phys_bits = 36;
+                }
         }
-#ifdef CONFIG_X86_32
-        else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
-                c->x86_phys_bits = 36;
-#endif
         c->x86_cache_bits = c->x86_phys_bits;
 }
 
@@ -1468,15 +1483,6 @@ static void __init cpu_parse_early_param(void)
  */
 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_64
-        c->x86_clflush_size = 64;
-        c->x86_phys_bits = 36;
-        c->x86_virt_bits = 48;
-#else
-        c->x86_clflush_size = 32;
-        c->x86_phys_bits = 32;
-        c->x86_virt_bits = 32;
-#endif
         c->x86_cache_alignment = c->x86_clflush_size;
 
         memset(&c->x86_capability, 0, sizeof(c->x86_capability));
         c->extended_cpuid_level = 0;
@@ -1488,7 +1494,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
         get_cpu_vendor(c);
         get_cpu_cap(c);
         get_model_name(c); /* RHEL8: get model name for unsupported check */
-        get_cpu_address_sizes(c);
         setup_force_cpu_cap(X86_FEATURE_CPUID);
         cpu_parse_early_param();
 
@@ -1505,6 +1510,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
                 setup_clear_cpu_cap(X86_FEATURE_CPUID);
         }
 
+        get_cpu_address_sizes(c);
+
         setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
         cpu_set_bug_bits(c);
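The leaf 0x80000008 decoding used by get_cpu_address_sizes() can be
cross-checked from userspace. A minimal sketch using the compiler's
cpuid.h (assuming a host where CPUID is directly readable, i.e. not a
SEV-ES guest):

        #include <cpuid.h>
        #include <stdio.h>

        int main(void)
        {
                unsigned int eax, ebx, ecx, edx;

                /* If the leaf is absent, the kernel uses its defaults. */
                if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
                        return 1;

                printf("phys bits: %u\n", eax & 0xff);        /* EAX[7:0]  */
                printf("virt bits: %u\n", (eax >> 8) & 0xff); /* EAX[15:8] */
                return 0;
        }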
From 8b3ed9fcd03929fadb3b844cf74c7b3f1e41cac6 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg
Date: Wed, 11 Oct 2023 04:04:08 +0000
Subject: [PATCH 3/6] x86/boot: Move x86_cache_alignment initialization to
 correct spot

jira roc-2673
commit 3e32552652917f10c0aa8ac75cdc8f0b8d257dec

c->x86_cache_alignment is initialized from c->x86_clflush_size.
However, commit fbf6449f84bf moved c->x86_clflush_size initialization
to later in boot without moving the c->x86_cache_alignment assignment:

  fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value
                 straight away, instead of a two-phase approach")

This presumably left c->x86_cache_alignment set to zero for longer
than it should be. The result was an oops on 32-bit kernels while
accessing a pointer at 0x20. The 0x20 came from accessing a structure
member at offset 0x10 (buffer->cpumask) from a ZERO_SIZE_PTR=0x10.
kmalloc() can evidently return ZERO_SIZE_PTR when it's given 0 as its
alignment requirement.

Move the c->x86_cache_alignment initialization to be after
c->x86_clflush_size has an actual value.

Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach")
Signed-off-by: Dave Hansen
Signed-off-by: Ingo Molnar
Tested-by: Nathan Chancellor
Link: https://lore.kernel.org/r/20231002220045.1014760-1-dave.hansen@linux.intel.com
(cherry picked from commit 3e32552652917f10c0aa8ac75cdc8f0b8d257dec)
Signed-off-by: Ronnie Sahlberg
Signed-off-by: Jonathan Maple
---
 arch/x86/kernel/cpu/common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 40b4861cf9c79..2ad285f913633 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1047,6 +1047,7 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
                 }
         }
         c->x86_cache_bits = c->x86_phys_bits;
+        c->x86_cache_alignment = c->x86_clflush_size;
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -1483,8 +1484,6 @@ static void __init cpu_parse_early_param(void)
  */
 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
-        c->x86_cache_alignment = c->x86_clflush_size;
-
         memset(&c->x86_capability, 0, sizeof(c->x86_capability));
         c->extended_cpuid_level = 0;
 
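To see why a zero cache_line_size() ends in a fault at 0x20, trace the
kernel's ALIGN() macro with a zero alignment. A sketch of the failure
mode described above (buffer stands for a hypothetical allocation):

        /*
         * ALIGN(x, a) expands to ((x) + (a) - 1) & ~((a) - 1), so with
         * a == 0:  (x + ~0UL) & ~(~0UL)  ==  (x - 1) & 0  ==  0
         */
        buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
                         GFP_KERNEL);
        /*
         * kzalloc(0) returns ZERO_SIZE_PTR, i.e. (void *)0x10, and the
         * buffer->cpumask member at offset 0x10 is then dereferenced
         * through address 0x20, producing the oops.
         */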
From 8249c75c8fe906e0c1b1a274735028854780527a Mon Sep 17 00:00:00 2001
From: Jonathan Maple
Date: Thu, 26 Dec 2024 14:34:31 -0500
Subject: [PATCH 4/6] x86/cpu: Allow reducing x86_phys_bits during
 early_identify_cpu()

jira LE-2183
bug-fix x86/sev-es: Set x86_virt_bits
commit-author Paolo Bonzini
commit 9a458198eba98b7207669a166e64d04b04cb651b

In commit fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct
value straight away, instead of a two-phase approach"), the
initialization of c->x86_phys_bits was moved after
this_cpu->c_early_init(c). This is incorrect because early_init_amd()
expected to be able to reduce the value according to the contents of
CPUID leaf 0x8000001f.

Fortunately, the bug was negated by init_amd()'s call to
early_init_amd(), which does reduce x86_phys_bits in the end. However,
this is very late in the boot process and, most notably, the wrong
value is used for x86_phys_bits when setting up MTRRs.

To fix this, call get_cpu_address_sizes() as soon as X86_FEATURE_CPUID
is set/cleared, and c->extended_cpuid_level is retrieved.

Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach")
Signed-off-by: Paolo Bonzini
Signed-off-by: Dave Hansen
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20240131230902.1867092-2-pbonzini%40redhat.com
(cherry picked from commit 9a458198eba98b7207669a166e64d04b04cb651b)
Signed-off-by: Jonathan Maple
---
 arch/x86/kernel/cpu/common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2ad285f913633..e10abdd903bf3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1494,6 +1494,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
         get_cpu_cap(c);
         get_model_name(c); /* RHEL8: get model name for unsupported check */
         setup_force_cpu_cap(X86_FEATURE_CPUID);
+        get_cpu_address_sizes(c);
         cpu_parse_early_param();
 
         if (this_cpu->c_early_init)
@@ -1507,10 +1508,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
         } else {
                 identify_cpu_without_cpuid(c);
                 setup_clear_cpu_cap(X86_FEATURE_CPUID);
+                get_cpu_address_sizes(c);
         }
 
-        get_cpu_address_sizes(c);
-
         setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
         cpu_set_bug_bits(c);
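For context, the reduction that early_init_amd() needs to apply comes
from CPUID leaf 0x8000001F. A simplified sketch modeled on the upstream
AMD memory-encryption code (not part of this patch):

        /*
         * EBX[11:6] of leaf 0x8000001F enumerates how many physical
         * address bits are consumed by memory encryption, so it can
         * only be applied after x86_phys_bits has been read from leaf
         * 0x80000008 by get_cpu_address_sizes().
         */
        if (c->extended_cpuid_level >= 0x8000001f)
                c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;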
Signed-off-by: Borislav Petkov (AMD)
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/20240316120706.4352-1-bp@alien8.de
(cherry picked from commit 95bfb35269b2e85cff0dd2c957b2d42ebf95ae5f)
Signed-off-by: Jonathan Maple
---
 arch/x86/kernel/cpu/common.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e10abdd903bf3..1a4d0b20d5b7f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1020,18 +1020,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 {
         u32 eax, ebx, ecx, edx;
-        bool vp_bits_from_cpuid = true;
 
         if (!cpu_has(c, X86_FEATURE_CPUID) ||
-            (c->extended_cpuid_level < 0x80000008))
-                vp_bits_from_cpuid = false;
-
-        if (vp_bits_from_cpuid) {
-                cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
-
-                c->x86_virt_bits = (eax >> 8) & 0xff;
-                c->x86_phys_bits = eax & 0xff;
-        } else {
+            (c->extended_cpuid_level < 0x80000008)) {
                 if (IS_ENABLED(CONFIG_X86_64)) {
                         c->x86_clflush_size = 64;
                         c->x86_phys_bits = 36;
@@ -1045,7 +1036,13 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
                             cpu_has(c, X86_FEATURE_PSE36))
                                 c->x86_phys_bits = 36;
                 }
+        } else {
+                cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+
+                c->x86_virt_bits = (eax >> 8) & 0xff;
+                c->x86_phys_bits = eax & 0xff;
         }
+
         c->x86_cache_bits = c->x86_phys_bits;
         c->x86_cache_alignment = c->x86_clflush_size;
 }

From e913c4da19356a1563ebe2d2a7a08445c25d4dc0 Mon Sep 17 00:00:00 2001
From: Jonathan Maple
Date: Thu, 26 Dec 2024 14:45:08 -0500
Subject: [PATCH 6/6] x86/cpu: Provide default cache line size if not
 enumerated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira LE-2183
bug-fix x86/sev-es: Set x86_virt_bits
commit-author Dave Hansen
commit 2a38e4ca302280fdcce370ba2bee79bac16c4587

tl;dr: CPUs with CPUID.80000008H but without CPUID.01H:EDX[CLFSH]
will end up reporting cache_line_size()==0 and bad things happen.
Fill in a default on those to avoid the problem.

Long Story:

The kernel dies a horrible death if c->x86_cache_alignment (aka.
cache_line_size()) is 0. Normally, this value is populated from
c->x86_clflush_size.

Right now the code is set up to get c->x86_clflush_size from two
places. First, modern CPUs get it from CPUID. Old CPUs that don't have
leaf 0x80000008 (or CPUID at all) just get some sane defaults from the
kernel in get_cpu_address_sizes().

The vast majority of CPUs that have leaf 0x80000008 also get
->x86_clflush_size from CPUID. But there are oddballs.

Intel Quark CPUs[1] and others[2] have leaf 0x80000008 but don't set
CPUID.01H:EDX[CLFSH], so they skip over filling in ->x86_clflush_size:

        cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
        if (cap0 & (1<<19))
                c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;

So they land in get_cpu_address_sizes(), see that CPUID has level
0x80000008, and jump into the side of the if() that does not fill in
c->x86_clflush_size. That assigns a 0 to c->x86_cache_alignment, and
hilarity ensues in code like:

        buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
                         GFP_KERNEL);

To fix this, always provide a sane value for ->x86_clflush_size.

Big thanks to Andy Shevchenko for finding and reporting this and also
providing a first pass at a fix. But his fix was only partial and only
worked on the Quark CPUs. It would not, for instance, have worked on
the QEMU config.
1. https://raw.githubusercontent.com/InstLatx64/InstLatx64/master/GenuineIntel/GenuineIntel0000590_Clanton_03_CPUID.txt
2. You can also get this behavior if you use "-cpu 486,+clzero" in QEMU.

[ dhansen: remove 'vp_bits_from_cpuid' reference in changelog
           because bpetkov brutally murdered it recently. ]

Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach")
Reported-by: Andy Shevchenko
Signed-off-by: Dave Hansen
Tested-by: Andy Shevchenko
Tested-by: Jörn Heusipp
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20240516173928.3960193-1-andriy.shevchenko@linux.intel.com/
Link: https://lore.kernel.org/lkml/5e31cad3-ad4d-493e-ab07-724cfbfaba44@heusipp.de/
Link: https://lore.kernel.org/all/20240517200534.8EC5F33E%40davehans-spike.ostc.intel.com
(cherry picked from commit 2a38e4ca302280fdcce370ba2bee79bac16c4587)
Signed-off-by: Jonathan Maple
---
 arch/x86/kernel/cpu/common.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1a4d0b20d5b7f..e3e7b06692907 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1041,6 +1041,10 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 
                 c->x86_virt_bits = (eax >> 8) & 0xff;
                 c->x86_phys_bits = eax & 0xff;
+
+                /* Provide a sane default if not enumerated: */
+                if (!c->x86_clflush_size)
+                        c->x86_clflush_size = 32;
         }
 
         c->x86_cache_bits = c->x86_phys_bits;
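The enumeration gap this patch papers over can be probed from userspace
as well. A sketch mirroring the kernel's CPUID.01H check (assuming a
host where CPUID is directly readable):

        #include <cpuid.h>
        #include <stdio.h>

        int main(void)
        {
                unsigned int eax, ebx, ecx, edx;

                if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                        return 1;

                if (edx & (1u << 19))   /* CPUID.01H:EDX[CLFSH] */
                        printf("clflush line size: %u bytes\n",
                               ((ebx >> 8) & 0xff) * 8);
                else
                        printf("CLFSH not enumerated; kernel defaults to 32\n");
                return 0;
        }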