When auxiliary_device_add() returns error and then calls
auxiliary_device_uninit(), the callback function
gp_auxiliary_device_release() calls kfree() to free memory. Do not
call kfree() again in the error handling path.
Fix this by skipping the redundant kfree().
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 393fc2f5948f ("misc: microchip: pci1xxxx: load auxiliary bus driver for the PIO function in the multi-function endpoint of pci1xxxx device.")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- modified the patch omitted.
---
drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c
index 32af2b14ff34..de75d89ef53e 100644
--- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c
+++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c
@@ -111,6 +111,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id
err_aux_dev_add_1:
auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev);
+ goto err_aux_dev_add_0;
err_aux_dev_init_1:
ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[1]->aux_dev.id);
@@ -120,6 +121,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id
err_aux_dev_add_0:
auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev);
+ goto err_ret;
err_aux_dev_init_0:
ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[0]->aux_dev.id);
@@ -127,6 +129,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id
err_ida_alloc_0:
kfree(aux_bus->aux_device_wrapper[0]);
+err_ret:
return retval;
}
--
2.25.1
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x eaebeb93922ca6ab0dd92027b73d0112701706ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025010627-retry-detached-672f@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eaebeb93922ca6ab0dd92027b73d0112701706ef Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed(a)google.com>
Date: Thu, 19 Dec 2024 21:24:37 +0000
Subject: [PATCH] mm: zswap: fix race between [de]compression and CPU hotunplug
In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
current CPU at the beginning of the operation is retrieved and used
throughout. However, since neither preemption nor migration are disabled,
it is possible that the operation continues on a different CPU.
If the original CPU is hotunplugged while the acomp_ctx is still in use,
we run into a UAF bug as the resources attached to the acomp_ctx are freed
during hotunplug in zswap_cpu_comp_dead().
The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to use
crypto_acomp API for hardware acceleration") when the switch to the
crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
retrieved using get_cpu_ptr() which disables preemption and makes sure the
CPU cannot go away from under us. Preemption cannot be disabled with the
crypto_acomp API as a sleepable context is needed.
Commit 8ba2f844f050 ("mm/zswap: change per-cpu mutex and buffer to
per-acomp_ctx") increased the UAF surface area by making the per-CPU
buffers dynamic, adding yet another resource that can be freed from under
zswap compression/decompression by CPU hotunplug.
There are a few ways to fix this:
(a) Add a refcount for acomp_ctx.
(b) Disable migration while using the per-CPU acomp_ctx.
(c) Disable CPU hotunplug while using the per-CPU acomp_ctx by holding
the CPUs read lock.
Implement (c) since it's simpler than (a), and (b) involves using
migrate_disable() which is apparently undesired (see huge comment in
include/linux/preempt.h).
Link: https://lkml.kernel.org/r/20241219212437.2714151-1-yosryahmed@google.com
Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
Reported-by: Johannes Weiner <hannes(a)cmpxchg.org>
Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
Reported-by: Sam Sun <samsun1006219(a)gmail.com>
Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tP…
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Acked-by: Barry Song <baohua(a)kernel.org>
Reviewed-by: Nhat Pham <nphamcs(a)gmail.com>
Cc: Vitaly Wool <vitalywool(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/zswap.c b/mm/zswap.c
index f6316b66fb23..5a27af8d86ea 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -880,6 +880,18 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
return 0;
}
+/* Prevent CPU hotplug from freeing up the per-CPU acomp_ctx resources */
+static struct crypto_acomp_ctx *acomp_ctx_get_cpu(struct crypto_acomp_ctx __percpu *acomp_ctx)
+{
+ cpus_read_lock();
+ return raw_cpu_ptr(acomp_ctx);
+}
+
+static void acomp_ctx_put_cpu(void)
+{
+ cpus_read_unlock();
+}
+
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
@@ -893,8 +905,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
- acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
-
+ acomp_ctx = acomp_ctx_get_cpu(pool->acomp_ctx);
mutex_lock(&acomp_ctx->mutex);
dst = acomp_ctx->buffer;
@@ -950,6 +961,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
zswap_reject_alloc_fail++;
mutex_unlock(&acomp_ctx->mutex);
+ acomp_ctx_put_cpu();
return comp_ret == 0 && alloc_ret == 0;
}
@@ -960,7 +972,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
struct crypto_acomp_ctx *acomp_ctx;
u8 *src;
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+ acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
@@ -990,6 +1002,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
+ acomp_ctx_put_cpu();
}
/*********************************
In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
current CPU at the beginning of the operation is retrieved and used
throughout. However, since neither preemption nor migration are
disabled, it is possible that the operation continues on a different
CPU.
If the original CPU is hotunplugged while the acomp_ctx is still in use,
we run into a UAF bug as the resources attached to the acomp_ctx are
freed during hotunplug in zswap_cpu_comp_dead().
The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to
use crypto_acomp API for hardware acceleration") when the switch to the
crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
retrieved using get_cpu_ptr() which disables preemption and makes sure
the CPU cannot go away from under us. Preemption cannot be disabled with
the crypto_acomp API as a sleepable context is needed.
Commit 8ba2f844f050 ("mm/zswap: change per-cpu mutex and buffer to
per-acomp_ctx") increased the UAF surface area by making the per-CPU
buffers dynamic, adding yet another resource that can be freed from
under zswap compression/decompression by CPU hotunplug.
There are a few ways to fix this:
(a) Add a refcount for acomp_ctx.
(b) Disable migration while using the per-CPU acomp_ctx.
(c) Disable CPU hotunplug while using the per-CPU acomp_ctx by holding
the CPUs read lock.
Implement (c) since it's simpler than (a), and (b) involves using
migrate_disable() which is apparently undesired (see huge comment in
include/linux/preempt.h).
Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
Reported-by: Johannes Weiner <hannes(a)cmpxchg.org>
Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
Reported-by: Sam Sun <samsun1006219(a)gmail.com>
Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tP…
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
---
mm/zswap.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index f6316b66fb236..5a27af8d86ea9 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -880,6 +880,18 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
return 0;
}
+/* Prevent CPU hotplug from freeing up the per-CPU acomp_ctx resources */
+static struct crypto_acomp_ctx *acomp_ctx_get_cpu(struct crypto_acomp_ctx __percpu *acomp_ctx)
+{
+ cpus_read_lock();
+ return raw_cpu_ptr(acomp_ctx);
+}
+
+static void acomp_ctx_put_cpu(void)
+{
+ cpus_read_unlock();
+}
+
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
@@ -893,8 +905,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
- acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
-
+ acomp_ctx = acomp_ctx_get_cpu(pool->acomp_ctx);
mutex_lock(&acomp_ctx->mutex);
dst = acomp_ctx->buffer;
@@ -950,6 +961,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
zswap_reject_alloc_fail++;
mutex_unlock(&acomp_ctx->mutex);
+ acomp_ctx_put_cpu();
return comp_ret == 0 && alloc_ret == 0;
}
@@ -960,7 +972,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
struct crypto_acomp_ctx *acomp_ctx;
u8 *src;
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+ acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
@@ -990,6 +1002,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
+ acomp_ctx_put_cpu();
}
/*********************************
--
2.47.1.613.gc27f4b7a9f-goog
The code for detecting CPUs that are vulnerable to Spectre BHB was
based on a hardcoded list of CPU IDs that were known to be affected.
Unfortunately, the list mostly only contained the IDs of standard ARM
cores. The IDs for many cores that are minor variants of the standard
ARM cores (like many Qualcomm Kyro CPUs) weren't listed. This led the
code to assume that those variants were not affected.
Flip the code on its head and instead assume that a core is vulnerable
if it doesn't have CSV2_3 but is unrecognized as being safe. This
involves creating a "Spectre BHB safe" list.
As of right now, the only CPU IDs added to the "Spectre BHB safe" list
are ARM Cortex A35, A53, A55, A510, and A520. This list was created by
looking for cores that weren't listed in ARM's list [1] as per review
feedback on v2 of this patch [2].
NOTE: this patch will not actually _mitigate_ anyone, it will simply
cause them to report themselves as vulnerable. If any cores in the
system are reported as vulnerable but not mitigated then the whole
system will be reported as vulnerable though the system will attempt
to mitigate with the information it has about the known cores.
[1] https://developer.arm.com/Arm%20Security%20Center/Spectre-BHB
[2] https://lore.kernel.org/r/20241219175128.GA25477@willie-the-truck
Fixes: 558c303c9734 ("arm64: Mitigate spectre style branch history side channels")
Cc: stable(a)vger.kernel.org
Signed-off-by: Douglas Anderson <dianders(a)chromium.org>
---
Changes in v3:
- Don't guess the mitigation; just report unknown cores as vulnerable.
- Restructure the code since is_spectre_bhb_affected() defaults to true
Changes in v2:
- New
arch/arm64/include/asm/spectre.h | 1 -
arch/arm64/kernel/proton-pack.c | 144 +++++++++++++++++--------------
2 files changed, 77 insertions(+), 68 deletions(-)
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index 0c4d9045c31f..f1524cdeacf1 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -97,7 +97,6 @@ enum mitigation_state arm64_get_meltdown_state(void);
enum mitigation_state arm64_get_spectre_bhb_state(void);
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
-u8 spectre_bhb_loop_affected(int scope);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index da53722f95d4..06e04c9e6480 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -845,52 +845,68 @@ static unsigned long system_bhb_mitigations;
* This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
* SCOPE_SYSTEM call will give the right answer.
*/
-u8 spectre_bhb_loop_affected(int scope)
+static bool is_spectre_bhb_safe(int scope)
+{
+ static const struct midr_range spectre_bhb_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A520),
+ {},
+ };
+ static bool all_safe = true;
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return all_safe;
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_safe_list))
+ return true;
+
+ all_safe = false;
+
+ return false;
+}
+
+static u8 spectre_bhb_loop_affected(void)
{
u8 k = 0;
- static u8 max_bhb_k;
-
- if (scope == SCOPE_LOCAL_CPU) {
- static const struct midr_range spectre_bhb_k32_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
- {},
- };
- static const struct midr_range spectre_bhb_k24_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
- {},
- };
- static const struct midr_range spectre_bhb_k11_list[] = {
- MIDR_ALL_VERSIONS(MIDR_AMPERE1),
- {},
- };
- static const struct midr_range spectre_bhb_k8_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- {},
- };
-
- if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
- k = 32;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
- k = 24;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
- k = 11;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
- k = 8;
-
- max_bhb_k = max(max_bhb_k, k);
- } else {
- k = max_bhb_k;
- }
+
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k11_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
+ k = 11;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
+ k = 8;
return k;
}
@@ -916,9 +932,8 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
}
}
-static bool is_spectre_bhb_fw_affected(int scope)
+static bool is_spectre_bhb_fw_affected(void)
{
- static bool system_affected;
enum mitigation_state fw_state;
bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
@@ -929,16 +944,8 @@ static bool is_spectre_bhb_fw_affected(int scope)
bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
spectre_bhb_firmware_mitigated_list);
- if (scope != SCOPE_LOCAL_CPU)
- return system_affected;
-
fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
- system_affected = true;
- return true;
- }
-
- return false;
+ return cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED);
}
static bool supports_ecbhb(int scope)
@@ -954,6 +961,8 @@ static bool supports_ecbhb(int scope)
ID_AA64MMFR1_EL1_ECBHB_SHIFT);
}
+static u8 max_bhb_k;
+
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -962,16 +971,18 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
if (supports_csv2p3(scope))
return false;
- if (supports_clearbhb(scope))
- return true;
-
- if (spectre_bhb_loop_affected(scope))
- return true;
+ if (is_spectre_bhb_safe(scope))
+ return false;
- if (is_spectre_bhb_fw_affected(scope))
- return true;
+ /*
+ * At this point the core isn't known to be "safe" so we're going to
+ * assume it's vulnerable. We still need to update `max_bhb_k` though,
+ * but only if we aren't mitigating with clearbhb though.
+ */
+ if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU))
+ max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected());
- return false;
+ return true;
}
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
@@ -1028,7 +1039,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
state = SPECTRE_MITIGATED;
set_bit(BHB_INSN, &system_bhb_mitigations);
- } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ } else if (spectre_bhb_loop_affected()) {
/*
* Ensure KVM uses the indirect vector which will have the
* branchy-loop added. A57/A72-r0 will already have selected
@@ -1041,7 +1052,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
state = SPECTRE_MITIGATED;
set_bit(BHB_LOOP, &system_bhb_mitigations);
- } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
+ } else if (is_spectre_bhb_fw_affected()) {
fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
if (fw_state == SPECTRE_MITIGATED) {
/*
@@ -1100,7 +1111,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
{
u8 rd;
u32 insn;
- u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
BUG_ON(nr_inst != 1); /* MOV -> MOV */
@@ -1109,7 +1119,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
insn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
- insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr++ = cpu_to_le32(insn);
--
2.47.1.613.gc27f4b7a9f-goog
This partially reverts commit fe4f5d4b6616 ("drm/xe: Clean up VM / exec
queue file lock usage."). While it's desired to have the mutex to
protect only the reference to the exec queue, getting and dropping each
mutex and then later getting the GPU timestamp, doesn't produce a
correct result: it introduces multiple opportunities for the task to be
scheduled out and thus wrecking havoc the deltas reported to userspace.
Also, to better correlate the timestamp from the exec queues with the
GPU, disable preemption so they can be updated without allowing the task
to be scheduled out. We leave interrupts enabled as that shouldn't be
enough disturbance for the deltas to matter to userspace.
Test scenario:
* IGT'S `xe_drm_fdinfo --r --r utilization-single-full-load`
* Platform: LNL, where CI occasionally reports failures
* `stress -c $(nproc)` running in parallel to disturb the
system
This brings a first failure from "after ~150 executions" to "never
occurs after 1000 attempts".
Cc: stable(a)vger.kernel.org # v6.11+
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3512
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
---
drivers/gpu/drm/xe/xe_drm_client.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 298a587da7f17..e307b4d6bab5a 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -338,15 +338,12 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
- xa_for_each(&xef->exec_queue.xa, i, q) {
- xe_exec_queue_get(q);
- mutex_unlock(&xef->exec_queue.lock);
+ preempt_disable();
+ xa_for_each(&xef->exec_queue.xa, i, q)
xe_exec_queue_update_run_ticks(q);
- mutex_lock(&xef->exec_queue.lock);
- xe_exec_queue_put(q);
- }
+ preempt_enable();
mutex_unlock(&xef->exec_queue.lock);
gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
--
2.47.0
From: Sean Christopherson <seanjc(a)google.com>
[ Upstream commit 4b7c3f6d04bd53f2e5b228b6821fb8f5d1ba3071 ]
Ignore the userspace provided x2APIC ID when fixing up APIC state for
KVM_SET_LAPIC, i.e. make the x2APIC fully readonly in KVM. Commit
a92e2543d6a8 ("KVM: x86: use hardware-compatible format for APIC ID
register"), which added the fixup, didn't intend to allow userspace to
modify the x2APIC ID. In fact, that commit is when KVM first started
treating the x2APIC ID as readonly, apparently to fix some race:
static inline u32 kvm_apic_id(struct kvm_lapic *apic)
{
- return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+ /* To avoid a race between apic_base and following APIC_ID update when
+ * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
+ */
+ if (apic_x2apic_mode(apic))
+ return apic->vcpu->vcpu_id;
+
+ return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
}
Furthermore, KVM doesn't support delivering interrupts to vCPUs with a
modified x2APIC ID, but KVM *does* return the modified value on a guest
RDMSR and for KVM_GET_LAPIC. I.e. no remotely sane setup can actually
work with a modified x2APIC ID.
Making the x2APIC ID fully readonly fixes a WARN in KVM's optimized map
calculation, which expects the LDR to align with the x2APIC ID.
WARNING: CPU: 2 PID: 958 at arch/x86/kvm/lapic.c:331 kvm_recalculate_apic_map+0x609/0xa00 [kvm]
CPU: 2 PID: 958 Comm: recalc_apic_map Not tainted 6.4.0-rc3-vanilla+ #35
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.2-1-1 04/01/2014
RIP: 0010:kvm_recalculate_apic_map+0x609/0xa00 [kvm]
Call Trace:
<TASK>
kvm_apic_set_state+0x1cf/0x5b0 [kvm]
kvm_arch_vcpu_ioctl+0x1806/0x2100 [kvm]
kvm_vcpu_ioctl+0x663/0x8a0 [kvm]
__x64_sys_ioctl+0xb8/0xf0
do_syscall_64+0x56/0x80
entry_SYSCALL_64_after_hwframe+0x46/0xb0
RIP: 0033:0x7fade8b9dd6f
Unfortunately, the WARN can still trigger for other CPUs than the current
one by racing against KVM_SET_LAPIC, so remove it completely.
Reported-by: Michal Luczaj <mhal(a)rbox.co>
Closes: https://lore.kernel.org/all/814baa0c-1eaa-4503-129f-059917365e80@rbox.co
Reported-by: Haoyu Wu <haoyuwu254(a)gmail.com>
Closes: https://lore.kernel.org/all/20240126161633.62529-1-haoyuwu254@gmail.com
Reported-by: syzbot+545f1326f405db4e1c3e(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/000000000000c2a6b9061cbca3c3@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-ID: <20240802202941.344889-2-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Gavin Guo <gavinguo(a)igalia.com>
---
arch/x86/kvm/lapic.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 34766abbabd8..cd9c1e1f6fd3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -338,10 +338,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
* reversing the LDR calculation to get cluster of APICs, i.e. no
* additional work is required.
*/
- if (apic_x2apic_mode(apic)) {
- WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+ if (apic_x2apic_mode(apic))
return;
- }
if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
&cluster, &mask))) {
@@ -2964,18 +2962,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s, bool set)
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
+ u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
u32 *id = (u32 *)(s->regs + APIC_ID);
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
u64 icr;
if (vcpu->kvm->arch.x2apic_format) {
- if (*id != vcpu->vcpu_id)
+ if (*id != x2apic_id)
return -EINVAL;
} else {
+ /*
+ * Ignore the userspace value when setting APIC state.
+ * KVM's model is that the x2APIC ID is readonly, e.g.
+ * KVM only supports delivering interrupts to KVM's
+ * version of the x2APIC ID. However, for backwards
+ * compatibility, don't reject attempts to set a
+ * mismatched ID for userspace that hasn't opted into
+ * x2apic_format.
+ */
if (set)
- *id >>= 24;
+ *id = x2apic_id;
else
- *id <<= 24;
+ *id = x2apic_id << 24;
}
/*
@@ -2984,7 +2992,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
* split to ICR+ICR2 in userspace for backwards compatibility.
*/
if (set) {
- *ldr = kvm_apic_calc_x2apic_ldr(*id);
+ *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
(u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
--
2.43.0