On Mon, 13 Jan 2025 06:03:53 -0800, Sasha Levin wrote:
>
Hello Sasha,
> This is a note to let you know that I've just added the patch titled
>
> drm/xe/oa: Separate batch submission from waiting for completion
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> drm-xe-oa-separate-batch-submission-from-waiting-for.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
I am writing about 3 emails I received (including this one) about 3 commits
being added to stable. These are the 3 commits I am referring to (all
commit SHAs refer to Linus' tree, and in every list in this email the
first commit is at the bottom):
2fb4350a283af drm/xe/oa: Add input fence dependencies
c8507a25cebd1 drm/xe/oa/uapi: Define and parse OA sync properties
dddcb19ad4d4b drm/xe/oa: Separate batch submission from waiting for completion
So none of these commits has a "Fixes:" tag or "Cc: stable", so I am not
sure why they are being added to stable. Also, they are part of a 7-commit
series, so I am not sure why only 3 of the 7 commits are being added to
stable?
In addition, for this commit, which was also added to stable:
f0ed39830e606 xe/oa: Fix query mode of operation for OAR/OAC
We modified this commit for stable because it would otherwise conflict
with the previous 3 commits mentioned above, which we were assuming would
not be in stable.
Now, if we want all these commits in stable (I would actually prefer
that), we should just pick them straight from Linus' tree. That would be
the following commits:
f0ed39830e606 xe/oa: Fix query mode of operation for OAR/OAC
c0403e4ceecae drm/xe/oa: Fix "Missing outer runtime PM protection" warning
85d3f9e84e062 drm/xe/oa: Allow only certain property changes from config
9920c8b88c5cf drm/xe/oa: Add syncs support to OA config ioctl
cc4e6994d5a23 drm/xe/oa: Move functions up so they can be reused for config ioctl
343dd246fd9b5 drm/xe/oa: Signal output fences
2fb4350a283af drm/xe/oa: Add input fence dependencies
c8507a25cebd1 drm/xe/oa/uapi: Define and parse OA sync properties
dddcb19ad4d4b drm/xe/oa: Separate batch submission from waiting for completion
So: we should either drop the 3 patches at the top, or just pick all 9
patches above. The latter is probably the simplest, and I don't expect any
conflicts; if there are any, I can help to resolve them.
The above list can be generated by running the following in Linus' tree:
git log --oneline -- drivers/gpu/drm/xe/xe_oa.c
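And, to double-check the claim about the missing tags, something like the
following (just a suggestion of mine; adjust the grep pattern as needed)
should print nothing for each of the 3 commits at the top:
git show -s --format=%B dddcb19ad4d4b | grep -iE 'fixes:|cc: *stable'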
Thanks.
--
Ashutosh
>
>
>
> commit 9aeced687e728b9de067a502a0780f8029e61763
> Author: Ashutosh Dixit <ashutosh.dixit(a)intel.com>
> Date: Tue Oct 22 13:03:46 2024 -0700
>
> drm/xe/oa: Separate batch submission from waiting for completion
>
> [ Upstream commit dddcb19ad4d4bbe943a72a1fb3266c6e8aa8d541 ]
>
> When we introduce xe_syncs, we don't wait for internal OA programming
> batches to complete. That is, xe_syncs are signaled asynchronously. In
> anticipation of this, separate out batch submission from waiting for
> completion of those batches.
>
> v2: Change return type of xe_oa_submit_bb to "struct dma_fence *" (Matt B)
> v3: Retain init "int err = 0;" in xe_oa_submit_bb (Jose)
>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt(a)intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit(a)intel.com>
> Link: https://patchwork.freedesktop.org/patch/msgid/20241022200352.1192560-2-ashu…
> Stable-dep-of: f0ed39830e60 ("xe/oa: Fix query mode of operation for OAR/OAC")
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index 78823f53d290..4962c9eb9a81 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -567,11 +567,10 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
> return ret;
> }
>
> -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
> +static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
> {
> struct xe_sched_job *job;
> struct dma_fence *fence;
> - long timeout;
> int err = 0;
>
> /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
> @@ -585,14 +584,9 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
> fence = dma_fence_get(&job->drm.s_fence->finished);
> xe_sched_job_push(job);
>
> - timeout = dma_fence_wait_timeout(fence, false, HZ);
> - dma_fence_put(fence);
> - if (timeout < 0)
> - err = timeout;
> - else if (!timeout)
> - err = -ETIME;
> + return fence;
> exit:
> - return err;
> + return ERR_PTR(err);
> }
>
> static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
> @@ -656,6 +650,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
> static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
> const struct flex *flex, u32 count)
> {
> + struct dma_fence *fence;
> struct xe_bb *bb;
> int err;
>
> @@ -667,7 +662,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
>
> xe_oa_store_flex(stream, lrc, bb, flex, count);
>
> - err = xe_oa_submit_bb(stream, bb);
> + fence = xe_oa_submit_bb(stream, bb);
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto free_bb;
> + }
> + xe_bb_free(bb, fence);
> + dma_fence_put(fence);
> +
> + return 0;
> +free_bb:
> xe_bb_free(bb, NULL);
> exit:
> return err;
> @@ -675,6 +679,7 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
>
> static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
> {
> + struct dma_fence *fence;
> struct xe_bb *bb;
> int err;
>
> @@ -686,7 +691,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
>
> write_cs_mi_lri(bb, reg_lri, 1);
>
> - err = xe_oa_submit_bb(stream, bb);
> + fence = xe_oa_submit_bb(stream, bb);
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto free_bb;
> + }
> + xe_bb_free(bb, fence);
> + dma_fence_put(fence);
> +
> + return 0;
> +free_bb:
> xe_bb_free(bb, NULL);
> exit:
> return err;
> @@ -914,15 +928,32 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
> {
> #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
> struct xe_oa_config_bo *oa_bo;
> - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> + int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> + struct dma_fence *fence;
> + long timeout;
>
> + /* Emit OA configuration batch */
> oa_bo = xe_oa_alloc_config_buffer(stream, config);
> if (IS_ERR(oa_bo)) {
> err = PTR_ERR(oa_bo);
> goto exit;
> }
>
> - err = xe_oa_submit_bb(stream, oa_bo->bb);
> + fence = xe_oa_submit_bb(stream, oa_bo->bb);
> + if (IS_ERR(fence)) {
> + err = PTR_ERR(fence);
> + goto exit;
> + }
> +
> + /* Wait till all previous batches have executed */
> + timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
> + dma_fence_put(fence);
> + if (timeout < 0)
> + err = timeout;
> + else if (!timeout)
> + err = -ETIME;
> + if (err)
> + drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
>
> /* Additional empirical delay needed for NOA programming after registers are written */
> usleep_range(us, 2 * us);
On Fri, 10 Jan 2025 06:02:04 +0000 Potin Lai (賴柏廷) wrote:
> > Neat!
> > Potin, please give this a test ASAP.
>
> Thanks for the new patch.
> I am currently tied up with other tasks, but I’ll make sure to test
> it as soon as possible and share the results with you.
Understood, would you be able to test it by January 13th?
Depending on how long we need to wait, we may be better off applying
the patch right away rather than holding off on committing it.
The quilt patch titled
Subject: selftests/mm: virtual_address_range: avoid reading VVAR mappings
has been removed from the -mm tree. Its filename was
selftests-mm-virtual_address_range-avoid-reading-vvar-mappings.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Subject: selftests/mm: virtual_address_range: avoid reading VVAR mappings
Date: Tue, 07 Jan 2025 16:14:46 +0100
The virtual_address_range selftest reads from the start of each mapping
listed in /proc/self/maps.
However, not all mappings are valid to be arbitrarily accessed. For
example, the vvar data used for virtual clocks on x86 can only be accessed
if 1) the kernel configuration enables virtual clocks and 2) the
hypervisor provided the data for it, which can only be determined by the
VDSO code itself.
Since commit e93d2521b27f ("x86/vdso: Split virtual clock pages into
dedicated mapping") the virtual clock data was split out into its own
mapping, triggering faulting accesses by virtual_address_range.
Skip the various vvar mappings in virtual_address_range to avoid errors.
Link: https://lkml.kernel.org/r/20250107-virtual_address_range-tests-v1-2-3834a2f…
Fixes: e93d2521b27f ("x86/vdso: Split virtual clock pages into dedicated mapping")
Fixes: 010409649885 ("selftests/mm: confirm VA exhaustion without reliance on correctness of mmap()")
Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202412271148.2656e485-lkp@intel.com
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/virtual_address_range.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/tools/testing/selftests/mm/virtual_address_range.c~selftests-mm-virtual_address_range-avoid-reading-vvar-mappings
+++ a/tools/testing/selftests/mm/virtual_address_range.c
@@ -116,10 +116,11 @@ static int validate_complete_va_space(vo
prev_end_addr = 0;
while (fgets(line, sizeof(line), file)) {
+ int path_offset = 0;
unsigned long hop;
- if (sscanf(line, "%lx-%lx %s[rwxp-]",
- &start_addr, &end_addr, prot) != 3)
+ if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
+ &start_addr, &end_addr, prot, &path_offset) != 3)
ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
/* end of userspace mappings; ignore vsyscall mapping */
@@ -135,6 +136,10 @@ static int validate_complete_va_space(vo
if (prot[0] != 'r')
continue;
+ /* Only the VDSO can know if a VVAR mapping is really readable */
+ if (path_offset && !strncmp(line + path_offset, "[vvar", 5))
+ continue;
+
/*
* Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
* If write succeeds, no need to check MAP_CHUNK_SIZE - 1
_
Patches currently in -mm which might be from thomas.weissschuh(a)linutronix.de are
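A side note on the sscanf() change in the patch above: the %n conversion
records how many characters have been consumed so far but does not count
toward the return value, which is why the check stays "!= 3". A minimal
userspace sketch (the sample maps line and its values are made up for
illustration):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* hypothetical /proc/self/maps line */
	const char *line =
		"7ffd1c9f3000-7ffd1c9f5000 r--p 00000000 00:00 0 [vvar]\n";
	unsigned long start_addr, end_addr;
	char prot[5];
	int path_offset = 0;

	/* three conversions are assigned; %n only records the offset */
	if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
		   &start_addr, &end_addr, prot, &path_offset) != 3)
		return 1;

	/* matches [vvar] and [vvar_vclock] alike */
	if (path_offset && !strncmp(line + path_offset, "[vvar", 5))
		printf("skip %lx-%lx (%s)\n", start_addr, end_addr, prot);
	return 0;
}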
The quilt patch titled
Subject: selftests/mm: virtual_address_range: fix error when CommitLimit < 1GiB
has been removed from the -mm tree. Its filename was
selftests-mm-virtual_address_range-fix-error-when-commitlimit-1gib.patch
This patch was dropped because an updated version will be issued
------------------------------------------------------
From: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Subject: selftests/mm: virtual_address_range: fix error when CommitLimit < 1GiB
Date: Tue, 07 Jan 2025 16:14:45 +0100
If not enough physical memory is available, the kernel may fail mmap(); see
__vm_enough_memory() and vm_commit_limit(). In that case the logic in
validate_complete_va_space() does not make sense and will even incorrectly
fail. Instead, skip the test if no mmap() succeeded.
Link: https://lkml.kernel.org/r/20250107-virtual_address_range-tests-v1-1-3834a2f…
Fixes: 010409649885 ("selftests/mm: confirm VA exhaustion without reliance on correctness of mmap()")
Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: kernel test robot <oliver.sang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/virtual_address_range.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/tools/testing/selftests/mm/virtual_address_range.c~selftests-mm-virtual_address_range-fix-error-when-commitlimit-1gib
+++ a/tools/testing/selftests/mm/virtual_address_range.c
@@ -178,6 +178,12 @@ int main(int argc, char *argv[])
validate_addr(ptr[i], 0);
}
lchunks = i;
+
+ if (!lchunks) {
+ ksft_test_result_skip("Not enough memory for a single chunk\n");
+ ksft_finished();
+ }
+
hptr = (char **) calloc(NR_CHUNKS_HIGH, sizeof(char *));
if (hptr == NULL) {
ksft_test_result_skip("Memory constraint not fulfilled\n");
_
Patches currently in -mm which might be from thomas.weissschuh(a)linutronix.de are
selftests-mm-virtual_address_range-avoid-reading-vvar-mappings.patch
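For anyone trying to reproduce the low-CommitLimit condition above, the
relevant accounting is visible in /proc/meminfo (the grep below is just a
convenience suggestion):
grep -E 'CommitLimit|Committed_AS' /proc/meminfo
With strict overcommit (vm.overcommit_memory=2), mmap() starts failing
once Committed_AS would exceed CommitLimit, which is the situation the
added skip handles.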
On Mon, Jan 13, 2025 at 6:04 AM Sasha Levin <sashal(a)kernel.org> wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> mm: zswap: fix race between [de]compression and CPU hotunplug
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> mm-zswap-fix-race-between-de-compression-and-cpu-hot.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
Please drop this patch from v6.12 (and all stable trees) as it has a
bug, as I pointed out in [1] (was that the correct place to surface
this?).
Andrew's latest PR contains a revert of this patch (and an alternative fix) [2].
Thanks!
[1] https://lore.kernel.org/stable/CAJD7tkaiUA6J1nb0=1ELpUD0OgjoD+tft-iPqbPdy…
[2] https://lore.kernel.org/lkml/20250113000543.862792e948c2646032a477e0@linu…
>
>
>
> commit c56e79d453ef5b5fc6ded252bc6ba461f12946ba
> Author: Yosry Ahmed <yosryahmed(a)google.com>
> Date: Thu Dec 19 21:24:37 2024 +0000
>
> mm: zswap: fix race between [de]compression and CPU hotunplug
>
> [ Upstream commit eaebeb93922ca6ab0dd92027b73d0112701706ef ]
>
> In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
> current CPU at the beginning of the operation is retrieved and used
> throughout. However, since neither preemption nor migration are disabled,
> it is possible that the operation continues on a different CPU.
>
> If the original CPU is hotunplugged while the acomp_ctx is still in use,
> we run into a UAF bug as the resources attached to the acomp_ctx are freed
> during hotunplug in zswap_cpu_comp_dead().
>
> The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to use
> crypto_acomp API for hardware acceleration") when the switch to the
> crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
> retrieved using get_cpu_ptr() which disables preemption and makes sure the
> CPU cannot go away from under us. Preemption cannot be disabled with the
> crypto_acomp API as a sleepable context is needed.
>
> Commit 8ba2f844f050 ("mm/zswap: change per-cpu mutex and buffer to
> per-acomp_ctx") increased the UAF surface area by making the per-CPU
> buffers dynamic, adding yet another resource that can be freed from under
> zswap compression/decompression by CPU hotunplug.
>
> There are a few ways to fix this:
> (a) Add a refcount for acomp_ctx.
> (b) Disable migration while using the per-CPU acomp_ctx.
> (c) Disable CPU hotunplug while using the per-CPU acomp_ctx by holding
> the CPUs read lock.
>
> Implement (c) since it's simpler than (a), and (b) involves using
> migrate_disable() which is apparently undesired (see huge comment in
> include/linux/preempt.h).
>
> Link: https://lkml.kernel.org/r/20241219212437.2714151-1-yosryahmed@google.com
> Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
> Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com>
> Reported-by: Johannes Weiner <hannes(a)cmpxchg.org>
> Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
> Reported-by: Sam Sun <samsun1006219(a)gmail.com>
> Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tP…
> Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
> Acked-by: Barry Song <baohua(a)kernel.org>
> Reviewed-by: Nhat Pham <nphamcs(a)gmail.com>
> Cc: Vitaly Wool <vitalywool(a)gmail.com>
> Cc: <stable(a)vger.kernel.org>
> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 0030ce8fecfc..c86d4bcbb447 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -875,6 +875,18 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
> return 0;
> }
>
> +/* Prevent CPU hotplug from freeing up the per-CPU acomp_ctx resources */
> +static struct crypto_acomp_ctx *acomp_ctx_get_cpu(struct crypto_acomp_ctx __percpu *acomp_ctx)
> +{
> + cpus_read_lock();
> + return raw_cpu_ptr(acomp_ctx);
> +}
> +
> +static void acomp_ctx_put_cpu(void)
> +{
> + cpus_read_unlock();
> +}
> +
> static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> {
> struct crypto_acomp_ctx *acomp_ctx;
> @@ -887,8 +899,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> gfp_t gfp;
> u8 *dst;
>
> - acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> -
> + acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
> mutex_lock(&acomp_ctx->mutex);
>
> dst = acomp_ctx->buffer;
> @@ -944,6 +955,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> zswap_reject_alloc_fail++;
>
> mutex_unlock(&acomp_ctx->mutex);
> + acomp_ctx_put_cpu();
> return comp_ret == 0 && alloc_ret == 0;
> }
>
> @@ -954,7 +966,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> struct crypto_acomp_ctx *acomp_ctx;
> u8 *src;
>
> - acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
> + acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
> mutex_lock(&acomp_ctx->mutex);
>
> src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
> @@ -984,6 +996,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
>
> if (src != acomp_ctx->buffer)
> zpool_unmap_handle(zpool, entry->handle);
> + acomp_ctx_put_cpu();
> }
>
> /*********************************
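An aside on the three options listed in the quoted commit message: the
diff above implements (c) by taking the CPUs read lock around the per-CPU
lookup. For contrast, a rough sketch of what the rejected option (b) might
look like (illustrative only, mirroring the zswap names quoted above;
migrate_disable()/migrate_enable() are the APIs the commit message alludes
to):

	/* Option (b), rejected: pin the task to its current CPU so the
	 * per-CPU acomp_ctx cannot be freed by hotunplug while in use.
	 * Unlike under preempt_disable(), sleeping in mutex_lock() is
	 * still allowed here, but migrate_disable() is discouraged (see
	 * the comment in include/linux/preempt.h).
	 */
	migrate_disable();
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(&acomp_ctx->mutex);
	/* ... compress or decompress using acomp_ctx ... */
	mutex_unlock(&acomp_ctx->mutex);
	migrate_enable();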
This partially reverts commit fe4f5d4b6616 ("drm/xe: Clean up VM / exec
queue file lock usage."). While it's desired to have the mutex protect
only the reference to the exec queue, taking and dropping the mutex for
each queue and only later reading the GPU timestamp doesn't produce a
correct result: it introduces multiple opportunities for the task to be
scheduled out, wreaking havoc on the deltas reported to userspace.
Also, to better correlate the exec queue timestamps with the GPU
timestamp, disable preemption so they can be updated without the task
being scheduled out. We leave interrupts enabled, as their disturbance
should be too small for the deltas reported to userspace to matter.
Test scenario:
* IGT's `xe_drm_fdinfo --r utilization-single-full-load`
* Platform: LNL, where CI occasionally reports failures
* `stress -c $(nproc)` running in parallel to disturb the
system
This brings a first failure from "after ~150 executions" to "never
occurs after 1000 attempts".
v2: Also keep xe_hw_engine_read_timestamp() call inside the
preemption-disabled section (Umesh)
Cc: stable(a)vger.kernel.org # v6.11+
Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3512
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
---
drivers/gpu/drm/xe/xe_drm_client.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 7d55ad846bac5..2220a09bf9751 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -337,20 +337,18 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
return;
}
+ /* Let both the GPU timestamp and exec queue be updated together */
+ preempt_disable();
+ gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
- xa_for_each(&xef->exec_queue.xa, i, q) {
- xe_exec_queue_get(q);
- mutex_unlock(&xef->exec_queue.lock);
+ xa_for_each(&xef->exec_queue.xa, i, q)
xe_exec_queue_update_run_ticks(q);
- mutex_lock(&xef->exec_queue.lock);
- xe_exec_queue_put(q);
- }
mutex_unlock(&xef->exec_queue.lock);
-
- gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+ preempt_enable();
xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
xe_pm_runtime_put(xe);
--
2.47.0
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x d1cacd74776895f6435941f86a1130e58f6dd226
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025011253-citable-monstrous-3ae9@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1cacd74776895f6435941f86a1130e58f6dd226 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Mon, 6 Jan 2025 10:01:36 -0800
Subject: [PATCH] netdev: prevent accessing NAPI instances from another
namespace
The NAPI IDs were not fully exposed to user space prior to the netlink
API, so they were never namespaced. The netlink API must ensure that
at the very least the NAPI instance belongs to the same netns as the
owner of the genl sock.
napi_by_id() can become static now, but it needs to move because of
dev_get_by_napi_id().
Cc: stable(a)vger.kernel.org
Fixes: 1287c1ae0fc2 ("netdev-genl: Support setting per-NAPI config values")
Fixes: 27f91aaf49b3 ("netdev-genl: Add netlink framework functions for napi")
Reviewed-by: Sridhar Samudrala <sridhar.samudrala(a)intel.com>
Reviewed-by: Joe Damato <jdamato(a)fastly.com>
Link: https://patch.msgid.link/20250106180137.1861472-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/core/dev.c b/net/core/dev.c
index faa23042df38..a9f62f5aeb84 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -753,6 +753,36 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
}
EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+/* must be called under rcu_read_lock(), as we dont take a reference */
+static struct napi_struct *napi_by_id(unsigned int napi_id)
+{
+ unsigned int hash = napi_id % HASH_SIZE(napi_hash);
+ struct napi_struct *napi;
+
+ hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
+ if (napi->napi_id == napi_id)
+ return napi;
+
+ return NULL;
+}
+
+/* must be called under rcu_read_lock(), as we dont take a reference */
+struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ napi = napi_by_id(napi_id);
+ if (!napi)
+ return NULL;
+
+ if (WARN_ON_ONCE(!napi->dev))
+ return NULL;
+ if (!net_eq(net, dev_net(napi->dev)))
+ return NULL;
+
+ return napi;
+}
+
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
@@ -6293,19 +6323,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
}
EXPORT_SYMBOL(napi_complete_done);
-/* must be called under rcu_read_lock(), as we dont take a reference */
-struct napi_struct *napi_by_id(unsigned int napi_id)
-{
- unsigned int hash = napi_id % HASH_SIZE(napi_hash);
- struct napi_struct *napi;
-
- hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
- if (napi->napi_id == napi_id)
- return napi;
-
- return NULL;
-}
-
static void skb_defer_free_flush(struct softnet_data *sd)
{
struct sk_buff *skb, *next;
diff --git a/net/core/dev.h b/net/core/dev.h
index d043dee25a68..deb5eae5749f 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -22,6 +22,8 @@ struct sd_flow_limit {
extern int netdev_flow_limit_table_len;
+struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id);
+
#ifdef CONFIG_PROC_FS
int __init dev_proc_init(void);
#else
@@ -269,7 +271,6 @@ void xdp_do_check_flushed(struct napi_struct *napi);
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
-struct napi_struct *napi_by_id(unsigned int napi_id);
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
#define XMIT_RECURSION_LIMIT 8
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 125b660004d3..a3bdaf075b6b 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -167,8 +167,6 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
void *hdr;
pid_t pid;
- if (WARN_ON_ONCE(!napi->dev))
- return -EINVAL;
if (!(napi->dev->flags & IFF_UP))
return 0;
@@ -234,7 +232,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
rcu_read_lock();
- napi = napi_by_id(napi_id);
+ napi = netdev_napi_by_id(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
} else {
@@ -355,7 +353,7 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
rcu_read_lock();
- napi = napi_by_id(napi_id);
+ napi = netdev_napi_by_id(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_set_config(napi, info);
} else {