From: Takashi Iwai <tiwai(a)suse.de>
Upstream commit a3dd4d63eeb452cfb064a13862fb376ab108f6a6
The current USB-audio driver code doesn't check bLength of each
descriptor at traversing for clock descriptors. That is, when a
device provides a bogus descriptor with a shorter bLength, the driver
might hit out-of-bounds reads.
For addressing it, this patch adds sanity checks to the validator
functions for the clock descriptor traversal. When the descriptor
length is shorter than expected, it's skipped in the loop.
For the clock source and clock multiplier descriptors, we can just
check bLength against the sizeof() of each descriptor type.
OTOH, the clock selector descriptor of UAC2 and UAC3 has an array
of bNrInPins elements and two more fields at its tail, hence those
have to be checked in addition to the sizeof() check.
This patch ports the upstream commit a3dd4d63eeb4 to trees that do not
include the refactoring commit 9ec730052fa2 ("ALSA: usb-audio:
Refactoring UAC2/3 clock setup code"). That commit provides union
objects for pointing both UAC2 and UAC3 objects and unifies the clock
source, selector and multiplier helper functions. This means we need to
perform the check in each version specific helper function, but on the
other hand do not need to do version specific union dereferencing in the
macros and helper functions.
Reported-by: Benoît Sevens <bsevens(a)google.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/20241121140613.3651-1-bsevens@google.com
Link: https://patch.msgid.link/20241125144629.20757-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
(cherry picked from commit a3dd4d63eeb452cfb064a13862fb376ab108f6a6)
Signed-off-by: Benoît Sevens <bsevens(a)google.com>
---
sound/usb/clock.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 3d1c0ec11753..58902275c815 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -21,6 +21,10 @@
#include "clock.h"
#include "quirks.h"
+/* check whether the descriptor bLength has the minimal length */
+#define DESC_LENGTH_CHECK(p) \
+ (p->bLength >= sizeof(*p))
+
static void *find_uac_clock_desc(struct usb_host_interface *iface, int id,
bool (*validator)(void *, int), u8 type)
{
@@ -38,36 +42,60 @@ static void *find_uac_clock_desc(struct usb_host_interface *iface, int id,
static bool validate_clock_source_v2(void *p, int id)
{
struct uac_clock_source_descriptor *cs = p;
+ if (!DESC_LENGTH_CHECK(cs))
+ return false;
return cs->bClockID == id;
}
static bool validate_clock_source_v3(void *p, int id)
{
struct uac3_clock_source_descriptor *cs = p;
+ if (!DESC_LENGTH_CHECK(cs))
+ return false;
return cs->bClockID == id;
}
static bool validate_clock_selector_v2(void *p, int id)
{
struct uac_clock_selector_descriptor *cs = p;
- return cs->bClockID == id;
+ if (!DESC_LENGTH_CHECK(cs))
+ return false;
+ if (cs->bClockID != id)
+ return false;
+ /* additional length check for baCSourceID array (in bNrInPins size)
+ * and two more fields (which sizes depend on the protocol)
+ */
+ return cs->bLength >= sizeof(*cs) + cs->bNrInPins +
+ 1 /* bmControls */ + 1 /* iClockSelector */;
}
static bool validate_clock_selector_v3(void *p, int id)
{
struct uac3_clock_selector_descriptor *cs = p;
- return cs->bClockID == id;
+ if (!DESC_LENGTH_CHECK(cs))
+ return false;
+ if (cs->bClockID != id)
+ return false;
+ /* additional length check for baCSourceID array (in bNrInPins size)
+ * and two more fields (which sizes depend on the protocol)
+ */
+ return cs->bLength >= sizeof(*cs) + cs->bNrInPins +
+ 4 /* bmControls */ + 2 /* wCSelectorDescrStr */;
}
static bool validate_clock_multiplier_v2(void *p, int id)
{
struct uac_clock_multiplier_descriptor *cs = p;
+ if (!DESC_LENGTH_CHECK(cs))
+ return false;
return cs->bClockID == id;
}
static bool validate_clock_multiplier_v3(void *p, int id)
{
struct uac3_clock_multiplier_descriptor *cs = p;
+ if (!DESC_LENGTH_CHECK(cs))
+ return false;
return cs->bClockID == id;
}
--
2.47.0.338.g60cca15819-goog
The quilt patch titled
Subject: sched/numa: fix memory leak due to the overwritten vma->numab_state
has been removed from the -mm tree. Its filename was
sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Adrian Huang <ahuang12(a)lenovo.com>
Subject: sched/numa: fix memory leak due to the overwritten vma->numab_state
Date: Wed, 13 Nov 2024 18:21:46 +0800
[Problem Description]
When running the hackbench program of LTP, the following memory leak is
reported by kmemleak.
# /opt/ltp/testcases/bin/hackbench 20 thread 1000
Running with 20*40 (== 800) tasks.
# dmesg | grep kmemleak
...
kmemleak: 480 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
kmemleak: 665 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
# cat /sys/kernel/debug/kmemleak
unreferenced object 0xffff888cd8ca2c40 (size 64):
comm "hackbench", pid 17142, jiffies 4299780315
hex dump (first 32 bytes):
ac 74 49 00 01 00 00 00 4c 84 49 00 01 00 00 00 .tI.....L.I.....
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace (crc bff18fd4):
[<ffffffff81419a89>] __kmalloc_cache_noprof+0x2f9/0x3f0
[<ffffffff8113f715>] task_numa_work+0x725/0xa00
[<ffffffff8110f878>] task_work_run+0x58/0x90
[<ffffffff81ddd9f8>] syscall_exit_to_user_mode+0x1c8/0x1e0
[<ffffffff81dd78d5>] do_syscall_64+0x85/0x150
[<ffffffff81e0012b>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
...
This issue can be consistently reproduced on three different servers:
* a 448-core server
* a 256-core server
* a 192-core server
[Root Cause]
Since multiple threads are created by the hackbench program (along with
the command argument 'thread'), a shared vma might be accessed by two or
more cores simultaneously. When two or more cores observe that
vma->numab_state is NULL at the same time, vma->numab_state will be
overwritten.
Although current code ensures that only one thread scans the VMAs in a
single 'numa_scan_period', there might be a chance for another thread
to enter in the next 'numa_scan_period' while we have not gotten till
numab_state allocation [1].
Note that the command `/opt/ltp/testcases/bin/hackbench 50 process 1000`
cannot the reproduce the issue. It is verified with 200+ test runs.
[Solution]
Use the cmpxchg atomic operation to ensure that only one thread executes
the vma->numab_state assignment.
[1] https://lore.kernel.org/lkml/1794be3c-358c-4cdc-a43d-a1f841d91ef7@amd.com/
Link: https://lkml.kernel.org/r/20241113102146.2384-1-ahuang12@lenovo.com
Fixes: ef6a22b70f6d ("sched/numa: apply the scan delay to every new vma")
Signed-off-by: Adrian Huang <ahuang12(a)lenovo.com>
Reported-by: Jiwei Sun <sunjw10(a)lenovo.com>
Reviewed-by: Raghavendra K T <raghavendra.kt(a)amd.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Ben Segall <bsegall(a)google.com>
Cc: Dietmar Eggemann <dietmar.eggemann(a)arm.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Juri Lelli <juri.lelli(a)redhat.com>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: Valentin Schneider <vschneid(a)redhat.com>
Cc: Vincent Guittot <vincent.guittot(a)linaro.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/sched/fair.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
--- a/kernel/sched/fair.c~sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state
+++ a/kernel/sched/fair.c
@@ -3399,10 +3399,16 @@ retry_pids:
/* Initialise new per-VMA NUMAB state. */
if (!vma->numab_state) {
- vma->numab_state = kzalloc(sizeof(struct vma_numab_state),
- GFP_KERNEL);
- if (!vma->numab_state)
+ struct vma_numab_state *ptr;
+
+ ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ continue;
+
+ if (cmpxchg(&vma->numab_state, NULL, ptr)) {
+ kfree(ptr);
continue;
+ }
vma->numab_state->start_scan_seq = mm->numa_scan_seq;
_
Patches currently in -mm which might be from ahuang12(a)lenovo.com are
The quilt patch titled
Subject: mm/damon: fix order of arguments in damos_before_apply tracepoint
has been removed from the -mm tree. Its filename was
mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Akinobu Mita <akinobu.mita(a)gmail.com>
Subject: mm/damon: fix order of arguments in damos_before_apply tracepoint
Date: Fri, 15 Nov 2024 10:20:23 -0800
Since the order of the scheme_idx and target_idx arguments in TP_ARGS is
reversed, they are stored in the trace record in reverse.
Link: https://lkml.kernel.org/r/20241115182023.43118-1-sj@kernel.org
Link: https://patch.msgid.link/20241112154828.40307-1-akinobu.mita@gmail.com
Fixes: c603c630b509 ("mm/damon/core: add a tracepoint for damos apply target regions")
Signed-off-by: Akinobu Mita <akinobu.mita(a)gmail.com>
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/trace/events/damon.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/include/trace/events/damon.h~mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint
+++ a/include/trace/events/damon.h
@@ -15,7 +15,7 @@ TRACE_EVENT_CONDITION(damos_before_apply
unsigned int target_idx, struct damon_region *r,
unsigned int nr_regions, bool do_trace),
- TP_ARGS(context_idx, target_idx, scheme_idx, r, nr_regions, do_trace),
+ TP_ARGS(context_idx, scheme_idx, target_idx, r, nr_regions, do_trace),
TP_CONDITION(do_trace),
_
Patches currently in -mm which might be from akinobu.mita(a)gmail.com are
The quilt patch titled
Subject: lib: stackinit: hide never-taken branch from compiler
has been removed from the -mm tree. Its filename was
lib-stackinit-hide-never-taken-branch-from-compiler.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kees Cook <kees(a)kernel.org>
Subject: lib: stackinit: hide never-taken branch from compiler
Date: Sun, 17 Nov 2024 03:38:13 -0800
The never-taken branch leads to an invalid bounds condition, which is by
design. To avoid the unwanted warning from the compiler, hide the
variable from the optimizer.
../lib/stackinit_kunit.c: In function 'do_nothing_u16_zero':
../lib/stackinit_kunit.c:51:49: error: array subscript 1 is outside array bounds of 'u16[0]' {aka 'short unsigned int[]'} [-Werror=array-bounds=]
51 | #define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr)
| ^~~~~~
../lib/stackinit_kunit.c:219:24: note: in expansion of macro 'DO_NOTHING_RETURN_SCALAR'
219 | return DO_NOTHING_RETURN_ ## which(ptr + 1); \
| ^~~~~~~~~~~~~~~~~~
Link: https://lkml.kernel.org/r/20241117113813.work.735-kees@kernel.org
Signed-off-by: Kees Cook <kees(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/stackinit_kunit.c | 1 +
1 file changed, 1 insertion(+)
--- a/lib/stackinit_kunit.c~lib-stackinit-hide-never-taken-branch-from-compiler
+++ a/lib/stackinit_kunit.c
@@ -212,6 +212,7 @@ static noinline void test_ ## name (stru
static noinline DO_NOTHING_TYPE_ ## which(var_type) \
do_nothing_ ## name(var_type *ptr) \
{ \
+ OPTIMIZER_HIDE_VAR(ptr); \
/* Will always be true, but compiler doesn't know. */ \
if ((unsigned long)ptr > 0x2) \
return DO_NOTHING_RETURN_ ## which(ptr); \
_
Patches currently in -mm which might be from kees(a)kernel.org are
The quilt patch titled
Subject: mm: respect mmap hint address when aligning for THP
has been removed from the -mm tree. Its filename was
mm-respect-mmap-hint-address-when-aligning-for-thp.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kalesh Singh <kaleshsingh(a)google.com>
Subject: mm: respect mmap hint address when aligning for THP
Date: Mon, 18 Nov 2024 13:46:48 -0800
Commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP
boundaries") updated __get_unmapped_area() to align the start address for
the VMA to a PMD boundary if CONFIG_TRANSPARENT_HUGEPAGE=y.
It does this by effectively looking up a region that is of size,
request_size + PMD_SIZE, and aligning up the start to a PMD boundary.
Commit 4ef9ad19e176 ("mm: huge_memory: don't force huge page alignment on
32 bit") opted out of this for 32bit due to regressions in mmap base
randomization.
Commit d4148aeab412 ("mm, mmap: limit THP alignment of anonymous mappings
to PMD-aligned sizes") restricted this to only mmap sizes that are
multiples of the PMD_SIZE due to reported regressions in some performance
benchmarks -- which seemed mostly due to the reduced spatial locality of
related mappings due to the forced PMD-alignment.
Another unintended side effect has emerged: When a user specifies an mmap
hint address, the THP alignment logic modifies the behavior, potentially
ignoring the hint even if a sufficiently large gap exists at the requested
hint location.
Example Scenario:
Consider the following simplified virtual address (VA) space:
...
0x200000-0x400000 --- VMA A
0x400000-0x600000 --- Hole
0x600000-0x800000 --- VMA B
...
A call to mmap() with hint=0x400000 and len=0x200000 behaves differently:
- Before THP alignment: The requested region (size 0x200000) fits into
the gap at 0x400000, so the hint is respected.
- After alignment: The logic searches for a region of size
0x400000 (len + PMD_SIZE) starting at 0x400000.
This search fails due to the mapping at 0x600000 (VMA B), and the hint
is ignored, falling back to arch_get_unmapped_area[_topdown]().
In general the hint is effectively ignored, if there is any existing
mapping in the below range:
[mmap_hint + mmap_size, mmap_hint + mmap_size + PMD_SIZE)
This changes the semantics of mmap hint; from ""Respect the hint if a
sufficiently large gap exists at the requested location" to "Respect the
hint only if an additional PMD-sized gap exists beyond the requested
size".
This has performance implications for allocators that allocate their heap
using mmap but try to keep it "as contiguous as possible" by using the end
of the exisiting heap as the address hint. With the new behavior it's
more likely to get a much less contiguous heap, adding extra fragmentation
and performance overhead.
To restore the expected behavior; don't use
thp_get_unmapped_area_vmflags() when the user provided a hint address, for
anonymous mappings.
Note: As Yang Shi pointed out: the issue still remains for filesystems
which are using thp_get_unmapped_area() for their get_unmapped_area() op.
It is unclear what worklaods will regress for if we ignore THP alignment
when the hint address is provided for such file backed mappings -- so this
fix will be handled separately.
Link: https://lkml.kernel.org/r/20241118214650.3667577-1-kaleshsingh@google.com
Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries")
Signed-off-by: Kalesh Singh <kaleshsingh(a)google.com>
Reviewed-by: Rik van Riel <riel(a)surriel.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Yang Shi <yang(a)os.amperecomputing.com>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Hans Boehm <hboehm(a)google.com>
Cc: Lokesh Gidra <lokeshgidra(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mmap.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/mmap.c~mm-respect-mmap-hint-address-when-aligning-for-thp
+++ a/mm/mmap.c
@@ -889,6 +889,7 @@ __get_unmapped_area(struct file *file, u
if (get_area) {
addr = get_area(file, addr, len, pgoff, flags);
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
+ && !addr /* no hint */
&& IS_ALIGNED(len, PMD_SIZE)) {
/* Ensures that larger anonymous mappings are THP aligned. */
addr = thp_get_unmapped_area_vmflags(file, addr, len,
_
Patches currently in -mm which might be from kaleshsingh(a)google.com are
The quilt patch titled
Subject: mm: memcg: declare do_memsw_account inline
has been removed from the -mm tree. Its filename was
mm-memcg-declare-do_memsw_account-inline.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: John Sperbeck <jsperbeck(a)google.com>
Subject: mm: memcg: declare do_memsw_account inline
Date: Thu, 28 Nov 2024 12:39:59 -0800
In commit 66d60c428b23 ("mm: memcg: move legacy memcg event code into
memcontrol-v1.c"), the static do_memsw_account() function was moved from a
.c file to a .h file. Unfortunately, the traditional inline keyword
wasn't added. If a file (e.g., a unit test) includes the .h file, but
doesn't refer to do_memsw_account(), it will get a warning like:
mm/memcontrol-v1.h:41:13: warning: unused function 'do_memsw_account' [-Wunused-function]
41 | static bool do_memsw_account(void)
| ^~~~~~~~~~~~~~~~
Link: https://lkml.kernel.org/r/20241128203959.726527-1-jsperbeck@google.com
Fixes: 66d60c428b23 ("mm: memcg: move legacy memcg event code into memcontrol-v1.c")
Signed-off-by: John Sperbeck <jsperbeck(a)google.com>
Acked-by: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol-v1.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/memcontrol-v1.h~mm-memcg-declare-do_memsw_account-inline
+++ a/mm/memcontrol-v1.h
@@ -38,7 +38,7 @@ void mem_cgroup_id_put_many(struct mem_c
iter = mem_cgroup_iter(NULL, iter, NULL))
/* Whether legacy memory+swap accounting is active */
-static bool do_memsw_account(void)
+static inline bool do_memsw_account(void)
{
return !cgroup_subsys_on_dfl(memory_cgrp_subsys);
}
_
Patches currently in -mm which might be from jsperbeck(a)google.com are
The quilt patch titled
Subject: ocfs2: update seq_file index in ocfs2_dlm_seq_next
has been removed from the -mm tree. Its filename was
ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next-v2.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Wengang Wang <wen.gang.wang(a)oracle.com>
Subject: ocfs2: update seq_file index in ocfs2_dlm_seq_next
Date: Tue, 19 Nov 2024 09:45:00 -0800
The following INFO level message was seen:
seq_file: buggy .next function ocfs2_dlm_seq_next [ocfs2] did not
update position index
Fix:
Update *pos (so m->index) to make seq_read_iter happy though the index its
self makes no sense to ocfs2_dlm_seq_next.
Link: https://lkml.kernel.org/r/20241119174500.9198-1-wen.gang.wang@oracle.com
Signed-off-by: Wengang Wang <wen.gang.wang(a)oracle.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/dlmglue.c | 1 +
1 file changed, 1 insertion(+)
--- a/fs/ocfs2/dlmglue.c~ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next-v2
+++ a/fs/ocfs2/dlmglue.c
@@ -3110,6 +3110,7 @@ static void *ocfs2_dlm_seq_next(struct s
struct ocfs2_lock_res *iter = v;
struct ocfs2_lock_res *dummy = &priv->p_iter_res;
+ (*pos)++;
spin_lock(&ocfs2_dlm_tracking_lock);
iter = ocfs2_dlm_next_res(iter, priv);
list_del_init(&dummy->l_debug_list);
_
Patches currently in -mm which might be from wen.gang.wang(a)oracle.com are
The quilt patch titled
Subject: stackdepot: fix stack_depot_save_flags() in NMI context
has been removed from the -mm tree. Its filename was
stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Marco Elver <elver(a)google.com>
Subject: stackdepot: fix stack_depot_save_flags() in NMI context
Date: Fri, 22 Nov 2024 16:39:47 +0100
Per documentation, stack_depot_save_flags() was meant to be usable from
NMI context if STACK_DEPOT_FLAG_CAN_ALLOC is unset. However, it still
would try to take the pool_lock in an attempt to save a stack trace in the
current pool (if space is available).
This could result in deadlock if an NMI is handled while pool_lock is
already held. To avoid deadlock, only try to take the lock in NMI context
and give up if unsuccessful.
The documentation is fixed to clearly convey this.
Link: https://lkml.kernel.org/r/Z0CcyfbPqmxJ9uJH@elver.google.com
Link: https://lkml.kernel.org/r/20241122154051.3914732-1-elver@google.com
Fixes: 4434a56ec209 ("stackdepot: make fast paths lock-less again")
Signed-off-by: Marco Elver <elver(a)google.com>
Reported-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/stackdepot.h | 6 +++---
lib/stackdepot.c | 10 +++++++++-
2 files changed, 12 insertions(+), 4 deletions(-)
--- a/include/linux/stackdepot.h~stackdepot-fix-stack_depot_save_flags-in-nmi-context
+++ a/include/linux/stackdepot.h
@@ -147,7 +147,7 @@ static inline int stack_depot_early_init
* If the provided stack trace comes from the interrupt context, only the part
* up to the interrupt entry is saved.
*
- * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if
+ * Context: Any context, but unsetting STACK_DEPOT_FLAG_CAN_ALLOC is required if
* alloc_pages() cannot be used from the current context. Currently
* this is the case for contexts where neither %GFP_ATOMIC nor
* %GFP_NOWAIT can be used (NMI, raw_spin_lock).
@@ -156,7 +156,7 @@ static inline int stack_depot_early_init
*/
depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
unsigned int nr_entries,
- gfp_t gfp_flags,
+ gfp_t alloc_flags,
depot_flags_t depot_flags);
/**
@@ -175,7 +175,7 @@ depot_stack_handle_t stack_depot_save_fl
* Return: Handle of the stack trace stored in depot, 0 on failure
*/
depot_stack_handle_t stack_depot_save(unsigned long *entries,
- unsigned int nr_entries, gfp_t gfp_flags);
+ unsigned int nr_entries, gfp_t alloc_flags);
/**
* __stack_depot_get_stack_record - Get a pointer to a stack_record struct
--- a/lib/stackdepot.c~stackdepot-fix-stack_depot_save_flags-in-nmi-context
+++ a/lib/stackdepot.c
@@ -630,7 +630,15 @@ depot_stack_handle_t stack_depot_save_fl
prealloc = page_address(page);
}
- raw_spin_lock_irqsave(&pool_lock, flags);
+ if (in_nmi()) {
+ /* We can never allocate in NMI context. */
+ WARN_ON_ONCE(can_alloc);
+ /* Best effort; bail if we fail to take the lock. */
+ if (!raw_spin_trylock_irqsave(&pool_lock, flags))
+ goto exit;
+ } else {
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ }
printk_deferred_enter();
/* Try to find again, to avoid concurrently inserting duplicates. */
_
Patches currently in -mm which might be from elver(a)google.com are