On Sun, 2024-11-24 at 07:46 -0500, Sasha Levin wrote:
From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
[ Upstream commit 823a566221a5639f6c69424897218e5d6431a970 ]
When using mutex_acquire_nest() with a nest_lock, lockdep refcounts the number of acquired lockdep_maps of mutexes of the same class, and also keeps a pointer to the first acquired lockdep_map of a class. That pointer is then used for various comparison-, printing- and checking purposes, but there is no mechanism to actively ensure that lockdep_map stays in memory. Instead, a warning is printed if the lockdep_map is freed and there are still held locks of the same lock class, even if the lockdep_map itself has been released.
In the context of WW/WD transactions that means that if a user unlocks and frees a ww_mutex from within an ongoing ww transaction, and that mutex happens to be the first ww_mutex grabbed in the transaction, such a warning is printed and there might be a risk of a UAF.
Note that this is only a problem when lockdep is enabled and affects only dereferences of struct lockdep_map.
Adjust to this by adding a fake lockdep_map to the acquire context and making sure it is the first acquired lockdep map of the associated ww_mutex class. Then hold it for the duration of the WW/WD transaction.
This has the side effect that trying to lock a ww mutex *without* a ww_acquire_context while such a context has been acquired produces a lockdep splat. The test-ww_mutex.c selftest attempts to do that, so modify that particular test to not acquire a ww_acquire_context if it is not going to be used.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20241009092031.6356-1-thomas.hellstrom@linux.intel...
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit introduces regressions and should not be backported; please see the corresponding patch for 6.12 for a discussion.
Thanks, Thomas
 include/linux/ww_mutex.h       | 14 ++++++++++++++
 kernel/locking/test-ww_mutex.c |  8 +++++---
 2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index bb763085479af..a401a2f31a775 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -65,6 +65,16 @@ struct ww_acquire_ctx { #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map;
+	/**
+	 * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex.
+	 *
+	 * lockdep requires the lockdep_map for the first locked ww_mutex
+	 * in a ww transaction to remain in memory until all ww_mutexes of
+	 * the transaction have been unlocked. Ensure this by keeping a
+	 * fake locked ww_mutex lockdep map between ww_acquire_init() and
+	 * ww_acquire_fini().
+	 */
+	struct lockdep_map first_lock_dep_map;
#endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; @@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0);
+	lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name,
+			 &ww_class->mutex_key, 0);
 	mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_);
+	mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_);
#endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; @@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC
+	mutex_release(&ctx->first_lock_dep_map, _THIS_IP_);
mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test- ww_mutex.c index 7c5a8f05497f2..02b84288865ca 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -62,7 +62,8 @@ static int __test_mutex(unsigned int flags) int ret; ww_mutex_init(&mtx.mutex, &ww_class);
-	ww_acquire_init(&ctx, &ww_class);
+	if (flags & TEST_MTX_CTX)
+		ww_acquire_init(&ctx, &ww_class);
INIT_WORK_ONSTACK(&mtx.work, test_mutex_work); init_completion(&mtx.ready); @@ -90,7 +91,8 @@ static int __test_mutex(unsigned int flags) ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); } ww_mutex_unlock(&mtx.mutex);
-	ww_acquire_fini(&ctx);
+	if (flags & TEST_MTX_CTX)
+		ww_acquire_fini(&ctx);
if (ret) { pr_err("%s(flags=%x): mutual exclusion failure\n", @@ -663,7 +665,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret;
-	ret = stress(2047, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
+	ret = stress(2046, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
if (ret) return ret;