/me wonders what's up with partial Cc's today..
On Wed, Jun 13, 2018 at 09:47:44AM +0200, Thomas Hellstrom wrote:
The current Wound-Wait mutex algorithm is actually not Wound-Wait but Wait-Die. Also implement Wound-Wait, as a per-ww-class choice. Wound-Wait is, contrary to Wait-Die, a preemptive algorithm and is known to generate fewer backoffs. Testing reveals that this is true if the number of simultaneously contending transactions is small. As the number of simultaneously contending threads increases, Wound-Wait becomes inferior to Wait-Die in terms of elapsed time, possibly due to the larger number of locks held by sleeping transactions.
Update documentation and callers.
Timings using git://people.freedesktop.org/~thomash/ww_mutex_test tag patch-18-06-04
Each thread runs 100000 batches; each batch locks and then unlocks 800 ww mutexes randomly chosen out of 100000. Four-core Intel x86_64:
Algorithm    #threads   Rollbacks   time
Wound-Wait   4          ~100        ~17s.
Wait-Die     4          ~150000     ~19s.
Wound-Wait   16         ~360000     ~109s.
Wait-Die     16         ~450000     ~82s.
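For reference, the difference between the two schemes is only in who backs off on contention. A rough illustrative sketch, not the kernel implementation; the enum and function names are made up:

enum ww_action { WW_WAIT, WW_BACK_OFF, WW_WOUND_HOLDER };

/* "stamp" grows with time, so a lower stamp means an older transaction. */
static enum ww_action
wait_die(unsigned long requester, unsigned long holder)
{
	/* Wait-Die: an older requester waits, a younger one backs off (dies). */
	return requester < holder ? WW_WAIT : WW_BACK_OFF;
}

static enum ww_action
wound_wait(unsigned long requester, unsigned long holder)
{
	/* Wound-Wait: an older requester preempts (wounds) the holder,
	 * a younger one waits. */
	return requester < holder ? WW_WOUND_HOLDER : WW_WAIT;
}

In both cases the older transaction makes progress; the schemes differ only in which side pays for the rollback.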
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 39fda195bf78..6278077f288b 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -8,6 +8,8 @@
  * Wound/wait implementation:
  *  Copyright (C) 2013 Canonical Ltd.
+ * Choice of algorithm:
+ *  Copyright (C) 2018 WMWare Inc.
  *
  * This file contains the main data structure and API definitions.
  */
@@ -23,15 +25,17 @@ struct ww_class {
 	struct lock_class_key mutex_key;
 	const char *acquire_name;
 	const char *mutex_name;
+	bool is_wait_die;
 };
No _Bool in composites please.
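Presumably something like this is wanted instead (sketch from memory of the struct layout; the same would apply to the new wounded flag below):

struct ww_class {
	atomic_long_t stamp;
	struct lock_class_key acquire_key;
	struct lock_class_key mutex_key;
	const char *acquire_name;
	const char *mutex_name;
	unsigned int is_wait_die;	/* rather than: bool is_wait_die; */
};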
 struct ww_acquire_ctx {
 	struct task_struct *task;
 	unsigned long stamp;
 	unsigned acquired;
+	bool wounded;
Again.
+	struct ww_class *ww_class;
 #ifdef CONFIG_DEBUG_MUTEXES
 	unsigned done_acquire;
-	struct ww_class *ww_class;
 	struct ww_mutex *contending_lock;
 #endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2048359f33d2..b449a012c6f9 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -290,12 +290,47 @@ __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
 	       (a->stamp != b->stamp || a > b);
 }
 
+/*
+ * Wound the lock holder transaction if it's younger than the contending
+ * transaction, and there is a possibility of a deadlock.
+ * Also if the lock holder transaction isn't the current transaction,
Comma followed by a capital?
+ * Make sure it's woken up in case it's sleeping on another ww mutex.
+ */
+static bool __ww_mutex_wound(struct mutex *lock,
+			     struct ww_acquire_ctx *ww_ctx,
+			     struct ww_acquire_ctx *hold_ctx)
+{
+	struct task_struct *owner =
+		__owner_task(atomic_long_read(&lock->owner));
Did you just spell __mutex_owner() wrong?
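The existing helper in kernel/locking/mutex.c already does the masking (quoted from memory, so double-check the exact form):

static inline struct task_struct *__mutex_owner(struct mutex *lock)
{
	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
}

so the open-coded lookup above could simply be:

	struct task_struct *owner = __mutex_owner(lock);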
+	lockdep_assert_held(&lock->wait_lock);
+
+	if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) &&
+	    ww_ctx->acquired > 0) {
+		WRITE_ONCE(hold_ctx->wounded, true);
+		if (owner != current) {
+			/*
+			 * wake_up_process() inserts a write memory barrier to
It does no such thing. But yes, it does ensure the wakee sees all prior stores IFF the wakeup happened.
+			 * make sure owner sees it is wounded before
+			 * TASK_RUNNING in case it's sleeping on another
+			 * ww_mutex. Note that owner points to a valid
+			 * task_struct as long as we hold the wait_lock.
+			 */
What exactly are you trying to say here ?
I'm thinking this is the pairing barrier to the smp_mb() below, with your list_empty() thing? Might make sense to write a single coherent comment and refer to the other location.
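One possible wording for a single merged comment (a sketch only; the exact ordering argument has to come from the patch author):

			/*
			 * wake_up_process() paired with the wakee's
			 * set_current_state() provides enough barriers that
			 * @owner either observes hold_ctx->wounded when it
			 * next runs __ww_mutex_lock_check_stamp(), or has a
			 * wakeup pending and re-checks it after the
			 * schedule() fall-through.
			 */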
+			wake_up_process(owner);
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Wake up any waiters that may have to back off when the lock is held by the
  * given context.
  *
  * Due to the invariants on the wait list, this can only affect the first
- * waiter with a context.
+ * waiter with a context, unless the Wound-Wait algorithm is used where
+ * also subsequent waiters with a context may wound the lock holder.
  *
  * The current task must not be on the wait list.
  */
@@ -303,6 +338,7 @@ static void __sched
 __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 {
 	struct mutex_waiter *cur;
+	bool is_wait_die = ww_ctx->ww_class->is_wait_die;
 
 	lockdep_assert_held(&lock->wait_lock);
 
@@ -310,13 +346,14 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 		if (!cur->ww_ctx)
 			continue;
 
-		if (cur->ww_ctx->acquired > 0 &&
+		if (is_wait_die && cur->ww_ctx->acquired > 0 &&
 				__ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
 			debug_mutex_wake_waiter(lock, cur);
 			wake_up_process(cur->task);
 		}
 
-		break;
+		if (is_wait_die || __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
+			break;
 	}
 }
 
@@ -338,12 +375,17 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 	 * and keep spinning, or it will acquire wait_lock, add itself
 	 * to waiter list and sleep.
 	 */
-	smp_mb(); /* ^^^ */
+	smp_mb(); /* See comments above and below. */
 
 	/*
-	 * Check if lock is contended, if not there is nobody to wake up
+	 * Check if lock is contended, if not there is nobody to wake up.
+	 * Checking MUTEX_FLAG_WAITERS is not enough here, since we need to
That seems like a superfluous thing to say. It makes sense in the context of this patch because we change the FLAG check into a list check, but the resulting comment/code looks odd.
+	 * order against the lock->ctx check in __ww_mutex_wound called from
+	 * __ww_mutex_add_waiter. We can use list_empty without taking the
+	 * wait_lock, given the memory barrier above and the list_empty
+	 * documentation.
I don't trust documentation. Please reason about implementation.
+	 */
 
-	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
+	if (likely(list_empty(&lock->base.wait_list)))
 		return;
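FWIW, the implementation-level argument (as far as I can reconstruct it; a sketch of a comment, to be verified) is the usual store-buffer pattern between the two sides, plus list_empty() being a single READ_ONCE() of ->next:

	/*
	 * We must order the ww->ctx store above against reading the wait
	 * list here; __ww_mutex_add_waiter() orders its list_add_tail()
	 * against reading ww->ctx with its own smp_mb():
	 *
	 *	lock taker (here)		waiter
	 *	ww->ctx = ctx			list_add_tail()
	 *	smp_mb()			smp_mb()
	 *	list_empty(wait_list)?		read ww->ctx
	 *
	 * At least one side must see the other's store: either we see the
	 * waiter and wake it up for backoff, or the waiter sees ww->ctx and
	 * wounds us via __ww_mutex_wound().  list_empty() is a single
	 * READ_ONCE() of wait_list.next, so it is safe without wait_lock.
	 */
	if (likely(list_empty(&lock->base.wait_list)))
		return;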
 	/*
 
@@ -653,6 +695,17 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
 	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 	struct mutex_waiter *cur;
 
+	/*
+	 * If we miss a wounded == true here, we will have a pending
Explain how we can miss that.
+	 * TASK_RUNNING and pick it up on the next schedule fall-through.
+	 */
+	if (!ctx->ww_class->is_wait_die) {
+		if (READ_ONCE(ctx->wounded))
+			goto deadlock;
+		else
+			return 0;
+	}
+
 	if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
 		goto deadlock;
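A stab at the missing explanation (my reading of the patch, to be confirmed): the wounding side runs under the wait_lock of the mutex *it* is acquiring, while this check runs under the wait_lock of whatever mutex we are currently sleeping on, so nothing serialises the two. Something like:

	/*
	 * The wounding transaction does, under a different mutex's wait_lock:
	 *
	 *	WRITE_ONCE(hold_ctx->wounded, true);
	 *	wake_up_process(owner);
	 *
	 * so the READ_ONCE(ctx->wounded) above can execute before that store
	 * becomes visible and miss it.  In that case the wakeup (which
	 * follows the store) leaves us in TASK_RUNNING, the next schedule()
	 * falls through immediately, and the following iteration of the wait
	 * loop re-runs this check and observes the store, thanks to the
	 * ordering implied by wake_up_process()/set_current_state().
	 */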
@@ -683,12 +736,15 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 {
 	struct mutex_waiter *cur;
 	struct list_head *pos;
+	bool is_wait_die;
 
 	if (!ww_ctx) {
 		list_add_tail(&waiter->list, &lock->wait_list);
 		return 0;
 	}
 
+	is_wait_die = ww_ctx->ww_class->is_wait_die;
+
 	/*
 	 * Add the waiter before the first waiter with a higher stamp.
 	 * Waiters without a context are skipped to avoid starving
@@ -701,7 +757,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 
 		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
 			/* Back off immediately if necessary. */
-			if (ww_ctx->acquired > 0) {
+			if (is_wait_die && ww_ctx->acquired > 0) {
 #ifdef CONFIG_DEBUG_MUTEXES
 				struct ww_mutex *ww;
 
@@ -721,13 +777,26 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 		 * Wake up the waiter so that it gets a chance to back
 		 * off.
 		 */
-		if (cur->ww_ctx->acquired > 0) {
+		if (is_wait_die && cur->ww_ctx->acquired > 0) {
 			debug_mutex_wake_waiter(lock, cur);
 			wake_up_process(cur->task);
 		}
 	}
 
 	list_add_tail(&waiter->list, pos);
+
+	if (!is_wait_die) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+
+		/*
+		 * Make sure a racing lock taker sees a non-empty waiting list
+		 * before we read ww->ctx, so that if we miss ww->ctx, the
+		 * racing lock taker will call __ww_mutex_wake_up_for_backoff()
+		 * and wound itself.
+		 */
+		smp_mb();
+		__ww_mutex_wound(lock, ww_ctx, ww->ctx);
+	}
+
 	return 0;
 }
 
@@ -750,6 +819,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	if (use_ww_ctx && ww_ctx) {
 		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
 			return -EALREADY;
+
+		/*
+		 * Reset the wounded flag after a backoff.
+		 * No other process can race and wound us here since they
+		 * can't have a valid owner pointer at this time
+		 */
+		if (ww_ctx->acquired == 0)
+			ww_ctx->wounded = false;
 	}
 
 	preempt_disable();
 
@@ -858,6 +935,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 acquired:
 	__set_current_state(TASK_RUNNING);
 
+	/* We stole the lock. Need to check wounded status. */
+	if (use_ww_ctx && ww_ctx && !ww_ctx->ww_class->is_wait_die &&
+	    !__mutex_waiter_is_first(lock, &waiter))
+		__ww_mutex_wakeup_for_backoff(lock, ww_ctx);
+
 	mutex_remove_waiter(lock, &waiter, current);
 	if (likely(list_empty(&lock->wait_list)))
 		__mutex_clear_flag(lock, MUTEX_FLAGS);
I can't say I'm a fan. I'm already cursing the ww_mutex stuff every time I have to look at it, and you just made it worse spaghetti.