Manfred Spraul <manfred@colorfullife.com> writes:
Hello everyone,
On 11/5/21 22:34, Eric W. Biederman wrote:
+static inline void shm_clist_del(struct shmid_kernel *shp)
+{
- struct task_struct *creator;
- rcu_read_lock();
- creator = rcu_dereference(shp->shm_creator);
- if (creator) {
task_lock(creator);
list_del(&shp->shm_clist);
task_unlock(creator);
- }
- rcu_read_unlock();
+}
shm_clist_del() only synchronizes against exit_shm() when shm_creator is not NULL.
list_del(&shp->shm_clist);
rcu_assign_pointer(shp->shm_creator, NULL);
We set shm_creator to NULL -> no more synchronization.
Now IPC_RMID can run in parallel - regardless if we test for list_empty() or shm_creator.
/* Guarantee shp lives after task_lock is dropped */
ipc_getref(&shp->shm_perm);
task_lock() doesn't help: As soon as shm_creator is set to NULL, IPC_RMID won't acquire task_lock() anymore.
Thus shp can disappear before we arrive at this ipc_getref.
[Yes, I think I have introduced this bug. ]
Corrected version attached.
I'll reboot and retest the patch, then I would send it to akpm as replacement for current patch in mmotm.
--
Manfred
@@ -382,48 +425,94 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
 /* Locking assumes this will only be called with task == current */
 void exit_shm(struct task_struct *task)
 {
- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
- struct shmid_kernel *shp, *n;
- for (;;) {
struct shmid_kernel *shp;
struct ipc_namespace *ns;
- if (list_empty(&task->sysvshm.shm_clist))
return;
task_lock(task);
if (list_empty(&task->sysvshm.shm_clist)) {
task_unlock(task);
break;
}
shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
shm_clist);
- /*
* If kernel.shm_rmid_forced is not set then only keep track of
* which shmids are orphaned, so that a later set of the sysctl
* can clean them up.
*/
- if (!ns->shm_rmid_forced) {
down_read(&shm_ids(ns).rwsem);
list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
shp->shm_creator = NULL;
/*
 * Only under read lock but we are only called on current
 * so no entry on the list will be shared.
 */
/*
 * 1) get a reference to shp.
 * This must be done first: Right now, task_lock() prevents
 * any concurrent IPC_RMID calls. After the list_del_init(),
 * IPC_RMID will not acquire task_lock(->shm_creator)
 * anymore.
 */
list_del(&task->sysvshm.shm_clist);
up_read(&shm_ids(ns).rwsem);
return;
- }
WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
- /*
* Destroy all already created segments, that were not yet mapped,
* and mark any mapped as orphan to cover the sysctl toggling.
* Destroy is skipped if shm_may_destroy() returns false.
*/
- down_write(&shm_ids(ns).rwsem);
- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
shp->shm_creator = NULL;
/* 2) unlink */
list_del_init(&shp->shm_clist);
/*
* 3) Get pointer to the ipc namespace. It is worth to say
* that this pointer is guaranteed to be valid because
* shp lifetime is always shorter than namespace lifetime
* in which shp lives.
 * We have taken task_lock(), which means that shp won't be freed.
*/
ns = shp->ns;
if (shm_may_destroy(ns, shp)) {
shm_lock_by_ptr(shp);
shm_destroy(ns, shp);
/*
* 4) If kernel.shm_rmid_forced is not set then only keep track of
* which shmids are orphaned, so that a later set of the sysctl
* can clean them up.
*/
if (!ns->shm_rmid_forced) {
ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
task_unlock(task);
	continue;
}
- }
- /* Remove the list head from any segments still attached. */
- list_del(&task->sysvshm.shm_clist);
- up_write(&shm_ids(ns).rwsem);
/*
* 5) get a reference to the namespace.
* The refcount could be already 0. If it is 0, then
* the shm objects will be free by free_ipc_work().
*/
ns = get_ipc_ns_not_zero(ns);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Isn't this increment also too late? Doesn't this need to move up by ipc_rcu_getref while shp is still on the list?
Assuming the code is running in parallel with shm_exit_ns: after removal from shm_clist, shm_destroy can run to completion, shm_exit_ns can run to completion, and the ipc namespace can be freed.
Eric