Manfred Spraul <manfred@colorfullife.com> writes:
Hello everyone,
On 11/5/21 22:34, Eric W. Biederman wrote:
+static inline void shm_clist_del(struct shmid_kernel *shp)
+{
+        struct task_struct *creator;
+
+        rcu_read_lock();
+        creator = rcu_dereference(shp->shm_creator);
+        if (creator) {
+                task_lock(creator);
+                list_del(&shp->shm_clist);
+                task_unlock(creator);
+        }
+        rcu_read_unlock();
+}
shm_clist_del() only synchronizes against exit_shm() when shm_creator is not NULL.
        list_del(&shp->shm_clist);
        rcu_assign_pointer(shp->shm_creator, NULL);

We set shm_creator to NULL, so there is no more synchronization.
Now IPC_RMID can run in parallel, regardless of whether we test for list_empty() or shm_creator.
        /* Guarantee shp lives after task_lock is dropped */
        ipc_getref(&shp->shm_perm);

task_lock() doesn't help: as soon as shm_creator is set to NULL, IPC_RMID won't acquire task_lock() anymore.
Thus shp can disappear before we arrive at this ipc_getref.
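To make the window concrete, a rough sketch of the interleaving (only a sketch, and it assumes a concurrent shmctl(IPC_RMID) reaches shm_destroy() for this segment):

        exit_shm() with the patch               shmctl(IPC_RMID)
        -------------------------               ----------------
        task_lock(task)
        list_del(&shp->shm_clist)
        rcu_assign_pointer(shp->shm_creator, NULL)
                                                shm_clist_del(shp):
                                                  shm_creator is NULL,
                                                  so task_lock() is skipped
                                                shm_destroy(ns, shp)
                                                  -> shp can be freed
        ipc_getref(&shp->shm_perm)
          -> use-after-free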
[Yes, I think I have introduced this bug.]
Corrected version attached.
I'll reboot and retest the patch, then send it to akpm as a replacement for the current patch in mmotm.
--
Manfred
@@ -382,48 +425,94 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
 /* Locking assumes this will only be called with task == current */
 void exit_shm(struct task_struct *task)
 {
-        struct ipc_namespace *ns = task->nsproxy->ipc_ns;
-        struct shmid_kernel *shp, *n;
+        for (;;) {
+                struct shmid_kernel *shp;
+                struct ipc_namespace *ns;
+
-        if (list_empty(&task->sysvshm.shm_clist))
-                return;
+                task_lock(task);
+
+                if (list_empty(&task->sysvshm.shm_clist)) {
+                        task_unlock(task);
+                        break;
+                }
+
+                shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
+                                shm_clist);
+
-        /*
-         * If kernel.shm_rmid_forced is not set then only keep track of
-         * which shmids are orphaned, so that a later set of the sysctl
-         * can clean them up.
-         */
-        if (!ns->shm_rmid_forced) {
-                down_read(&shm_ids(ns).rwsem);
-                list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
-                        shp->shm_creator = NULL;
-                /*
-                 * Only under read lock but we are only called on current
-                 * so no entry on the list will be shared.
-                 */
+                /*
+                 * 1) get a reference to shp.
+                 *    This must be done first: Right now, task_lock() prevents
+                 *    any concurrent IPC_RMID calls. After the list_del_init(),
+                 *    IPC_RMID will not acquire task_lock(->shm_creator)
+                 *    anymore.
+                 */
-                list_del(&task->sysvshm.shm_clist);
-                up_read(&shm_ids(ns).rwsem);
-                return;
-        }
+                WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
+
-        /*
-         * Destroy all already created segments, that were not yet mapped,
-         * and mark any mapped as orphan to cover the sysctl toggling.
-         * Destroy is skipped if shm_may_destroy() returns false.
-         */
-        down_write(&shm_ids(ns).rwsem);
-        list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
-                shp->shm_creator = NULL;
+                /* 2) unlink */
+                list_del_init(&shp->shm_clist);
+
+                /*
+                 * 3) Get pointer to the ipc namespace. It is worth to say
+                 * that this pointer is guaranteed to be valid because
+                 * shp lifetime is always shorter than namespace lifetime
+                 * in which shp lives.
+                 * We taken task_lock it means that shp won't be freed.
+                 */
+                ns = shp->ns;
+
-                if (shm_may_destroy(ns, shp)) {
-                        shm_lock_by_ptr(shp);
-                        shm_destroy(ns, shp);
+                /*
+                 * 4) If kernel.shm_rmid_forced is not set then only keep track of
+                 * which shmids are orphaned, so that a later set of the sysctl
+                 * can clean them up.
+                 */
+                if (!ns->shm_rmid_forced) {
+                        ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+                        task_unlock(task);
+                        continue;
                 }
-        }
-
-        /* Remove the list head from any segments still attached. */
-        list_del(&task->sysvshm.shm_clist);
-        up_write(&shm_ids(ns).rwsem);
+
+                /*
+                 * 5) get a reference to the namespace.
+                 *    The refcount could be already 0. If it is 0, then
+                 *    the shm objects will be free by free_ipc_work().
+                 */
+                ns = get_ipc_ns_not_zero(ns);
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Isn't this increment also too late? Doesn't this need to move up by the ipc_rcu_getref, while shp is still on the list?
Assuming the code is running in parallel with shm_exit_ns: after removal from shm_clist, shm_destroy can run to completion, shm_exit_ns can run to completion, and the ipc namespace can be freed.
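Roughly the following interleaving (only a sketch, assuming the segment's ipc namespace is concurrently torn down once shp is off shm_clist):

        exit_shm() with this patch              concurrent teardown
        --------------------------              -------------------
        WARN_ON(!ipc_rcu_getref(&shp->shm_perm))
        list_del_init(&shp->shm_clist)
        ns = shp->ns
                                                shm_destroy() runs to completion
                                                shm_exit_ns() runs to completion
                                                the ipc namespace is freed
        ns = get_ipc_ns_not_zero(ns)
          -> reads an already freed namespace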
Eric