Hi Eric,
On 11/5/21 22:34, Eric W. Biederman wrote:
I have to dash so this is short.
As last time, I'll review the change and check for new/good ideas.
As first question: Is the change tested?
[...]
/* The task created the shm object. NULL if the task is dead. */
- struct task_struct *shm_creator;
- struct task_struct __rcu *shm_creator; struct list_head shm_clist; /* list by creator */
- struct ipc_namespace *shm_ns; /* valid when shm_nattch != 0 */ } __randomize_layout;
There is no reason to modify shm_creator:
We need _one_ indicator that the creator has died, not two.
We have both list_empty() and shm_creator. Thus we should/must define what is the relevant indicator, and every function must use the same one.
exit_sem() must walk shm_clist. list_empty() must return the correct answer.
Thus I think it is simpler that list_empty() is the indicator.
In addition, as you have correctly noticed: If we make shm_creator==NULL the indicator, then we must use at __rcu or at least READ_ONCE() accessors.
But: This would only solve a self created problem. Just leave shm_creator unmodified - and the need for READ_ONCE() goes away.
/* shm_mode upper byte flags */ @@ -106,29 +107,17 @@ void shm_init_ns(struct ipc_namespace *ns) ipc_init_ids(&shm_ids(ns)); } -/*
- Called with shm_ids.rwsem (writer) and the shp structure locked.
- Only shm_ids.rwsem remains locked on exit.
- */
-static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) +static void do_shm_destroy(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) {
- struct shmid_kernel *shp;
- shp = container_of(ipcp, struct shmid_kernel, shm_perm);
- if (shp->shm_nattch) {
shp->shm_perm.mode |= SHM_DEST;
/* Do not find it any more */
ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
shm_unlock(shp);
- } else
shm_destroy(ns, shp);
- struct shmid_kernel *shp =
container_of(ipcp, struct shmid_kernel, shm_perm);
- shm_destroy(ns, shp); }
#ifdef CONFIG_IPC_NS void shm_exit_ns(struct ipc_namespace *ns) {
- free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
- free_ipcs(ns, &shm_ids(ns), do_shm_destroy); idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); }
@@ -225,9 +214,22 @@ static void shm_rcu_free(struct rcu_head *head) kfree(shp); } +static inline void shm_clist_del(struct shmid_kernel *shp) +{
- struct task_struct *creator;
- rcu_read_lock();
- creator = rcu_dereference(shp->shm_creator);
- if (creator) {
task_lock(creator);
list_del(&shp->shm_clist);
Does this work? You are using list_del() instead of list_del_init().
I fear that this might break exit_sem()
task_unlock(creator);
- }
- rcu_read_unlock();
+}
- static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) {
- list_del(&s->shm_clist); ipc_rmid(&shm_ids(ns), &s->shm_perm); }
@@ -283,7 +285,9 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) shm_file = shp->shm_file; shp->shm_file = NULL; ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
- shm_clist_del(shp); shm_rmid(ns, shp);
- shp->shm_ns = NULL; shm_unlock(shp); if (!is_file_hugepages(shm_file)) shmem_lock(shm_file, 0, shp->mlock_ucounts);
@@ -361,7 +365,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) * * As shp->* are changed under rwsem, it's safe to skip shp locking. */
- if (shp->shm_creator != NULL)
- if (rcu_access_pointer(shp->shm_creator) != NULL) return 0;
if (shm_may_destroy(ns, shp)) { @@ -382,48 +386,62 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) /* Locking assumes this will only be called with task == current */ void exit_shm(struct task_struct *task) {
- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
- struct shmid_kernel *shp, *n;
- if (list_empty(&task->sysvshm.shm_clist))
return;
- /*
* If kernel.shm_rmid_forced is not set then only keep track of
* which shmids are orphaned, so that a later set of the sysctl
* can clean them up.
*/
- if (!ns->shm_rmid_forced) {
down_read(&shm_ids(ns).rwsem);
list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
shp->shm_creator = NULL;
/*
* Only under read lock but we are only called on current
* so no entry on the list will be shared.
*/
list_del(&task->sysvshm.shm_clist);
up_read(&shm_ids(ns).rwsem);
return;
- }
- struct list_head *head = &task->sysvshm.shm_clist;
/* * Destroy all already created segments, that were not yet mapped, * and mark any mapped as orphan to cover the sysctl toggling. * Destroy is skipped if shm_may_destroy() returns false. */
- down_write(&shm_ids(ns).rwsem);
- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
shp->shm_creator = NULL;
- for (;;) {
struct ipc_namespace *ns;
struct shmid_kernel *shp;
if (shm_may_destroy(ns, shp)) {
task_lock(task);
if (list_empty(head)) {
task_unlock(task);
break;
}
shp = list_first_entry(head, struct shmid_kernel, shm_clist);
list_del(&shp->shm_clist);
rcu_assign_pointer(shp->shm_creator, NULL);
/*
* Guarantee that ns lives after task_list is dropped.
*
* This shm segment may not be attached and it's ipc
* namespace may be exiting. If so ignore the shm
* segment as it will be destroyed by shm_exit_ns.
*/
ns = get_ipc_ns_not_zero(shp->shm_ns);
if (!ns) {
task_unlock(task);
continue;
}
/* Guarantee shp lives after task_lock is dropped */
ipc_getref(&shp->shm_perm);
/* Drop task_lock so that shm_destroy may take it */
task_unlock(task);
/* Can the shm segment be destroyed? */
down_write(&shm_ids(ns).rwsem);
shm_lock_by_ptr(shp);
if (ipc_valid_object(&shp->shm_perm) &&
shm_may_destroy(ns, shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp);
} else {
}shm_unlock(shp);
- }
- /* Remove the list head from any segments still attached. */
- list_del(&task->sysvshm.shm_clist);
- up_write(&shm_ids(ns).rwsem);
ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
up_write(&shm_ids(ns).rwsem);
put_ipc_ns(ns);
- } }
static vm_fault_t shm_fault(struct vm_fault *vmf) @@ -673,14 +691,17 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_segsz = size; shp->shm_nattch = 0; shp->shm_file = file;
- shp->shm_creator = current;
- RCU_INIT_POINTER(shp->shm_creator, current);
- shp->shm_ns = ns;
/* ipc_addid() locks shp upon success. */ error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); if (error < 0) goto no_id;
- task_lock(current); list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist);
- task_unlock(current);
/* * shmid gets reported as "inode#" in /proc/pid/maps. @@ -913,8 +934,14 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, switch (cmd) { case IPC_RMID: ipc_lock_object(&shp->shm_perm);
/* do_shm_rmid unlocks the ipc object and rcu */
do_shm_rmid(ns, ipcp);
if (shp->shm_nattch) {
shp->shm_perm.mode |= SHM_DEST;
/* Do not find it any more */
ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
shm_unlock(shp);
} else
shm_destroy(ns, shp);
goto out_up; case IPC_SET: ipc_lock_object(&shp->shm_perm);/* shm_unlock unlocked the ipc object and rcu */
diff --git a/ipc/util.c b/ipc/util.c index fa2d86ef3fb8..58228f342397 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -525,6 +525,11 @@ void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) ipcp->key = IPC_PRIVATE; } +void ipc_getref(struct kern_ipc_perm *ptr) +{
- return refcount_inc(&ptr->refcount);
+}
- bool ipc_rcu_getref(struct kern_ipc_perm *ptr) { return refcount_inc_not_zero(&ptr->refcount);
diff --git a/ipc/util.h b/ipc/util.h index 2dd7ce0416d8..e13b46ff675f 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -170,6 +170,7 @@ static inline int ipc_get_maxidx(struct ipc_ids *ids)
- refcount is initialized by ipc_addid(), before that point call_rcu()
- must be used.
*/ +void ipc_getref(struct kern_ipc_perm *ptr); bool ipc_rcu_getref(struct kern_ipc_perm *ptr); void ipc_rcu_putref(struct kern_ipc_perm *ptr, void (*func)(struct rcu_head *head)); diff --git a/kernel/fork.c b/kernel/fork.c index 38681ad44c76..3e881f78bcf2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3095,7 +3095,6 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWIPC) { /* Orphan segments in old ns (see sem above). */ exit_shm(current);
}shm_init_task(current);
if (new_nsproxy)