The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
Possible dependencies:
46307fd6e27a ("cgroup: Reorganize css_set_lock and kernfs path processing") 4534dee94105 ("cgroup: cgroup: Honor caller's cgroup NS when resolving cgroup id") 74e4b956eb1c ("cgroup: Honor caller's cgroup NS when resolving path") e210a89f5b07 ("cgroup.c: add helper __cset_cgroup_from_root to cleanup duplicated codes") be288169712f ("cgroup: reduce dependency on cgroup_mutex")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 46307fd6e27a3f678a1678b02e667678c22aa8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= mkoutny@suse.com Date: Mon, 10 Oct 2022 10:29:18 +0200 Subject: [PATCH] cgroup: Reorganize css_set_lock and kernfs path processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
The commit 74e4b956eb1c incorrectly wrapped kernfs_walk_and_get (might_sleep) under css_set_lock (spinlock). css_set_lock is needed by __cset_cgroup_from_root to ensure stable cset->cgrp_links but not for kernfs_walk_and_get.
We only need to make sure that the returned root_cgrp won't be freed under us. This is given in the case of global root because it is static (cgrp_dfl_root.cgrp). When the root_cgrp is lower in the hierarchy, it is pinned by cgroup_ns->root_cset (and `current` task cannot switch namespace asynchronously so ns_proxy pins cgroup_ns).
Note this reasoning won't hold for root cgroups in v1 hierarchies, therefore create a special-cased helper function just for the default hierarchy.
Fixes: 74e4b956eb1c ("cgroup: Honor caller's cgroup NS when resolving path") Reported-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Michal Koutný mkoutny@suse.com Signed-off-by: Tejun Heo tj@kernel.org
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 764bdd5fd8d1..ecf409e3c3a7 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1392,6 +1392,9 @@ static void cgroup_destroy_root(struct cgroup_root *root) cgroup_free_root(root); }
+/* + * Returned cgroup is without refcount but it's valid as long as cset pins it. + */ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { @@ -1403,6 +1406,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, res_cgroup = cset->dfl_cgrp; } else { struct cgrp_cset_link *link; + lockdep_assert_held(&css_set_lock);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; @@ -1414,6 +1418,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, } }
+ BUG_ON(!res_cgroup); return res_cgroup; }
@@ -1436,23 +1441,36 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
rcu_read_unlock();
- BUG_ON(!res); return res; }
+/* + * Look up cgroup associated with current task's cgroup namespace on the default + * hierarchy. + * + * Unlike current_cgns_cgroup_from_root(), this doesn't need locks: + * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu + * pointers. + * - css_set_lock is not needed because we just read cset->dfl_cgrp. + * - As a bonus returned cgrp is pinned with the current because it cannot + * switch cgroup_ns asynchronously. + */ +static struct cgroup *current_cgns_cgroup_dfl(void) +{ + struct css_set *cset; + + cset = current->nsproxy->cgroup_ns->root_cset; + return __cset_cgroup_from_root(cset, &cgrp_dfl_root); +} + /* look up cgroup associated with given css_set on the specified hierarchy */ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { - struct cgroup *res = NULL; - lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock);
- res = __cset_cgroup_from_root(cset, root); - - BUG_ON(!res); - return res; + return __cset_cgroup_from_root(cset, root); }
/* @@ -6105,9 +6123,7 @@ struct cgroup *cgroup_get_from_id(u64 id) if (!cgrp) return ERR_PTR(-ENOENT);
- spin_lock_irq(&css_set_lock); - root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root); - spin_unlock_irq(&css_set_lock); + root_cgrp = current_cgns_cgroup_dfl(); if (!cgroup_is_descendant(cgrp, root_cgrp)) { cgroup_put(cgrp); return ERR_PTR(-ENOENT); @@ -6686,10 +6702,8 @@ struct cgroup *cgroup_get_from_path(const char *path) struct cgroup *cgrp = ERR_PTR(-ENOENT); struct cgroup *root_cgrp;
- spin_lock_irq(&css_set_lock); - root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root); + root_cgrp = current_cgns_cgroup_dfl(); kn = kernfs_walk_and_get(root_cgrp->kn, path); - spin_unlock_irq(&css_set_lock); if (!kn) goto out;
linux-stable-mirror@lists.linaro.org