No functional change intended. This rework confines modifications of pids counters to the minimal subtree of uncharged/charged cgroups.
Signed-off-by: Michal Koutný <mkoutny@suse.com> --- kernel/cgroup/pids.c | 80 ++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 33 deletions(-)
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index f5f81274658e..9df8a209a6e2 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -133,41 +133,23 @@ static void pids_uncharge(struct pids_cgroup *pids, int num) pids_cancel(p, num); }
-/** - * pids_charge - hierarchically charge the pid count - * @pids: the pid cgroup state - * @num: the number of pids to charge - * - * This function does *not* follow the pid limit set. It cannot fail and the new - * pid count may exceed the limit. This is only used for reverting failed - * attaches, where there is no other way out than violating the limit. - */ -static void pids_charge(struct pids_cgroup *pids, int num) -{ - struct pids_cgroup *p; - - for (p = pids; parent_pids(p); p = parent_pids(p)) { - int64_t new = atomic64_add_return(num, &p->counter); - - pids_update_watermark(p, new); - } -} - /** * pids_try_charge - hierarchically try to charge the pid count * @pids: the pid cgroup state * @num: the number of pids to charge + * @root: charge only under this root (NULL is the global root) * @fail: storage of pid cgroup causing the fail * * This function follows the set limit. It will fail if the charge would cause - * the new value to exceed the hierarchical limit. Returns 0 if the charge - * succeeded, otherwise -EAGAIN. + * the new value to exceed the hierarchical limit and @fail is set. Returns 0 if + * no limit was hit, otherwise -EAGAIN. */ -static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup **fail) +static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup *root, struct pids_cgroup **fail) { struct pids_cgroup *p, *q; + int ret = 0;
- for (p = pids; parent_pids(p); p = parent_pids(p)) { + for (p = pids; parent_pids(p) && p != root; p = parent_pids(p)) { int64_t new = atomic64_add_return(num, &p->counter); int64_t limit = atomic64_read(&p->limit);
@@ -177,8 +159,11 @@ static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup * fail. */ if (new > limit) { - *fail = p; - goto revert; + ret = -EAGAIN; + if (fail) { + *fail = p; + goto revert; + } } /* * Not technically accurate if we go over limit somewhere up @@ -187,14 +172,45 @@ static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup pids_update_watermark(p, new); }
- return 0; + return ret;
revert: for (q = pids; q != p; q = parent_pids(q)) pids_cancel(q, num); pids_cancel(p, num);
- return -EAGAIN; + return ret; +} + +/** + * pids_tranfer_charge - charge/uncharge in the subtree between @src and @dst + * @src: pid cgroup state to uncharge + * @dst: pid cgroup state to charge + * @num: the number of pids to transfer + * + * The function updates charged pids in the subtree whose root is the closest + * common ancestor of @src and @dst. This root and its ancestors are not + * modified (their limits are not enacted). + * + * Returns 0 if no limit was hit, -EAGAIN if a limit on the path from @dst up + * to (excluding) the common ancestor was hit (charges are transferred despite + * the limit). + */ +static int pids_tranfer_charge(struct pids_cgroup *src, struct pids_cgroup *dst, int num) +{ + struct pids_cgroup *p, *comm = src; + int ret; + + /* for stable cgroup tree */ + lockdep_assert_held(&cgroup_mutex); + + while (!cgroup_is_descendant(dst->css.cgroup, comm->css.cgroup)) + comm = parent_pids(comm); + + ret = pids_try_charge(dst, num, comm, NULL); + + for (p = src; p != comm; p = parent_pids(p)) + pids_cancel(p, num); + return ret; }
static int pids_can_attach(struct cgroup_taskset *tset) @@ -215,8 +231,7 @@ static int pids_can_attach(struct cgroup_taskset *tset) old_css = task_css(task, pids_cgrp_id); old_pids = css_pids(old_css);
- pids_charge(pids, 1); - pids_uncharge(old_pids, 1); + (void) pids_tranfer_charge(old_pids, pids, 1); }
return 0; @@ -235,8 +250,7 @@ static void pids_cancel_attach(struct cgroup_taskset *tset) old_css = task_css(task, pids_cgrp_id); old_pids = css_pids(old_css);
- pids_charge(old_pids, 1); - pids_uncharge(pids, 1); + (void) pids_tranfer_charge(pids, old_pids, 1); } }
@@ -287,7 +301,7 @@ static int pids_can_fork(struct task_struct *task, struct css_set *cset) else css = task_css_check(current, pids_cgrp_id, true); pids = css_pids(css); - err = pids_try_charge(pids, 1, &pids_over_limit); + err = pids_try_charge(pids, 1, NULL, &pids_over_limit); if (err) pids_event(pids, pids_over_limit);