commit 18f9a4d47527772515ad6cbdac796422566e6440 upstream.
Cpuset v2 has no spread flags to set. So we can skip spread flags update if cpuset v2 is being used. Also change the name to cpuset_update_task_spread_flags() to indicate that there are multiple spread flags.
Signed-off-by: Waiman Long longman@redhat.com Signed-off-by: Tejun Heo tj@kernel.org --- kernel/cgroup/cpuset.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 428820bf141d..a7bbe277ae66 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -450,11 +450,15 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) /* * update task's spread flag if cpuset's page/slab spread flag is set * - * Call with callback_lock or cpuset_rwsem held. + * Call with callback_lock or cpuset_rwsem held. The check can be skipped + * if on default hierarchy. */ -static void cpuset_update_task_spread_flag(struct cpuset *cs, +static void cpuset_update_task_spread_flags(struct cpuset *cs, struct task_struct *tsk) { + if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) + return; + if (is_spread_page(cs)) task_set_spread_page(tsk); else @@ -1941,7 +1945,7 @@ static void update_tasks_flags(struct cpuset *cs)
css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) - cpuset_update_task_spread_flag(cs, task); + cpuset_update_task_spread_flags(cs, task); css_task_iter_end(&it); }
@@ -2270,7 +2274,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); - cpuset_update_task_spread_flag(cs, task); + cpuset_update_task_spread_flags(cs, task); }
/*
commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c upstream.
By default, the clone(2) syscall spawn a child process into the same cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes into cgroups"), the child will be spawned into a different cgroup which is somewhat similar to writing the child's tid into "cgroup.threads".
The current cpuset_fork() method does not properly handle the CLONE_INTO_CGROUP case where the cpuset of the child may be different from that of its parent. Update the cpuset_fork() method to treat the CLONE_INTO_CGROUP case similar to cpuset_attach().
Since the newly cloned task has not been running yet, its actual memory usage isn't known. So it is not necessary to make change to mm in cpuset_fork().
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") Reported-by: Giuseppe Scrivano gscrivan@redhat.com Signed-off-by: Waiman Long longman@redhat.com Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Tejun Heo tj@kernel.org --- kernel/cgroup/cpuset.c | 62 ++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 20 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a7bbe277ae66..3a7690450a3b 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2238,16 +2238,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) }
/* - * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach() + * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ static cpumask_var_t cpus_attach; +static nodemask_t cpuset_attach_nodemask_to; + +static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) +{ + percpu_rwsem_assert_held(&cpuset_rwsem); + + if (cs != &top_cpuset) + guarantee_online_cpus(task, cpus_attach); + else + cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); + /* + * can_attach beforehand should guarantee that this doesn't + * fail. TODO: have a better way to handle failure here + */ + WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); + + cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); + cpuset_update_task_spread_flags(cs, task); +}
static void cpuset_attach(struct cgroup_taskset *tset) { - /* static buf protected by cpuset_rwsem */ - static nodemask_t cpuset_attach_nodemask_to; struct task_struct *task; struct task_struct *leader; struct cgroup_subsys_state *css; @@ -2262,20 +2279,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, css, tset) { - if (cs != &top_cpuset) - guarantee_online_cpus(task, cpus_attach); - else - cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); - /* - * can_attach beforehand should guarantee that this doesn't - * fail. TODO: have a better way to handle failure here - */ - WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); - - cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); - cpuset_update_task_spread_flags(cs, task); - } + cgroup_taskset_for_each(task, css, tset) + cpuset_attach_task(cs, task);
/* * Change mm for all threadgroup leaders. This is expensive and may @@ -2948,11 +2953,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) */ static void cpuset_fork(struct task_struct *task) { - if (task_css_is_root(task, cpuset_cgrp_id)) + struct cpuset *cs; + bool same_cs; + + rcu_read_lock(); + cs = task_cs(task); + same_cs = (cs == task_cs(current)); + rcu_read_unlock(); + + if (same_cs) { + if (cs == &top_cpuset) + return; + + set_cpus_allowed_ptr(task, current->cpus_ptr); + task->mems_allowed = current->mems_allowed; return; + }
- set_cpus_allowed_ptr(task, current->cpus_ptr); - task->mems_allowed = current->mems_allowed; + /* CLONE_INTO_CGROUP */ + percpu_down_write(&cpuset_rwsem); + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); + cpuset_attach_task(cs, task); + percpu_up_write(&cpuset_rwsem); }
struct cgroup_subsys cpuset_cgrp_subsys = {
commit eee87853794187f6adbe19533ed79c8b44b36a91 upstream.
In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept new tasks. It is too late to check that in cpuset_fork(). So we need to add the cpuset_can_fork() and cpuset_cancel_fork() methods to pre-check it before we can allow attachment to a different cpuset.
We also need to set the attach_in_progress flag to alert other code that a new task is going to be added to the cpuset.
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") Suggested-by: Michal Koutný mkoutny@suse.com Signed-off-by: Waiman Long longman@redhat.com Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Tejun Heo tj@kernel.org --- kernel/cgroup/cpuset.c | 88 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 5 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 3a7690450a3b..58a515b4cbc0 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2186,6 +2186,18 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
+/* + * Check to see if a cpuset can accept a new task + * For v1, cpus_allowed and mems_allowed can't be empty. + */ +static int cpuset_can_attach_check(struct cpuset *cs) +{ + if (!is_in_v2_mode() && + (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) + return -ENOSPC; + return 0; +} + /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { @@ -2200,10 +2212,8 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
percpu_down_write(&cpuset_rwsem);
- /* allow moving tasks into an empty cpuset if on default hierarchy */ - ret = -ENOSPC; - if (!is_in_v2_mode() && - (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) + ret = cpuset_can_attach_check(cs); + if (ret) goto out_unlock;
cgroup_taskset_for_each(task, css, tset) { @@ -2220,7 +2230,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) * changes which zero cpus/mems_allowed. */ cs->attach_in_progress++; - ret = 0; out_unlock: percpu_up_write(&cpuset_rwsem); return ret; @@ -2946,6 +2955,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) percpu_up_write(&cpuset_rwsem); }
+/* + * In case the child is cloned into a cpuset different from its parent, + * additional checks are done to see if the move is allowed. + */ +static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) +{ + struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); + bool same_cs; + int ret; + + rcu_read_lock(); + same_cs = (cs == task_cs(current)); + rcu_read_unlock(); + + if (same_cs) + return 0; + + lockdep_assert_held(&cgroup_mutex); + percpu_down_write(&cpuset_rwsem); + + /* Check to see if task is allowed in the cpuset */ + ret = cpuset_can_attach_check(cs); + if (ret) + goto out_unlock; + + ret = task_can_attach(task, cs->effective_cpus); + if (ret) + goto out_unlock; + + ret = security_task_setscheduler(task); + if (ret) + goto out_unlock; + + /* + * Mark attach is in progress. This makes validate_change() fail + * changes which zero cpus/mems_allowed. + */ + cs->attach_in_progress++; +out_unlock: + percpu_up_write(&cpuset_rwsem); + return ret; +} + +static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) +{ + struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); + bool same_cs; + + rcu_read_lock(); + same_cs = (cs == task_cs(current)); + rcu_read_unlock(); + + if (same_cs) + return; + + percpu_down_write(&cpuset_rwsem); + cs->attach_in_progress--; + if (!cs->attach_in_progress) + wake_up(&cpuset_attach_wq); + percpu_up_write(&cpuset_rwsem); +} + /* * Make sure the new task conform to the current state of its parent, * which could have been changed by cpuset just after it inherits the @@ -2974,6 +3045,11 @@ static void cpuset_fork(struct task_struct *task) percpu_down_write(&cpuset_rwsem); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cpuset_attach_task(cs, task); + + cs->attach_in_progress--; + if (!cs->attach_in_progress) + wake_up(&cpuset_attach_wq); + percpu_up_write(&cpuset_rwsem); }
@@ -2987,6 +3063,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .post_attach = cpuset_post_attach, .bind = cpuset_bind, + .can_fork = cpuset_can_fork, + .cancel_fork = cpuset_cancel_fork, .fork = cpuset_fork, .legacy_cftypes = legacy_files, .dfl_cftypes = dfl_files,
On Mon, Apr 17, 2023 at 11:36:45AM -0400, Waiman Long wrote:
commit 18f9a4d47527772515ad6cbdac796422566e6440 upstream.
Cpuset v2 has no spread flags to set. So we can skip spread flags update if cpuset v2 is being used. Also change the name to cpuset_update_task_spread_flags() to indicate that there are multiple spread flags.
Signed-off-by: Waiman Long longman@redhat.com Signed-off-by: Tejun Heo tj@kernel.org
kernel/cgroup/cpuset.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
Same as the 5.10.y series, I've dropped all of these due to build errors.
thanks,
greg k-h
linux-stable-mirror@lists.linaro.org