From: Tejun Heo tj@kernel.org
[ Upstream commit 4cbfd3de737b9d00544ff0f673cb75fc37bffb6a ]
When a CPU went online or offline, wq_update_unbound_numa() was called only on the CPU which was going up or down. This works fine because all CPUs on the same NUMA node share the same pool_workqueue slot - one CPU updating it updates it for everyone in the node.
However, future changes will make each CPU use a separate pool_workqueue even when they're sharing the same worker_pool, which requires updating, on hotplug, the pool_workqueues of all CPUs that may be sharing the same pool_workqueue.
To accommodate the planned changes, this patch updates workqueue_online_cpu() and workqueue_offline_cpu() so that they call wq_update_unbound_numa() for all CPUs sharing the same NUMA node as the CPU going up or down. In the current code, the second and subsequent calls would be no-ops and there shouldn't be any behavior changes.
* As wq_update_unbound_numa() is now called on multiple CPUs for each hotplug event, @cpu is renamed to @hotplug_cpu and another @cpu argument is added. The former indicates the CPU being hot[un]plugged and the latter the CPU whose pool_workqueue is being updated.
* In wq_update_unbound_numa(), cpu_off is renamed to off_cpu for consistency with the new @hotplug_cpu.
Signed-off-by: Tejun Heo tj@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/workqueue.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8e108c040cc35..c1f96501e40f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4130,7 +4130,8 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, /** * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug * @wq: the target workqueue - * @cpu: the CPU coming up or going down + * @cpu: the CPU to update pool association for + * @hotplug_cpu: the CPU coming up or going down * @online: whether @cpu is coming up or going down * * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and @@ -4150,10 +4151,10 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, * CPU_DOWN_PREPARE. */ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, - bool online) + int hotplug_cpu, bool online) { int node = cpu_to_node(cpu); - int cpu_off = online ? -1 : cpu; + int off_cpu = online ? -1 : hotplug_cpu; struct pool_workqueue *old_pwq = NULL, *pwq; struct workqueue_attrs *target_attrs; cpumask_t *cpumask; @@ -4181,7 +4182,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, * and create a new one if they don't match. If the target cpumask * equals the default pwq's, the default pwq should be used. */ - if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) { + if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, off_cpu, cpumask)) { if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) return; } else { @@ -5168,8 +5169,15 @@ int workqueue_online_cpu(unsigned int cpu) }
/* update NUMA affinity of unbound workqueues */ - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, true); + list_for_each_entry(wq, &workqueues, list) { + int tcpu; + + for_each_possible_cpu(tcpu) { + if (cpu_to_node(tcpu) == cpu_to_node(cpu)) { + wq_update_unbound_numa(wq, tcpu, cpu, true); + } + } + }
mutex_unlock(&wq_pool_mutex); return 0; @@ -5187,8 +5195,15 @@ int workqueue_offline_cpu(unsigned int cpu)
/* update NUMA affinity of unbound workqueues */ mutex_lock(&wq_pool_mutex); - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, false); + list_for_each_entry(wq, &workqueues, list) { + int tcpu; + + for_each_possible_cpu(tcpu) { + if (cpu_to_node(tcpu) == cpu_to_node(cpu)) { + wq_update_unbound_numa(wq, tcpu, cpu, false); + } + } + } mutex_unlock(&wq_pool_mutex);
return 0; @@ -6132,7 +6147,8 @@ void __init workqueue_init(void) }
list_for_each_entry(wq, &workqueues, list) { - wq_update_unbound_numa(wq, smp_processor_id(), true); + wq_update_unbound_numa(wq, smp_processor_id(), smp_processor_id(), + true); WARN(init_rescuer(wq), "workqueue: failed to create early rescuer for %s", wq->name);