From: Prateek Sood prsood@codeaurora.org
commit aa24163b2ee5c92120e32e99b5a93143a0f4258e upstream.
Remove circular dependency deadlock in a scenario where hotplug of CPU is being done while there is updation in cgroup and cpuset triggered from userspace.
Process A => kthreadd => Process B => Process C => Process A
Process A cpu_subsys_offline(); cpu_down(); _cpu_down(); percpu_down_write(&cpu_hotplug_lock); //held cpuhp_invoke_callback(); workqueue_offline_cpu(); queue_work_on(); // unbind_work on system_highpri_wq __queue_work(); insert_work(); wake_up_worker(); flush_work(); wait_for_completion();
worker_thread(); manage_workers(); create_worker(); kthread_create_on_node(); wake_up_process(kthreadd_task);
kthreadd kthreadd(); kernel_thread(); do_fork(); copy_process(); percpu_down_read(&cgroup_threadgroup_rwsem); __rwsem_down_read_failed_common(); //waiting
Process B kernfs_fop_write(); cgroup_file_write(); cgroup_procs_write(); percpu_down_write(&cgroup_threadgroup_rwsem); //held cgroup_attach_task(); cgroup_migrate(); cgroup_migrate_execute(); cpuset_can_attach(); mutex_lock(&cpuset_mutex); //waiting
Process C kernfs_fop_write(); cgroup_file_write(); cpuset_write_resmask(); mutex_lock(&cpuset_mutex); //held update_cpumask(); update_cpumasks_hier(); rebuild_sched_domains_locked(); get_online_cpus(); percpu_down_read(&cpu_hotplug_lock); //waiting
Eliminating deadlock by reversing the locking order for cpuset_mutex and cpu_hotplug_lock.
Signed-off-by: Prateek Sood prsood@codeaurora.org Signed-off-by: Tejun Heo tj@kernel.org Signed-off-by: Amit Pundir amit.pundir@linaro.org --- Build tested on 4.14.74 for ARCH=arm/arm64 allmodconfig.
kernel/cgroup/cpuset.c | 53 ++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4657e2924ecb..54f4855b92fa 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -817,6 +817,18 @@ static int generate_sched_domains(cpumask_var_t **domains, return ndoms; }
+static void cpuset_sched_change_begin(void) +{ + cpus_read_lock(); + mutex_lock(&cpuset_mutex); +} + +static void cpuset_sched_change_end(void) +{ + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); +} + /* * Rebuild scheduler domains. * @@ -826,16 +838,14 @@ static int generate_sched_domains(cpumask_var_t **domains, * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_mutex held. Takes get_online_cpus(). */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { struct sched_domain_attr *attr; cpumask_var_t *doms; int ndoms;
lockdep_assert_held(&cpuset_mutex); - get_online_cpus();
/* * We have raced with CPU hotplug. Don't do anything to avoid @@ -843,27 +853,25 @@ static void rebuild_sched_domains_locked(void) * Anyways, hotplug work item will rebuild sched domains. */ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) - goto out; + return;
/* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr);
/* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); -out: - put_online_cpus(); } #else /* !CONFIG_SMP */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { } #endif /* CONFIG_SMP */
void rebuild_sched_domains(void) { - mutex_lock(&cpuset_mutex); - rebuild_sched_domains_locked(); - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_begin(); + rebuild_sched_domains_cpuslocked(); + cpuset_sched_change_end(); }
/** @@ -949,7 +957,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) rcu_read_unlock();
if (need_rebuild_sched_domains) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); }
/** @@ -1281,7 +1289,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); }
return 0; @@ -1314,7 +1322,6 @@ static void update_tasks_flags(struct cpuset *cs) * * Call with cpuset_mutex held. */ - static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { @@ -1347,7 +1354,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spin_unlock_irq(&callback_lock);
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked();
if (spread_flag_changed) update_tasks_flags(cs); @@ -1615,7 +1622,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = 0;
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -1651,7 +1658,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; }
@@ -1662,7 +1669,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = -ENODEV;
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock;
@@ -1675,7 +1682,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; }
@@ -1714,7 +1721,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work);
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock;
@@ -1738,7 +1745,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
free_trial_cpuset(trialcs); out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); @@ -2039,14 +2046,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) /* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains_locked(). + * will call rebuild_sched_domains_cpuslocked(). */
static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css);
- mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin();
if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); @@ -2054,7 +2061,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags);
- mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); }
static void cpuset_css_free(struct cgroup_subsys_state *css)
From: Carl Huang cjhuang@codeaurora.org
commit 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac upstream.
The skb may be freed in tx completion context before trace_ath10k_wmi_cmd is called. This can be easily captured when KASAN(Kernel Address Sanitizer) is enabled. The fix is to move trace_ath10k_wmi_cmd before the send operation. As the ret has no meaning in trace_ath10k_wmi_cmd then, so remove this parameter too.
Signed-off-by: Carl Huang cjhuang@codeaurora.org Tested-by: Brian Norris briannorris@chromium.org Reviewed-by: Brian Norris briannorris@chromium.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Amit Pundir amit.pundir@linaro.org --- To be applied on 4.9.y and 4.4.y as well. Build tested on 4.14.74, 4.9.131 and 4.4.159 for ARCH=arm/arm64 allmodconfig.
drivers/net/wireless/ath/ath10k/trace.h | 12 ++++-------- drivers/net/wireless/ath/ath10k/wmi.c | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h index e0d00cef0bd8..5b974bb76e6c 100644 --- a/drivers/net/wireless/ath/ath10k/trace.h +++ b/drivers/net/wireless/ath/ath10k/trace.h @@ -152,10 +152,9 @@ TRACE_EVENT(ath10k_log_dbg_dump, );
TRACE_EVENT(ath10k_wmi_cmd, - TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len, - int ret), + TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len),
- TP_ARGS(ar, id, buf, buf_len, ret), + TP_ARGS(ar, id, buf, buf_len),
TP_STRUCT__entry( __string(device, dev_name(ar->dev)) @@ -163,7 +162,6 @@ TRACE_EVENT(ath10k_wmi_cmd, __field(unsigned int, id) __field(size_t, buf_len) __dynamic_array(u8, buf, buf_len) - __field(int, ret) ),
TP_fast_assign( @@ -171,17 +169,15 @@ TRACE_EVENT(ath10k_wmi_cmd, __assign_str(driver, dev_driver_string(ar->dev)); __entry->id = id; __entry->buf_len = buf_len; - __entry->ret = ret; memcpy(__get_dynamic_array(buf), buf, buf_len); ),
TP_printk( - "%s %s id %d len %zu ret %d", + "%s %s id %d len %zu", __get_str(driver), __get_str(device), __entry->id, - __entry->buf_len, - __entry->ret + __entry->buf_len ) );
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 38a97086708b..2ab5311659ea 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1741,8 +1741,8 @@ int ath10k_wmi_cmd_send_nowait(struct ath10k *ar, struct sk_buff *skb, cmd_hdr->cmd_id = __cpu_to_le32(cmd);
memset(skb_cb, 0, sizeof(*skb_cb)); + trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len); ret = ath10k_htc_send(&ar->htc, ar->wmi.eid, skb); - trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len, ret);
if (ret) goto err_pull;
From: Yu Wang yyuwang@codeaurora.org
commit 50e79e25250bf928369996277e85b00536b380c7 upstream.
If device gone during chip reset, ar->normal_mode_fw.board is not initialized, but ath10k_debug_print_hwfw_info() will try to access its member, which will cause 'kernel NULL pointer' issue. This was found using a faulty device (pci link went down sometimes) in a random insmod/rmmod/other-op test. To fix it, check ar->normal_mode_fw.board before accessing the member.
pci 0000:02:00.0: BAR 0: assigned [mem 0xf7400000-0xf75fffff 64bit] ath10k_pci 0000:02:00.0: enabling device (0000 -> 0002) ath10k_pci 0000:02:00.0: pci irq msi oper_irq_mode 2 irq_mode 0 reset_mode 0 ath10k_pci 0000:02:00.0: failed to read device register, device is gone ath10k_pci 0000:02:00.0: failed to wait for target init: -5 ath10k_pci 0000:02:00.0: failed to warm reset: -5 ath10k_pci 0000:02:00.0: firmware crashed during chip reset ath10k_pci 0000:02:00.0: firmware crashed! (uuid 5d018951-b8e1-404a-8fde-923078b4423a) ath10k_pci 0000:02:00.0: (null) target 0x00000000 chip_id 0x00340aff sub 0000:0000 ath10k_pci 0000:02:00.0: kconfig debug 1 debugfs 1 tracing 1 dfs 1 testmode 1 ath10k_pci 0000:02:00.0: firmware ver api 0 features crc32 00000000 ... BUG: unable to handle kernel NULL pointer dereference at 00000004 ... Call Trace: [<fb4e7882>] ath10k_print_driver_info+0x12/0x20 [ath10k_core] [<fb62b7dd>] ath10k_pci_fw_crashed_dump+0x6d/0x4d0 [ath10k_pci] [<fb629f07>] ? ath10k_pci_sleep.part.19+0x57/0xc0 [ath10k_pci] [<fb62c8ee>] ath10k_pci_hif_power_up+0x14e/0x1b0 [ath10k_pci] [<c10477fb>] ? do_page_fault+0xb/0x10 [<fb4eb934>] ath10k_core_register_work+0x24/0x840 [ath10k_core] [<c18a00d8>] ? netlbl_unlhsh_remove+0x178/0x410 [<c10477f0>] ? __do_page_fault+0x480/0x480 [<c1068e44>] process_one_work+0x114/0x3e0 [<c1069d07>] worker_thread+0x37/0x4a0 [<c106e294>] kthread+0xa4/0xc0 [<c1069cd0>] ? create_worker+0x180/0x180 [<c106e1f0>] ? kthread_park+0x50/0x50 [<c18ab4f7>] ret_from_fork+0x1b/0x28 Code: 78 80 b8 50 09 00 00 00 75 5d 8d 75 94 c7 44 24 08 aa d7 52 fb c7 44 24 04 64 00 00 00 89 34 24 e8 82 52 e2 c5 8b 83 dc 08 00 00 <8b> 50 04 8b 08 31 c0 e8 20 57 e3 c5 89 44 24 10 8b 83 58 09 00 EIP: [<fb4e7754>]- ath10k_debug_print_board_info+0x34/0xb0 [ath10k_core] SS:ESP 0068:f4921d90 CR2: 0000000000000004
Signed-off-by: Yu Wang yyuwang@codeaurora.org Signed-off-by: Kalle Valo kvalo@codeaurora.org [AmitP: Minor rebasing for 4.14.y and 4.9.y] Signed-off-by: Amit Pundir amit.pundir@linaro.org --- To be applied on 4.9.y as well. Build tested on 4.14.74 and 4.9.131 for ARCH=arm/arm64 allmodconfig.
drivers/net/wireless/ath/ath10k/debug.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index df514507d3f1..22003895f854 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2013 Qualcomm Atheros, Inc. + * Copyright (c) 2018, The Linux Foundation. All rights reserved. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -163,6 +164,8 @@ void ath10k_debug_print_hwfw_info(struct ath10k *ar) void ath10k_debug_print_board_info(struct ath10k *ar) { char boardinfo[100]; + const struct firmware *board; + u32 crc;
if (ar->id.bmi_ids_valid) scnprintf(boardinfo, sizeof(boardinfo), "%d:%d", @@ -170,11 +173,16 @@ void ath10k_debug_print_board_info(struct ath10k *ar) else scnprintf(boardinfo, sizeof(boardinfo), "N/A");
+ board = ar->normal_mode_fw.board; + if (!IS_ERR_OR_NULL(board)) + crc = crc32_le(0, board->data, board->size); + else + crc = 0; + ath10k_info(ar, "board_file api %d bmi_id %s crc32 %08x", ar->bd_api, boardinfo, - crc32_le(0, ar->normal_mode_fw.board->data, - ar->normal_mode_fw.board->size)); + crc); }
void ath10k_debug_print_boot_info(struct ath10k *ar)
From: James Smart jsmart2021@gmail.com
commit cf25809bec2c7df4b45df5b2196845d9a4a3c89b upstream.
If there are errors during initial controller create, the transport will teardown the partially initialized controller struct and free the ctlr memory. Trouble is - most of those errors can occur due to asynchronous events happening such io timeouts and subsystem connectivity failures. Those failures invoke async workq items to reset the controller and attempt reconnect. Those may be in progress as the main thread frees the ctrl memory, resulting in NULL ptr oops.
Prevent this from happening by having the main ctrl failure thread changing state to DELETING followed by synchronously cancelling any pending queued work item. The change of state will prevent the scheduling of resets or reconnect events.
Signed-off-by: James Smart james.smart@broadcom.com Signed-off-by: Keith Busch keith.busch@intel.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Amit Pundir amit.pundir@linaro.org --- Build tested on 4.14.74 for ARCH=arm/arm64 allmodconfig.
drivers/nvme/host/fc.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7deb7b5d8683..058d542647dd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2868,6 +2868,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, }
if (ret) { + nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ctrl.reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + /* couldn't schedule retry - fail out */ dev_err(ctrl->ctrl.device, "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
linux-stable-mirror@lists.linaro.org