struct rdma_cm_id has a member "struct work_struct net_work"
that is reused for enqueuing cma_netevent_work_handler()
onto cma_wq.
The crash below [1] can occur if more than one call to
cma_netevent_callback() happens in quick succession for the
same rdma_cm_id: each call re-initializes and enqueues the same
net_work item, overwriting any previously queued work item that
was just scheduled to run. There is no guarantee that a queued
work item runs between two successive calls to
cma_netevent_callback(), so the 2nd INIT_WORK() overwrites the
1st work item (for the same rdma_cm_id) while it may still be
pending, despite id_table_lock being grabbed during enqueue.
The drgn analysis [2] also indicates that the work item was likely overwritten.
Fix this by moving the INIT_WORK() to __rdma_create_id(),
so that it doesn't race with any existing queue_work() or
its worker thread.
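As a condensed sketch of the pattern before and after this change
(taken from the diff below, with surrounding code omitted):

    /* Before: the netevent notifier re-initializes a work item that
     * may still be pending; INIT_WORK() resets work->data and thus
     * wipes the pool-workqueue information process_one_work() needs.
     */
    static int cma_netevent_callback(...)
    {
            ...
            INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
            cma_id_get(current_id);
            queue_work(cma_wq, &current_id->id.net_work);
            ...
    }

    /* After: net_work is initialized exactly once, when the id is
     * created, and the notifier only queues it.
     */
    __rdma_create_id(...)
    {
            ...
            INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler);
            ...
    }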
[1] Trimmed crash stack:
=============================================
BUG: kernel NULL pointer dereference, address: 0000000000000008
kworker/u256:6 ... 6.12.0-0...
Workqueue: cma_netevent_work_handler [rdma_cm] (rdma_cm)
RIP: 0010:process_one_work+0xba/0x31a
Call Trace:
worker_thread+0x266/0x3a0
kthread+0xcf/0x100
ret_from_fork+0x31/0x50
ret_from_fork_asm+0x1a/0x30
=============================================
[2] drgn crash analysis:
>>> trace = prog.crashed_thread().stack_trace()
>>> trace
(0) crash_setup_regs (./arch/x86/include/asm/kexec.h:111:15)
(1) __crash_kexec (kernel/crash_core.c:122:4)
(2) panic (kernel/panic.c:399:3)
(3) oops_end (arch/x86/kernel/dumpstack.c:382:3)
...
(8) process_one_work (kernel/workqueue.c:3168:2)
(9) process_scheduled_works (kernel/workqueue.c:3310:3)
(10) worker_thread (kernel/workqueue.c:3391:4)
(11) kthread (kernel/kthread.c:389:9)
Line workqueue.c:3168 for this kernel version is in process_one_work():
3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
>>> trace[8]["work"]
*(struct work_struct *)0xffff92577d0a21d8 = {
        .data = (atomic_long_t){
                .counter = (s64)536870912,    <=== Note
        },
        .entry = (struct list_head){
                .next = (struct list_head *)0xffff924d075924c0,
                .prev = (struct list_head *)0xffff924d075924c0,
        },
        .func = (work_func_t)cma_netevent_work_handler+0x0 = 0xffffffffc2cec280,
}
Suspicion is that pwq is NULL:
>>> trace[8]["pwq"]
(struct pool_workqueue *)<absent>
In process_one_work(), pwq is assigned from:

        struct pool_workqueue *pwq = get_work_pwq(work);

and get_work_pwq() is:

        static struct pool_workqueue *get_work_pwq(struct work_struct *work)
        {
                unsigned long data = atomic_long_read(&work->data);

                if (data & WORK_STRUCT_PWQ)
                        return work_struct_pwq(data);
                else
                        return NULL;
        }
WORK_STRUCT_PWQ is 0x4:
>>> print(repr(prog['WORK_STRUCT_PWQ']))
Object(prog, 'enum work_flags', value=4)
But work->data is 536870912 which is 0x20000000.
So, get_work_pwq() returns NULL and we crash in process_one_work():
3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
=============================================
Fixes: 925d046e7e52 ("RDMA/core: Add a netevent notifier to cma")
Cc: stable(a)vger.kernel.org
Co-developed-by: Håkon Bugge <haakon.bugge(a)oracle.com>
Signed-off-by: Håkon Bugge <haakon.bugge(a)oracle.com>
Signed-off-by: Sharath Srinivasan <sharath.srinivasan(a)oracle.com>
---
v1->v2 cc:stable@vger.kernel.org
---
drivers/infiniband/core/cma.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 91db10515d74..176d0b3e4488 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -72,6 +72,8 @@ static const char * const cma_events[] = {
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
enum ib_gid_type gid_type);
+static void cma_netevent_work_handler(struct work_struct *_work);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -1033,6 +1035,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
+ INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler);
rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
if (parent)
@@ -5227,7 +5230,6 @@ static int cma_netevent_callback(struct notifier_block *self,
if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
neigh->ha, ETH_ALEN))
continue;
- INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
cma_id_get(current_id);
queue_work(cma_wq, &current_id->id.net_work);
}
--
2.39.5 (Apple Git-154)
The RK3399 Puma SoM contains an internal Cypress CYUSB3304 USB
hub, which shows instability due to improper reset pin configuration.
Currently, the reset pin is modeled as a vcc5v0_host regulator, which
may result in a reset pulse that is too short.
Starting with v6.6, the Onboard USB hub driver (later renamed
to Onboard USB dev) contains support for the Cypress HX3 hub family.
It can now be used to correctly model the RK3399 Puma SoM hardware.
The first commits in this series fix the onboard USB dev driver to
support all HX3 hub variants, including the CYUSB3304 found in
the RK3399 Puma SoM.
This allows introducing a fix for the internal USB hub instability
on RK3399 Puma, by replacing the vcc5v0_host regulator with
cy3304_reset, used inside the hub node.
Please be aware that the patch that fixes the USB hub instability in
arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi can be merged only
after updating the Onboard USB dev driver, otherwise the hub
will not work.
The last two commits in the series disable unrouted USB controllers
and PHYs on the RK3399 Puma SoM and Haikou carrier board, with no
intended functional changes.
Signed-off-by: Lukasz Czechowski <lukasz.czechowski(a)thaumatec.com>
---
Lukasz Czechowski (3):
usb: misc: onboard_usb_dev: fix support for Cypress HX3 hubs
dt-bindings: usb: cypress,hx3: Add support for all variants
arm64: dts: rockchip: fix internal USB hub instability on RK3399 Puma
Quentin Schulz (2):
arm64: dts: rockchip: disable unrouted USB controllers and PHY on RK3399 Puma
arm64: dts: rockchip: disable unrouted USB controllers and PHY on RK3399 Puma with Haikou
.../devicetree/bindings/usb/cypress,hx3.yaml | 6 +++
.../arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 8 ----
arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 43 ++++++++++------------
drivers/usb/misc/onboard_usb_dev.c | 10 ++++-
drivers/usb/misc/onboard_usb_dev.h | 6 +++
5 files changed, 39 insertions(+), 34 deletions(-)
---
base-commit: 1e26c5e28ca5821a824e90dd359556f5e9e7b89f
change-id: 20250326-onboard_usb_dev-a7c063a8a515
Best regards,
--
Lukasz Czechowski <lukasz.czechowski(a)thaumatec.com>
Overview
========
When a CPU calls push_rt_task and picks a task to push to
another CPU's runqueue, it calls the find_lock_lowest_rq method,
which takes a double lock on both CPUs' runqueues. If one of the
locks isn't readily available, it may lead to dropping the current
runqueue lock and reacquiring both locks at once. During this window
it is possible that the task is already migrated and is running on
some other CPU. These cases are already handled. However, if the task
is migrated and has already been executed, and another CPU is now
trying to wake it up (ttwu) such that it is queued again on the
runqueue (on_rq is 1), and also if the task was run by the same CPU,
then the current checks will pass even though the task was migrated
out and is no longer in the pushable tasks list.
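For orientation, the relevant call flow (function names from
kernel/sched/rt.c; retries and error handling omitted) is roughly:

    /*
     * push_rt_task(rq)
     *   next_task = pick_next_pushable_task(rq);
     *   lowest_rq = find_lock_lowest_rq(next_task, rq);
     *     cpu = find_lowest_rq(task);
     *     if (double_lock_balance(rq, lowest_rq)) {
     *             rq->lock was dropped to take both locks in order;
     *             in that window the task may have been migrated,
     *             dequeued or woken on another CPU, so its state
     *             must be re-validated here.
     *     }
     */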
Crashes
=======
This bug resulted in quite a few flavors of crashes triggering kernel
panics with various crash signatures, such as assert failures, page
faults, null pointer dereferences, and queue corruption errors, all
coming from the scheduler itself.
Some of the crashes:
-> kernel BUG at kernel/sched/rt.c:1616! BUG_ON(idx >= MAX_RT_PRIO)
Call Trace:
? __die_body+0x1a/0x60
? die+0x2a/0x50
? do_trap+0x85/0x100
? pick_next_task_rt+0x6e/0x1d0
? do_error_trap+0x64/0xa0
? pick_next_task_rt+0x6e/0x1d0
? exc_invalid_op+0x4c/0x60
? pick_next_task_rt+0x6e/0x1d0
? asm_exc_invalid_op+0x12/0x20
? pick_next_task_rt+0x6e/0x1d0
__schedule+0x5cb/0x790
? update_ts_time_stats+0x55/0x70
schedule_idle+0x1e/0x40
do_idle+0x15e/0x200
cpu_startup_entry+0x19/0x20
start_secondary+0x117/0x160
secondary_startup_64_no_verify+0xb0/0xbb
-> BUG: kernel NULL pointer dereference, address: 00000000000000c0
Call Trace:
? __die_body+0x1a/0x60
? no_context+0x183/0x350
? __warn+0x8a/0xe0
? exc_page_fault+0x3d6/0x520
? asm_exc_page_fault+0x1e/0x30
? pick_next_task_rt+0xb5/0x1d0
? pick_next_task_rt+0x8c/0x1d0
__schedule+0x583/0x7e0
? update_ts_time_stats+0x55/0x70
schedule_idle+0x1e/0x40
do_idle+0x15e/0x200
cpu_startup_entry+0x19/0x20
start_secondary+0x117/0x160
secondary_startup_64_no_verify+0xb0/0xbb
-> BUG: unable to handle page fault for address: ffff9464daea5900
kernel BUG at kernel/sched/rt.c:1861! BUG_ON(rq->cpu != task_cpu(p))
-> kernel BUG at kernel/sched/rt.c:1055! BUG_ON(!rq->nr_running)
Call Trace:
? __die_body+0x1a/0x60
? die+0x2a/0x50
? do_trap+0x85/0x100
? dequeue_top_rt_rq+0xa2/0xb0
? do_error_trap+0x64/0xa0
? dequeue_top_rt_rq+0xa2/0xb0
? exc_invalid_op+0x4c/0x60
? dequeue_top_rt_rq+0xa2/0xb0
? asm_exc_invalid_op+0x12/0x20
? dequeue_top_rt_rq+0xa2/0xb0
dequeue_rt_entity+0x1f/0x70
dequeue_task_rt+0x2d/0x70
__schedule+0x1a8/0x7e0
? blk_finish_plug+0x25/0x40
schedule+0x3c/0xb0
futex_wait_queue_me+0xb6/0x120
futex_wait+0xd9/0x240
do_futex+0x344/0xa90
? get_mm_exe_file+0x30/0x60
? audit_exe_compare+0x58/0x70
? audit_filter_rules.constprop.26+0x65e/0x1220
__x64_sys_futex+0x148/0x1f0
do_syscall_64+0x30/0x80
entry_SYSCALL_64_after_hwframe+0x62/0xc7
-> BUG: unable to handle page fault for address: ffff8cf3608bc2c0
Call Trace:
? __die_body+0x1a/0x60
? no_context+0x183/0x350
? spurious_kernel_fault+0x171/0x1c0
? exc_page_fault+0x3b6/0x520
? plist_check_list+0x15/0x40
? plist_check_list+0x2e/0x40
? asm_exc_page_fault+0x1e/0x30
? _cond_resched+0x15/0x30
? futex_wait_queue_me+0xc8/0x120
? futex_wait+0xd9/0x240
? try_to_wake_up+0x1b8/0x490
? futex_wake+0x78/0x160
? do_futex+0xcd/0xa90
? plist_check_list+0x15/0x40
? plist_check_list+0x2e/0x40
? plist_del+0x6a/0xd0
? plist_check_list+0x15/0x40
? plist_check_list+0x2e/0x40
? dequeue_pushable_task+0x20/0x70
? __schedule+0x382/0x7e0
? asm_sysvec_reschedule_ipi+0xa/0x20
? schedule+0x3c/0xb0
? exit_to_user_mode_prepare+0x9e/0x150
? irqentry_exit_to_user_mode+0x5/0x30
? asm_sysvec_reschedule_ipi+0x12/0x20
Above are some of the common examples of the crashes that were observed
due to this issue.
Details
=======
Let's look at the following scenario to understand this race.
1) CPU A enters push_rt_task
a) CPU A has chosen next_task = task p.
b) CPU A calls find_lock_lowest_rq(Task p, CPU Z’s rq).
c) CPU A identifies CPU X as a destination CPU (X < Z).
d) CPU A enters double_lock_balance(CPU Z’s rq, CPU X’s rq).
e) Since X is lower than Z, CPU A unlocks CPU Z’s rq. Someone else has
locked CPU X’s rq, and thus, CPU A must wait.
2) At CPU Z
a) Previous task has completed execution and thus, CPU Z enters
schedule, locks its own rq after CPU A releases it.
b) CPU Z dequeues previous task and begins executing task p.
c) CPU Z unlocks its rq.
d) Task p yields the CPU (e.g. by doing IO or waiting to acquire a
lock), which triggers the schedule function on CPU Z.
e) CPU Z enters schedule again, locks its own rq, and dequeues task p.
f) As part of dequeue, it sets p.on_rq = 0 and unlocks its rq.
3) At CPU B
a) CPU B enters try_to_wake_up with input task p.
b) Since CPU Z dequeued task p, p.on_rq = 0, and CPU B updates
task p's state to WAKING.
c) CPU B via select_task_rq determines CPU Y as the target CPU.
4) The race
a) CPU A acquires CPU X’s lock and relocks CPU Z.
b) CPU A reads task p.cpu = Z and incorrectly concludes task p is
still on CPU Z.
c) CPU A failed to notice that task p had been dequeued from CPU Z
while CPU A was waiting for the locks in double_lock_balance. If
CPU A had known that task p was dequeued, find_lock_lowest_rq would
return NULL, forcing push_rt_task to give up on migrating task p.
d) CPU B updates task p.cpu = Y and calls ttwu_queue.
e) CPU B locks CPU Y's rq. CPU B enqueues task p onto Y and sets task
p.on_rq = 1.
f) CPU B unlocks CPU Y, triggering memory synchronization.
g) CPU A reads task p.on_rq = 1, cementing its assumption that task p
has not migrated.
h) CPU A decides to migrate p to CPU X.
This leads to A dequeuing p from Y's queue and various crashes down the
line.
Solution
========
The solution here is fairly simple. After obtaining the lock (at 4a),
the check is enhanced to make sure that the task is still at the head
of the pushable tasks list. If it is not, then it is in any case not
suitable for being pushed.
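Condensed from the diff below, the recheck performed after
double_lock_balance() has dropped and retaken rq->lock becomes:

    if (unlikely(is_migration_disabled(task) ||
                 !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
                 task != pick_next_pushable_task(rq))) {
            /* Task moved or was dequeued while the lock was dropped:
             * give up on pushing it.
             */
            double_unlock_balance(rq, lowest_rq);
            lowest_rq = NULL;
    }

The previous checks (task_rq(task) != rq, task_on_cpu(), !rt_task(),
!task_on_rq_queued()) are implied by the identity comparison, since
pick_next_pushable_task() already asserts those properties for the
head of the pushable tasks list.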
Testing
=======
The fix was tested on a cluster of 3 nodes, where the panics due to
this issue were hit every couple of days. A fix similar to this one
was deployed on such a cluster and was stable for more than 30 days.
Co-developed-by: Jon Kohler <jon(a)nutanix.com>
Signed-off-by: Jon Kohler <jon(a)nutanix.com>
Co-developed-by: Gauri Patwardhan <gauri.patwardhan(a)nutanix.com>
Signed-off-by: Gauri Patwardhan <gauri.patwardhan(a)nutanix.com>
Co-developed-by: Rahul Chunduru <rahul.chunduru(a)nutanix.com>
Signed-off-by: Rahul Chunduru <rahul.chunduru(a)nutanix.com>
Signed-off-by: Harshit Agarwal <harshit(a)nutanix.com>
Tested-by: Will Ton <william.ton(a)nutanix.com>
Reviewed-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Cc: stable(a)vger.kernel.org
---
Changes in v2:
- As per Steve's suggestion, removed some checks that are done after
obtaining the lock that are no longer needed with the addition of the
new check.
- Moved up is_migration_disabled check.
- Link to v1:
https://lore.kernel.org/lkml/20250211054646.23987-1-harshit@nutanix.com/
Changes in v3:
- Updated commit message to add stable maintainers and reviewed-by tag.
- Link to v2:
https://lore.kernel.org/lkml/20250214170844.201692-1-harshit@nutanix.com/
---
kernel/sched/rt.c | 54 +++++++++++++++++++++++------------------------
1 file changed, 26 insertions(+), 28 deletions(-)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4b8e33c615b1..4762dd3f50c5 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1885,6 +1885,27 @@ static int find_lowest_rq(struct task_struct *task)
return -1;
}
+static struct task_struct *pick_next_pushable_task(struct rq *rq)
+{
+ struct task_struct *p;
+
+ if (!has_pushable_tasks(rq))
+ return NULL;
+
+ p = plist_first_entry(&rq->rt.pushable_tasks,
+ struct task_struct, pushable_tasks);
+
+ BUG_ON(rq->cpu != task_cpu(p));
+ BUG_ON(task_current(rq, p));
+ BUG_ON(task_current_donor(rq, p));
+ BUG_ON(p->nr_cpus_allowed <= 1);
+
+ BUG_ON(!task_on_rq_queued(p));
+ BUG_ON(!rt_task(p));
+
+ return p;
+}
+
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
@@ -1915,18 +1936,16 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
/*
* We had to unlock the run queue. In
* the mean time, task could have
- * migrated already or had its affinity changed.
- * Also make sure that it wasn't scheduled on its rq.
+ * migrated already or had its affinity changed,
+ * therefore check if the task is still at the
+ * head of the pushable tasks list.
* It is possible the task was scheduled, set
* "migrate_disabled" and then got preempted, so we must
* check the task migration disable flag here too.
*/
- if (unlikely(task_rq(task) != rq ||
+ if (unlikely(is_migration_disabled(task) ||
!cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
- task_on_cpu(rq, task) ||
- !rt_task(task) ||
- is_migration_disabled(task) ||
- !task_on_rq_queued(task))) {
+ task != pick_next_pushable_task(rq))) {
double_unlock_balance(rq, lowest_rq);
lowest_rq = NULL;
@@ -1946,27 +1965,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
return lowest_rq;
}
-static struct task_struct *pick_next_pushable_task(struct rq *rq)
-{
- struct task_struct *p;
-
- if (!has_pushable_tasks(rq))
- return NULL;
-
- p = plist_first_entry(&rq->rt.pushable_tasks,
- struct task_struct, pushable_tasks);
-
- BUG_ON(rq->cpu != task_cpu(p));
- BUG_ON(task_current(rq, p));
- BUG_ON(task_current_donor(rq, p));
- BUG_ON(p->nr_cpus_allowed <= 1);
-
- BUG_ON(!task_on_rq_queued(p));
- BUG_ON(!rt_task(p));
-
- return p;
-}
-
/*
* If the current CPU has more than one RT task, see if the non
* running task can migrate over to a CPU that is running a task
--
2.22.3
This fix is the deadline version of the change made to the rt scheduler
titled: "sched/rt: Fix race in push_rt_task".
Here is the summary of the issue:
When a CPU calls push_dl_task and picks a task to push to
another CPU's runqueue, it calls the find_lock_later_rq method,
which takes a double lock on both CPUs' runqueues. If one of the
locks isn't readily available, it may lead to dropping the current
runqueue lock and reacquiring both locks at once. During this window
it is possible that the task is already migrated and is running on
some other CPU. These cases are already handled. However, if the task
is migrated and has already been executed, and another CPU is now
trying to wake it up (ttwu) such that it is queued again on the
runqueue (on_rq is 1), and also if the task was run by the same CPU,
then the current checks will pass even though the task was migrated
out and is no longer in the pushable tasks list.
Please go through the original change for more details on the issue.
In this fix, after the lock is obtained inside find_lock_later_rq, we
ensure that the task is still at the head of the pushable tasks list.
Also removed some checks that are no longer needed with the addition
of this new check.
However, the check of the pushable tasks list only applies when
find_lock_later_rq is called by push_dl_task. For the other caller,
i.e. dl_task_offline_migration, we use the existing checks.
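Condensed from the diff below, the recheck in find_lock_later_rq()
distinguishes the two callers via dl_throttled (only
dl_task_offline_migration deals with a throttled task):

    if (unlikely(is_migration_disabled(task) ||
                 !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
                 (task->dl.dl_throttled &&       /* dl_task_offline_migration */
                  (task_rq(task) != rq ||
                   task_on_cpu(rq, task) ||
                   !dl_task(task) ||
                   !task_on_rq_queued(task))) ||
                 (!task->dl.dl_throttled &&      /* push_dl_task */
                  task != pick_next_pushable_dl_task(rq)))) {
            double_unlock_balance(rq, later_rq);
            later_rq = NULL;
            break;
    }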
Signed-off-by: Harshit Agarwal <harshit(a)nutanix.com>
Cc: stable(a)vger.kernel.org
---
Changes in v2:
- As per Juri's suggestion, moved the check inside find_lock_later_rq,
similar to the rt change. Here we distinguish between the push_dl_task
caller and dl_task_offline_migration by checking whether the task is
throttled or not.
- Fixed the commit message to refer to the rt change by title.
- Link to v1:
https://lore.kernel.org/lkml/20250307204255.60640-1-harshit@nutanix.com/
---
kernel/sched/deadline.c | 66 ++++++++++++++++++++++++++---------------
1 file changed, 42 insertions(+), 24 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 38e4537790af..2366801b4557 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2621,6 +2621,25 @@ static int find_later_rq(struct task_struct *task)
return -1;
}
+static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
+{
+ struct task_struct *p;
+
+ if (!has_pushable_dl_tasks(rq))
+ return NULL;
+
+ p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
+
+ WARN_ON_ONCE(rq->cpu != task_cpu(p));
+ WARN_ON_ONCE(task_current(rq, p));
+ WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+
+ WARN_ON_ONCE(!task_on_rq_queued(p));
+ WARN_ON_ONCE(!dl_task(p));
+
+ return p;
+}
+
/* Locks the rq it finds */
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
{
@@ -2648,12 +2667,30 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
/* Retry if something changed. */
if (double_lock_balance(rq, later_rq)) {
- if (unlikely(task_rq(task) != rq ||
+ /*
+ * We had to unlock the run queue. In the meantime,
+ * task could have migrated already or had its affinity
+ * changed.
+ * It is possible the task was scheduled, set
+ * "migrate_disabled" and then got preempted, so we must
+ * check the task migration disable flag here too.
+ * For throttled task (dl_task_offline_migration), we
+ * check if the task is migrated to a different rq or
+ * is not a dl task anymore.
+ * For the non-throttled task (push_dl_task), the check
+ * to ensure that this task is still at the head of the
+ * pushable tasks list is enough.
+ */
+ if (unlikely(is_migration_disabled(task) ||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
- task_on_cpu(rq, task) ||
- !dl_task(task) ||
- is_migration_disabled(task) ||
- !task_on_rq_queued(task))) {
+ (task->dl.dl_throttled &&
+ (task_rq(task) != rq ||
+ task_on_cpu(rq, task) ||
+ !dl_task(task) ||
+ !task_on_rq_queued(task))) ||
+ (!task->dl.dl_throttled &&
+ task != pick_next_pushable_dl_task(rq)))) {
+
double_unlock_balance(rq, later_rq);
later_rq = NULL;
break;
@@ -2676,25 +2713,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
return later_rq;
}
-static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
-{
- struct task_struct *p;
-
- if (!has_pushable_dl_tasks(rq))
- return NULL;
-
- p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
-
- WARN_ON_ONCE(rq->cpu != task_cpu(p));
- WARN_ON_ONCE(task_current(rq, p));
- WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
-
- WARN_ON_ONCE(!task_on_rq_queued(p));
- WARN_ON_ONCE(!dl_task(p));
-
- return p;
-}
-
/*
* See if the non running -deadline tasks on this rq
* can be sent to some other CPU where they can preempt
--
2.39.3
Polling-mode transactions wait for a reply by busy-looping without
holding a spinlock, but currently the timeout checks are based only on
elapsed time: as a result, we could hit a false positive whenever our
busy-looping thread is preempted and scheduled out for a time greater
than the polling timeout.
Change the checks at the end of the busy-loop so that a timeout is
reported only if the polling did not in fact succeed and the loop was
not terminated early by an out-of-order reply.
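Condensed from the diff below, the end-of-loop handling becomes (the
new 'ooo' flag records that the loop was ended by an out-of-order
completion):

    bool ooo = false;

    spin_until_cond(scmi_xfer_done_no_timeout(cinfo, xfer, stop, &ooo));
    if (!ooo && !info->desc->ops->poll_done(cinfo, xfer)) {
            /* Neither an out-of-order reply nor a successful poll
             * ended the loop: this really is a timeout.
             */
            dev_err(dev, "timed out in resp(caller: %pS) - polling\n",
                    (void *)_RET_IP_);
            ...
    }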
Fixes: 31d2f803c19c ("firmware: arm_scmi: Add sync_cmds_completed_on_ret transport flag")
Reported-by: Huangjie <huangjie1663(a)phytium.com.cn>
Closes: https://lore.kernel.org/arm-scmi/20250123083323.2363749-1-jackhuang021@gmai…
Signed-off-by: Cristian Marussi <cristian.marussi(a)arm.com>
Cc: <stable(a)vger.kernel.org> # 5.18.x
---
This fix has to be backported to 5.4/5.10/5.15 with adjustments due to
small changes in the context.
---
drivers/firmware/arm_scmi/driver.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 60050da54bf2..e6cf83950875 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -1248,7 +1248,8 @@ static void xfer_put(const struct scmi_protocol_handle *ph,
}
static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo,
- struct scmi_xfer *xfer, ktime_t stop)
+ struct scmi_xfer *xfer, ktime_t stop,
+ bool *ooo)
{
struct scmi_info *info = handle_to_scmi_info(cinfo->handle);
@@ -1257,7 +1258,7 @@ static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo,
* in case of out-of-order receptions of delayed responses
*/
return info->desc->ops->poll_done(cinfo, xfer) ||
- try_wait_for_completion(&xfer->done) ||
+ (*ooo = try_wait_for_completion(&xfer->done)) ||
ktime_after(ktime_get(), stop);
}
@@ -1274,15 +1275,17 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc,
* itself to support synchronous commands replies.
*/
if (!desc->sync_cmds_completed_on_ret) {
+ bool ooo = false;
+
/*
* Poll on xfer using transport provided .poll_done();
* assumes no completion interrupt was available.
*/
ktime_t stop = ktime_add_ms(ktime_get(), timeout_ms);
- spin_until_cond(scmi_xfer_done_no_timeout(cinfo,
- xfer, stop));
- if (ktime_after(ktime_get(), stop)) {
+ spin_until_cond(scmi_xfer_done_no_timeout(cinfo, xfer,
+ stop, &ooo));
+ if (!ooo && !info->desc->ops->poll_done(cinfo, xfer)) {
dev_err(dev,
"timed out in resp(caller: %pS) - polling\n",
(void *)_RET_IP_);
--
2.47.0
[ resent due to a wrong address for regression reporting, sorry! ]
Hi,
we received a bug report showing a regression on the 6.13.1 kernel
against 6.13.0. The symptom is that Chrome and VSCode stopped working
with Gnome scaling, as reported on the openSUSE Tumbleweed bug tracker:
https://bugzilla.suse.com/show_bug.cgi?id=1236943
Quoting from there:
"""
I use the latest TW on Gnome with a 4K display and 150%
scaling. Everything has been working fine, but recently both Chrome
and VSCode (installed from official non-openSUSE channels) stopped
working with Scaling.
....
I am using VSCode with:
`--enable-features=UseOzonePlatform --enable-features=WaylandWindowDecorations --ozone-platform-hint=auto` and for Chrome, I select `Preferred Ozone platform` == `Wayland`.
"""
Surprisingly, the bisection pointed to the backport of the commit
b9b588f22a0c049a14885399e27625635ae6ef91 ("libfs: Use d_children list
to iterate simple_offset directories").
Indeed, the revert of this patch on the latest 6.13.4 was confirmed to
fix the issue. Also, the reporter verified that the latest 6.14-rc
release is still affected.
For now I have no concrete idea how the patch could break the behavior
of a graphical application like the above. Let us know if you need
something for debugging. (Or, easiest, join the bugzilla entry and ask
there; or open another bug report wherever you like.)
BTW, I'll be traveling tomorrow, so my reply will be delayed.
thanks,
Takashi
#regzbot introduced: b9b588f22a0c049a14885399e27625635ae6ef91
#regzbot monitor: https://bugzilla.suse.com/show_bug.cgi?id=1236943
Disabling HPD polling from i915_hpd_poll_init_work() involves probing
all display connectors explicitly to account for lost hotplug
interrupts. On some platforms (mostly pre-ICL) with HDMI connectors the
I2C EDID bit-banging using udelay() triggers in turn the
workqueue: i915_hpd_poll_init_work [i915] hogged CPU for >10000us 4 times, consider switching to WQ_UNBOUND
warning.
Fix the above by scheduling i915_hpd_poll_init_work() on a WQ_UNBOUND
workqueue. It's ok to use a system WQ, since i915_hpd_poll_init_work()
is properly flushed in intel_hpd_cancel_work().
The connector probing from drm_mode_config::output_poll_work resulting
in the same warning is fixed by the next patch.
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Heiner Kallweit <hkallweit1(a)gmail.com>
CC: stable(a)vger.kernel.org # 6.5
Suggested-by: Tejun Heo <tj(a)kernel.org>
Suggested-by: Heiner Kallweit <hkallweit1(a)gmail.com>
Reported-by: Heiner Kallweit <hkallweit1(a)gmail.com>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9245
Link: https://lore.kernel.org/all/f7e21caa-e98d-e5b5-932a-fe12d27fde9b@gmail.com
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_hotplug.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index e8562f6f8bb44..accc2fec562a0 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -774,7 +774,7 @@ void intel_hpd_poll_enable(struct drm_i915_private *dev_priv)
* As well, there's no issue if we race here since we always reschedule
* this worker anyway
*/
- queue_work(dev_priv->unordered_wq,
+ queue_work(system_unbound_wq,
&dev_priv->display.hotplug.poll_init_work);
}
@@ -803,7 +803,7 @@ void intel_hpd_poll_disable(struct drm_i915_private *dev_priv)
return;
WRITE_ONCE(dev_priv->display.hotplug.poll_enabled, false);
- queue_work(dev_priv->unordered_wq,
+ queue_work(system_unbound_wq,
&dev_priv->display.hotplug.poll_init_work);
}
--
2.37.2