April 2024 - Linux-stable-mirror

[PATCH v2] ACPI: CPPC: Fix access width used for PCC registers

by Vanshidhar Konda

commit 2f4a4d63a193be6fd530d180bb13c3592052904c modified cpc_read/cpc_write to use access_width to read CPC registers. For PCC registers the access width field in the ACPI register macro specifies the PCC subspace id. For non-zero PCC subspace id the access width is incorrectly treated as access width. This causes errors when reading from PCC registers in the CPPC driver. For PCC registers base the size of read/write on the bit width field. The debug message in cpc_read/cpc_write is updated to print relevant information for the address space type used to read the register. Signed-off-by: Vanshidhar Konda <vanshikonda(a)os.amperecomputing.com> Tested-by: Jarred White <jarredwhite(a)linux.microsoft.com> Reviewed-by: Jarred White <jarredwhite(a)linux.microsoft.com> Cc: 5.15+ <stable(a)vger.kernel.org> # 5.15+ --- When testing v6.9-rc1 kernel on AmpereOne system dmesg showed that cpufreq policy had failed to initialize on some cores during boot because cpufreq->get() always returned 0. On this system CPPC registers are in PCC subspace index 2 that are 32 bits wide. With this patch the CPPC driver interpreted the access width field as 16 bits, causing the register read to roll over too quickly to provide valid values during frequency computation. 
v2: - Use size variable in debug print message - Use size instead of reg->bit_width for acpi_os_read_memory and acpi_os_write_memory drivers/acpi/cppc_acpi.c | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 4bfbe55553f4..a037e9d15f48 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1002,14 +1002,14 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; + size = GET_BIT_WIDTH(reg); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); u32 val_u32; acpi_status status; status = acpi_os_read_port((acpi_io_address)reg->address, - &val_u32, width); + &val_u32, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); @@ -1018,17 +1018,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = val_u32; return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. 
+ */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_read_ffh(cpu, reg, val); else return acpi_os_read_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); switch (size) { case 8: @@ -1044,8 +1049,13 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = readq_relaxed(vaddr); break; default: - pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot read %u width from for system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot read %u bit width to PCC for ss: %d\n", + size, pcc_ss_id); + } return -EFAULT; } @@ -1063,12 +1073,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = &reg_res->cpc_entry.reg; + size = GET_BIT_WIDTH(reg); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); acpi_status status; status = acpi_os_write_port((acpi_io_address)reg->address, - (u32)val, width); + (u32)val, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to write SystemIO port %llx\n", reg->address); @@ -1076,17 +1087,22 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) } return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. 
+ */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_write_ffh(cpu, reg, val); else return acpi_os_write_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) val = MASK_VAL(reg, val); @@ -1105,8 +1121,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) writeq_relaxed(val, vaddr); break; default: - pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot write %u width from for system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", + size, pcc_ss_id); + } ret_val = -EFAULT; break; } -- 2.43.1

1 year, 8 months

2
1
0 0

[PATCH RESEND 0/2] usb: typec: tipd: fix event checking in interrupt service routines

by Javier Carrasco

The ISRs of the tps25750 and tps6598x do not handle generated events properly under all circumstances. The tps6598x ISR does not read all bits of the INT_EVENTX registers, leaving events signaled with bits above 64 unattended. Moreover, these events are not cleared, leaving the interrupt enabled. The tps25750 reads all bits of the INT_EVENT1 register, but the event checking is not right because the same event is checked in two different regions of the same register by means of an OR operation. This series aims to fix both issues by reading all bits of the INT_EVENTX registers, and limiting the event checking to the region where the supported events are defined (currently they are limited to the first 64 bits of the registers, as they are defined as BIT_ULL()). If the need for events above the first 64 bits of the INT_EVENTX registers arises, a different mechanism might be required. But for the current needs, all definitions can be left as they are. Note: resend to add the Cc tag for 'stable' (fixes in the series). Signed-off-by: Javier Carrasco <javier.carrasco(a)wolfvision.net> --- Javier Carrasco (2): usb: typec: tipd: fix event checking for tps25750 usb: typec: tipd: fix event checking for tps6598x drivers/usb/typec/tipd/core.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) --- base-commit: 4cece764965020c22cff7665b18a012006359095 change-id: 20240328-tps6598x_fix_event_handling-3398d3d82f85 Best regards, -- Javier Carrasco <javier.carrasco(a)wolfvision.net>

1 year, 8 months

2
9
0 0

[PATCH net] Bluetooth: hci_event: fix double hci_conn_drop() when conn->state == BT_CONNECTED

by kovalev＠altlinux.org

From: Vasiliy Kovalev <kovalev(a)altlinux.org> There is no need to drop the connection of some functions in which the conn->state in BT_CONNECTED is marked, since in the future the same check takes place (for example, in the hci_encrypt_change_evt() function) and the hci_conn_drop() is called. Otherwise, the conn->refcnt will become below zero, which will trigger a warning and may cause a crash on kernels with the panic_on_warn parameter enabled. Syzkaller hit 'WARNING in hci_conn_timeout' bug. [ 23.485892] Bluetooth: Core ver 2.22 [ 23.485916] NET: Registered PF_BLUETOOTH protocol family [ 23.485917] Bluetooth: HCI device and connection manager initialized [ 23.486407] Bluetooth: HCI socket layer initialized [ 23.486410] Bluetooth: L2CAP socket layer initialized [ 23.486413] Bluetooth: SCO socket layer initialized [ 24.507112] Bluetooth: hci0: unexpected cc 0x0c03 length: 249 > 1 [ 24.507142] Bluetooth: hci0: unexpected cc 0x1003 length: 249 > 9 [ 24.507165] Bluetooth: hci0: unexpected cc 0x1001 length: 249 > 9 [ 24.508091] Bluetooth: hci0: unexpected cc 0x0c23 length: 249 > 4 [ 24.508109] Bluetooth: hci0: unexpected cc 0x0c25 length: 249 > 3 [ 24.508117] Bluetooth: hci0: unexpected cc 0x0c38 length: 249 > 2 [ 24.545962] Bluetooth: BNEP (Ethernet Emulation) ver 1.3 [ 24.545969] Bluetooth: BNEP filters: protocol multicast [ 24.545973] Bluetooth: BNEP socket layer initialized [ 24.547521] Bluetooth: MGMT ver 1.22 [ 26.553008] Bluetooth: hci0: command tx timeout [ 26.561518] Bluetooth: RFCOMM TTY layer initialized [ 26.561526] Bluetooth: RFCOMM socket layer initialized [ 26.561532] Bluetooth: RFCOMM ver 1.11 [ 28.602024] Bluetooth: hci0: Opcode 0x0c13 failed: -110 [ 28.602054] Bluetooth: hci0: command tx timeout [ 30.650011] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 30.650021] Bluetooth: hci0: command tx timeout [ 32.696973] Bluetooth: hci0: command tx timeout [ 32.696985] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 34.744973] Bluetooth: hci0: command 0x0406 tx 
timeout [ 34.745008] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 36.792966] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 36.792980] Bluetooth: hci0: command 0x0406 tx timeout [ 38.841027] Bluetooth: hci0: command 0x0406 tx timeout [ 38.841035] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 40.889026] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 40.889999] Bluetooth: hci0: command 0x0406 tx timeout [ 40.890012] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 40.893629] NET: Registered PF_ALG protocol family [ 42.937008] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 42.937023] Bluetooth: hci0: command 0x0406 tx timeout [ 44.984984] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 44.985008] Bluetooth: hci0: command 0x0406 tx timeout [ 47.033023] Bluetooth: hci0: Opcode 0x0c1a failed: -110 [ 47.033044] Bluetooth: hci0: command 0x0406 tx timeout [ 49.080976] Bluetooth: hci0: command 0x0406 tx timeout [ 49.080985] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 51.129140] Bluetooth: hci0: Opcode 0x0c24 failed: -110 [ 51.130051] Bluetooth: hci0: command 0x0406 tx timeout [ 53.177011] Bluetooth: hci0: command 0x0406 tx timeout [ 55.225969] Bluetooth: hci0: command 0x0406 tx timeout [ 57.272968] Bluetooth: hci0: command 0x0406 tx timeout [ 59.320982] Bluetooth: hci0: command 0x0406 tx timeout [ 61.368989] Bluetooth: hci0: command 0x0406 tx timeout [ 148.474066] ------------[ cut here ]------------ [ 148.474072] WARNING: CPU: 0 PID: 3835 at net/bluetooth/hci_conn.c:612 hci_conn_timeout+0x16/0x60 [bluetooth] [ 148.474115] Modules linked in: cmac algif_hash algif_skcipher af_alg rfcomm bnep hci_vhci bluetooth ecdh_generic uinput af_packet rfkill joydev hid_generic usbhid hid qrtr intel_rapl_msr intel_rapl_common intel_pmc_core kvm_intel nls_utf8 iTCO_wdt intel_pmc_bxt iTCO_vendor_support nls_cp866 vfat fat kvm irqbypass crct10dif_pclmul crc32_pclmul snd_hda_codec_generic crc32c_intel ghash_clmulni_intel ledtrig_audio sha512_ssse3 snd_hda_intel sha256_ssse3 sha1_ssse3 
snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec aesni_intel crypto_simd cryptd i2c_i801 snd_hda_core psmouse snd_hwdep i2c_smbus xhci_pci pcspkr snd_pcm lpc_ich xhci_pci_renesas xhci_hcd tiny_power_button qemu_fw_cfg button sch_fq_codel vboxvideo drm_vram_helper drm_ttm_helper ttm vboxsf vboxguest snd_seq_midi snd_seq_midi_event snd_seq snd_rawmidi snd_seq_device snd_timer snd soundcore msr fuse efi_pstore dm_mod ip_tables x_tables autofs4 virtio_gpu virtio_net virtio_dma_buf drm_shmem_helper net_failover drm_kms_helper [ 148.474210] virtio_rng drm virtio_scsi rng_core virtio_console virtio_balloon virtio_blk failover ahci libahci libata evdev input_leds serio_raw scsi_mod scsi_common virtio_pci virtio_pci_legacy_dev virtio_pci_modern_dev virtio_ring virtio intel_agp intel_gtt [ 148.474234] CPU: 0 PID: 3835 Comm: kworker/u5:2 Not tainted 6.1.85-un-def-alt1 #1 [ 148.474238] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 [ 148.474241] Workqueue: hci0 hci_conn_timeout [bluetooth] [ 148.474265] RIP: 0010:hci_conn_timeout+0x16/0x60 [bluetooth] [ 148.474288] Code: 00 00 66 89 44 24 06 e8 58 a7 ff ff eb a8 e8 41 7b ee c1 90 0f 1f 44 00 00 8b 87 20 fd ff ff 85 c0 78 07 74 07 c3 cc cc cc cc <0f> 0b 55 0f b6 87 49 fd ff ff 48 8d af 10 fd ff ff 3c 01 74 12 be [ 148.474291] RSP: 0018:ffffa7fd80b53e90 EFLAGS: 00010286 [ 148.474295] RAX: 00000000fffe728b RBX: ffff8959c46ab180 RCX: ffff8959c3b70028 [ 148.474297] RDX: 0000000000000001 RSI: ffff8959c86ce0b0 RDI: ffff895a105aeaf0 [ 148.474299] RBP: ffff895a105aeaf0 R08: ffff8959c86ce0b0 R09: ffff8959c46ab1f4 [ 148.474301] R10: 0000000000000005 R11: 0000000000000005 R12: ffff8959c3b70000 [ 148.474302] R13: ffff8959ec495400 R14: 0000000000000000 R15: ffff8959ec495405 [ 148.474305] FS: 0000000000000000(0000) GS:ffff895a3dc00000(0000) knlGS:0000000000000000 [ 148.474308] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 148.474310] CR2: 00007f2a1b7baa48 CR3: 0000000034b98000 CR4: 0000000000750ef0 [ 
148.474317] PKRU: 55555554 [ 148.474319] Call Trace: [ 148.474323] <TASK> [ 148.474327] ? __warn+0x79/0xc0 [ 148.474343] ? hci_conn_timeout+0x16/0x60 [bluetooth] [ 148.474364] ? report_bug+0xff/0x150 [ 148.474375] ? handle_bug+0x49/0xa0 [ 148.474398] ? exc_invalid_op+0x14/0x70 [ 148.474402] ? asm_exc_invalid_op+0x16/0x20 [ 148.474408] ? hci_conn_timeout+0x16/0x60 [bluetooth] [ 148.474441] process_one_work+0x217/0x3e0 [ 148.474467] worker_thread+0x4d/0x3c0 [ 148.474473] ? process_one_work+0x3e0/0x3e0 [ 148.474478] kthread+0xd6/0x100 [ 148.474482] ? kthread_complete_and_exit+0x20/0x20 [ 148.474486] ret_from_fork+0x1f/0x30 [ 148.474500] </TASK> [ 148.474502] ---[ end trace 0000000000000000 ]--- Fixes: 0fe29fd1cd77 ("Bluetooth: Read LE remote features during connection establishment") Fixes: 769be974d0c7 ("[Bluetooth] Use ACL config stage to retrieve remote features") Fixes: f8558555f31e ("[Bluetooth] Initiate authentication during connection establishment") Cc: stable(a)vger.kernel.org Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org> --- net/bluetooth/hci_event.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a8b8cfebe0180c..64477e1bde7cec 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3529,7 +3529,6 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data, } else { conn->state = BT_CONNECTED; hci_connect_cfm(conn, ev->status); - hci_conn_drop(conn); } } else { hci_auth_cfm(conn, ev->status); @@ -3776,7 +3775,6 @@ static void hci_remote_features_evt(struct hci_dev *hdev, void *data, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_connect_cfm(conn, ev->status); - hci_conn_drop(conn); } unlock: @@ -5030,7 +5028,6 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev, void *data, if (!hci_outgoing_auth_needed(hdev, conn)) { conn->state = BT_CONNECTED; hci_connect_cfm(conn, ev->status); - hci_conn_drop(conn); } unlock: @@ 
-6561,7 +6558,6 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data, conn->state = BT_CONNECTED; hci_connect_cfm(conn, status); - hci_conn_drop(conn); } } -- 2.33.8

1 year, 8 months

2
2
0 0

[git:media_tree/master] media: rc: bpf attach/detach requires write permission

by Mauro Carvalho Chehab

This is an automatic generated email to let you know that the following patch were queued: Subject: media: rc: bpf attach/detach requires write permission Author: Sean Young <sean(a)mess.org> Date: Thu Apr 13 10:50:32 2023 +0200 Note that bpf attach/detach also requires CAP_NET_ADMIN. Cc: stable(a)vger.kernel.org Signed-off-by: Sean Young <sean(a)mess.org> Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org> drivers/media/rc/bpf-lirc.c | 6 +++--- drivers/media/rc/lirc_dev.c | 5 ++++- drivers/media/rc/rc-core-priv.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) --- diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98e81..52d82cbe7685 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->attach_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); @@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) { bpf_prog_put(prog); return PTR_ERR(rcdev); @@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->query.query_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->query.target_fd); + rcdev = rc_dev_get_from_fd(attr->query.target_fd, false); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a537734832c5..caad59f76793 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } -struct rc_dev *rc_dev_get_from_fd(int fd) +struct rc_dev *rc_dev_get_from_fd(int fd, bool write) { 
struct fd f = fdget(fd); struct lirc_fh *fh; @@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd) return ERR_PTR(-EINVAL); } + if (write && !(f.file->f_mode & FMODE_WRITE)) + return ERR_PTR(-EPERM); + fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index ef1e95e1af7f..7df949fc65e2 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int lirc_register(struct rc_dev *dev); void lirc_unregister(struct rc_dev *dev); -struct rc_dev *rc_dev_get_from_fd(int fd); +struct rc_dev *rc_dev_get_from_fd(int fd, bool write); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {}

1 year, 8 months

1
0
0 0

[PATCH 6.1 0/6] backport xfs fix patches reported by xfs/179/270/557/606

by Mahmoud Adam

Hi, These patches fix issues reported by xfstests tests xfs/179, xfs/270, xfs/557 and xfs/606. The patchset was tested to confirm that it fixes those tests. All are clean picks. Thanks, MNAdam

1 year, 8 months

5
13
0 0

[PATCH net] Bluetooth: hci_event: fix possible multiple drops by marked conn->state after hci_disconnect()

by kovalev＠altlinux.org

From: Vasiliy Kovalev <kovalev(a)altlinux.org> When returning from the hci_disconnect() function, the conn->state continues to be set to BT_CONNECTED and hci_conn_drop() is executed, which decrements the conn->refcnt. Syzkaller has generated a reproducer that results in multiple calls to hci_encrypt_change_evt() of the same conn object. -- hci_encrypt_change_evt(){ // conn->state == BT_CONNECTED hci_disconnect(){ hci_abort_conn(); } hci_conn_drop(); // conn->state == BT_CONNECTED } -- This behavior can cause the conn->refcnt to go far into negative values and cause problems. To get around this, you need to change the conn->state, namely to BT_DISCONN, as it was before. Fixes: a13f316e90fd ("Bluetooth: hci_conn: Consolidate code for aborting connections") Cc: stable(a)vger.kernel.org Signed-off-by: Vasiliy Kovalev <kovalev(a)altlinux.org> --- net/bluetooth/hci_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 64477e1bde7cec..e0477021183f9b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2989,6 +2989,7 @@ static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_drop(conn); + conn->state = BT_DISCONN; unlock: hci_dev_unlock(hdev); @@ -3654,6 +3655,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, hci_encrypt_cfm(conn, ev->status); hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_drop(conn); + conn->state = BT_DISCONN; goto unlock; } @@ -5248,6 +5250,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, void *data, if (ev->status && conn->state == BT_CONNECTED) { hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_drop(conn); + conn->state = BT_DISCONN; goto unlock; } -- 2.33.8

1 year, 8 months

1
1
0 0

[PATCH v1 0/4] docs: stable-kernel-rules: fine-tuning and 'no semi-automatic backport'

by Thorsten Leemhuis

After a recent discussion regarding "do we need a 'nobackport' tag" I set out to create one change for stable-kernel-rules.rst. This is now the second patch in the series, which links to that discussion; the other stuff is fine-tuning that happened along the way. Ciao, Thorsten Thorsten Leemhuis (4): docs: stable-kernel-rules: reduce redundancy docs: stable-kernel-rules: mention "no semi-automatic backport" docs: stable-kernel-rules: call mainline by its name and change example docs: stable-kernel-rules: remove code-labels tags Documentation/process/stable-kernel-rules.rst | 50 +++++++------------ 1 file changed, 18 insertions(+), 32 deletions(-) base-commit: 3f86ed6ec0b390c033eae7f9c487a3fea268e027 -- 2.44.0

1 year, 8 months

3
21
0 0

[RESEND. PATCH v2] Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0bda:0x4853

by WangYuli

Add the support ID(0x0bda, 0x4853) to usb_device_id table for Realtek RTL8852BE. Without this change the device utilizes an obsolete version of the firmware that is encoded in it rather than the updated Realtek firmware and config files from the firmware directory. The latter files implement many new features. The device table is as follows: T: Bus=03 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=4853 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Cc: stable(a)vger.kernel.org Signed-off-by: Larry Finger <Larry.Finger(a)lwfinger.net> Signed-off-by: WangYuli <wangyuli(a)uniontech.com> --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 
c391e612b83b..3356af3a7f61 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -542,6 +542,8 @@ static const struct usb_device_id quirks_table[] = { /* Realtek 8852BE Bluetooth devices */ { USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | -- 2.43.0

1 year, 8 months

2
1
0 0

[PATCH] sched: Add missing memory barrier in switch_mm_cid

by Mathieu Desnoyers

Many architectures' switch_mm() (e.g. arm64) do not have an smp_mb() which the core scheduler code has depended upon since commit: commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid") If switch_mm() doesn't call smp_mb(), sched_mm_cid_remote_clear() can unset the actively used cid when it fails to observe active task after it sets lazy_put. There *is* a memory barrier between storing to rq->curr and _return to userspace_ (as required by membarrier), but the rseq mm_cid has stricter requirements: the barrier needs to be issued between store to rq->curr and switch_mm_cid(), which happens earlier than: - spin_unlock(), - switch_to(). So it's fine when the architecture switch_mm happens to have that barrier already, but less so when the architecture only provides the full barrier in switch_to() or spin_unlock(). It is a bug in the rseq switch_mm_cid() implementation. All architectures that don't have memory barriers in switch_mm(), but rather have the full barrier either in finish_lock_switch() or switch_to() have them too late for the needs of switch_mm_cid(). Introduce a new smp_mb__after_switch_mm(), defined as smp_mb() in the generic barrier.h header, and use it in switch_mm_cid() for scheduler transitions where switch_mm() is expected to provide a memory barrier. Architectures can override smp_mb__after_switch_mm() if their switch_mm() implementation provides an implicit memory barrier. Override it with a no-op on x86 which implicitly provide this memory barrier by writing to CR3. 
Link: https://lore.kernel.org/lkml/20240305145335.2696125-1-yeoreum.yun@arm.com/ Reported-by: levi.yun <yeoreum.yun(a)arm.com> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") Cc: <stable(a)vger.kernel.org> # 6.4.x Cc: Ingo Molnar <mingo(a)redhat.com> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Steven Rostedt <rostedt(a)goodmis.org> Cc: Vincent Guittot <vincent.guittot(a)linaro.org> Cc: Juri Lelli <juri.lelli(a)redhat.com> Cc: Dietmar Eggemann <dietmar.eggemann(a)arm.com> Cc: Ben Segall <bsegall(a)google.com> Cc: Mel Gorman <mgorman(a)suse.de> Cc: Daniel Bristot de Oliveira <bristot(a)redhat.com> Cc: Valentin Schneider <vschneid(a)redhat.com> Cc: levi.yun <yeoreum.yun(a)arm.com> Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Mark Rutland <mark.rutland(a)arm.com> Cc: Will Deacon <will(a)kernel.org> Cc: Aaron Lu <aaron.lu(a)intel.com> --- arch/x86/include/asm/barrier.h | 3 +++ include/asm-generic/barrier.h | 8 ++++++++ kernel/sched/sched.h | 20 ++++++++++++++------ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 35389b2af88e..0d5e54201eb2 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -79,6 +79,9 @@ do { \ #define __smp_mb__before_atomic() do { } while (0) #define __smp_mb__after_atomic() do { } while (0) +/* Writing to CR3 provides a full memory barrier in switch_mm(). 
*/ +#define smp_mb__after_switch_mm() do { } while (0) + #include <asm-generic/barrier.h> /* diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 961f4d88f9ef..5a6c94d7a598 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -296,5 +296,13 @@ do { \ #define io_stop_wc() do { } while (0) #endif +/* + * Architectures that guarantee an implicit smp_mb() in switch_mm() + * can override smp_mb__after_switch_mm. + */ +#ifndef smp_mb__after_switch_mm +#define smp_mb__after_switch_mm() smp_mb() +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 2e5a95486a42..044d842c696c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -79,6 +79,8 @@ # include <asm/paravirt_api_clock.h> #endif +#include <asm/barrier.h> + #include "cpupri.h" #include "cpudeadline.h" @@ -3481,13 +3483,19 @@ static inline void switch_mm_cid(struct rq *rq, * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu]. * Provide it here. */ - if (!prev->mm) // from kernel + if (!prev->mm) { // from kernel smp_mb(); - /* - * user -> user transition guarantees a memory barrier through - * switch_mm() when current->mm changes. If current->mm is - * unchanged, no barrier is needed. - */ + } else { // from user + /* + * user -> user transition relies on an implicit + * memory barrier in switch_mm() when + * current->mm changes. If the architecture + * switch_mm() does not have an implicit memory + * barrier, it is emitted here. If current->mm + * is unchanged, no barrier is needed. + */ + smp_mb__after_switch_mm(); + } } if (prev->mm_cid_active) { mm_cid_snapshot_time(rq, prev->mm); -- 2.39.2

1 year, 8 months

4
6
0 0

[RESEND PATCH net v4 1/2] soc: fsl: qbman: Always disable interrupts when taking cgr_lock

by Sean Anderson

smp_call_function_single disables IRQs when executing the callback. To prevent deadlocks, we must disable IRQs when taking cgr_lock elsewhere. This is already done by qman_update_cgr and qman_delete_cgr; fix the other lockers. Fixes: 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()") CC: stable(a)vger.kernel.org Signed-off-by: Sean Anderson <sean.anderson(a)seco.com> Reviewed-by: Camelia Groza <camelia.groza(a)nxp.com> Tested-by: Vladimir Oltean <vladimir.oltean(a)nxp.com> --- I got no response the first time I sent this, so I am resending to net. This issue was introduced in a series which went through net, so I hope it makes sense to take it via net. [1] https://lore.kernel.org/linux-arm-kernel/20240108161904.2865093-1-sean.ande… (no changes since v3) Changes in v3: - Change blamed commit to something more appropriate Changes in v2: - Fix one additional call to spin_unlock drivers/soc/fsl/qbman/qman.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 739e4eee6b75..1bf1f1ea67f0 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1456,11 +1456,11 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock(&p->cgr_lock); + spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1476,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2440,7 @@ int 
qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock(&p->cgr_lock); + spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,7 +2477,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock(&p->cgr_lock); + spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } -- 2.35.1.1320.gc452695387.dirty [Embedded World 2024, SECO SpA]<https://www.messe-ticket.de/Nuernberg/embeddedworld2024/Register/ew24517689>

1 year, 8 months

4
5
0 0

Re: 6.8.5 does not boot (regression)

by Linux regression tracking (Thorsten Leemhuis)

On 11.04.24 09:20, Toralf Förster wrote: > It is a remote system, nothing in the logs, system is a hardened Gentoo > Linux, 6.8.4 was fine. > > Linux mr-fox 6.8.4 #4 SMP Thu Apr 4 22:10:47 UTC 2024 x86_64 AMD Ryzen > 9 5950X 16-Core Processor AuthenticAMD GNU/Linux > > Another Gentoo dev reported problems too. > > config is below. Thx for the report, but the harsh reality is: nearly no developer will see your initial report, as you just sent it to LKML, which nearly nobody reads. I CCed a few lists, which might help. But that is unlikely, as this could be caused by all sorts of changes. Which is why we likely need a bisection ( https://docs.kernel.org/admin-guide/verify-bugs-and-bisect-regressions.html ) from somebody affected to make some progress here. That being said: there are a few EFI changes in there that in a case like this are a suspect. I CCed the developer, maybe something rings a bell. Ciao, Thorsten

1 year, 8 months

3
2
0 0

[PATCH v2] slimbus: qcom-ngd-ctrl: Add timeout for wait operation

by Viken Dadhaniya

In the current driver, qcom_slim_ngd_up_worker() waits indefinitely for the ctrl->qmi_up completion object. This results in a workqueue lockup on the kthread. Add wait_for_completion_interruptible_timeout() to allow the thread to wait for a specific timeout period and bail out instead of waiting indefinitely. Fixes: a899d324863a ("slimbus: qcom-ngd-ctrl: add Sub System Restart support") Cc: stable(a)vger.kernel.org Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org> Signed-off-by: Viken Dadhaniya <quic_vdadhani(a)quicinc.com> --- v1 -> v2: - Remove macro and add value inline. - add fix, cc and review tag. --- --- drivers/slimbus/qcom-ngd-ctrl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index efeba8275a66..a09a26bf4988 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1451,7 +1451,11 @@ static void qcom_slim_ngd_up_worker(struct work_struct *work) ctrl = container_of(work, struct qcom_slim_ngd_ctrl, ngd_up_work); /* Make sure qmi service is up before continuing */ - wait_for_completion_interruptible(&ctrl->qmi_up); + if (!wait_for_completion_interruptible_timeout(&ctrl->qmi_up, + msecs_to_jiffies(MSEC_PER_SEC))) { + dev_err(ctrl->dev, "QMI wait timeout\n"); + return; + } mutex_lock(&ctrl->ssr_lock); qcom_slim_ngd_enable(ctrl, true); -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation

1 year, 8 months

2
1
0 0

Backported patch for linux-5.4

by Jürgen Groß

Hi, there has been a report of a failure in a 5.4 based kernel, which has been fixed in kernel 5.10 with commit abee7c494d8c41bb388839bccc47e06247f0d7de. Please apply the attached backported patch to the stable 5.4 kernel. Juergen

1 year, 8 months

2
1
0 0

[PATCH 4.19.y v3 0/2] Fix stable-4.19 use-after-free bug

by George Guo

1. About v3-0001-tracing-Remove-unnecessary-hist_data-destroy-in-d.patch: I wrote the changelog myself because no one had found the bug at that time; the code was later removed upstream, but 4.19-stable still has the bug. 2. About v3-0002-tracing-Remove-unnecessary-var-destroy-in-onmax_d.patch: I also wrote the changelog myself, because the upstream API has changed. refs commits: 466f4528fbc6 ("tracing: Generalize hist trigger onmax and save action") ff9d31d0d466 ("tracing: Remove unnecessary var_ref destroy in track_data_destroy()") George Guo (2): tracing: Remove unnecessary hist_data destroy in destroy_synth_var_refs() tracing: Remove unnecessary var destroy in onmax_destroy() kernel/trace/trace_events_hist.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) -- 2.34.1

1 year, 8 months

2
3
0 0

FAILED: patch "[PATCH] drm/i915/gt: Reset queue_priority_hint on parking" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 4a3859ea5240365d21f6053ee219bb240d520895 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033046-mobility-coherence-2055@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 4a3859ea5240365d21f6053ee219bb240d520895 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris(a)chris-wilson.co.uk> Date: Mon, 18 Mar 2024 14:58:47 +0100 Subject: [PATCH] drm/i915/gt: Reset queue_priority_hint on parking Originally, with strict in order execution, we could complete execution only when the queue was empty. Preempt-to-busy allows replacement of an active request that may complete before the preemption is processed by HW. If that happens, the request is retired from the queue, but the queue_priority_hint remains set, preventing direct submission until after the next CS interrupt is processed. This preempt-to-busy race can be triggered by the heartbeat, which will also act as the power-management barrier and upon completion allow us to idle the HW. We may process the completion of the heartbeat, and begin parking the engine before the CS event that restores the queue_priority_hint, causing us to fail the assertion that it is MIN. <3>[ 166.210729] __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 166.210781] Dumping ftrace buffer: <0>[ 166.210795] --------------------------------- ... <0>[ 167.302811] drm_fdin-1097 2..s1. 
165741070us : trace_ports: 0000:00:02.0 rcs0: promote { ccid:20 1217:2 prio 0 } <0>[ 167.302861] drm_fdin-1097 2d.s2. 165741072us : execlists_submission_tasklet: 0000:00:02.0 rcs0: preempting last=1217:2, prio=0, hint=2147483646 <0>[ 167.302928] drm_fdin-1097 2d.s2. 165741072us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 1217:2, current 0 <0>[ 167.302992] drm_fdin-1097 2d.s2. 165741073us : __i915_request_submit: 0000:00:02.0 rcs0: fence 3:4660, current 4659 <0>[ 167.303044] drm_fdin-1097 2d.s1. 165741076us : execlists_submission_tasklet: 0000:00:02.0 rcs0: context:3 schedule-in, ccid:40 <0>[ 167.303095] drm_fdin-1097 2d.s1. 165741077us : trace_ports: 0000:00:02.0 rcs0: submit { ccid:40 3:4660* prio 2147483646 } <0>[ 167.303159] kworker/-89 11..... 165741139us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence c90:2, current 2 <0>[ 167.303208] kworker/-89 11..... 165741148us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:c90 unpin <0>[ 167.303272] kworker/-89 11..... 165741159us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 1217:2, current 2 <0>[ 167.303321] kworker/-89 11..... 165741166us : __intel_context_do_unpin: 0000:00:02.0 rcs0: context:1217 unpin <0>[ 167.303384] kworker/-89 11..... 165741170us : i915_request_retire.part.0: 0000:00:02.0 rcs0: fence 3:4660, current 4660 <0>[ 167.303434] kworker/-89 11d..1. 165741172us : __intel_context_retire: 0000:00:02.0 rcs0: context:1216 retire runtime: { total:56028ns, avg:56028ns } <0>[ 167.303484] kworker/-89 11..... 165741198us : __engine_park: 0000:00:02.0 rcs0: parked <0>[ 167.303534] <idle>-0 5d.H3. 165741207us : execlists_irq_handler: 0000:00:02.0 rcs0: semaphore yield: 00000040 <0>[ 167.303583] kworker/-89 11..... 165741397us : __intel_context_retire: 0000:00:02.0 rcs0: context:1217 retire runtime: { total:325575ns, avg:0ns } <0>[ 167.303756] kworker/-89 11..... 
165741777us : __intel_context_retire: 0000:00:02.0 rcs0: context:c90 retire runtime: { total:0ns, avg:0ns } <0>[ 167.303806] kworker/-89 11..... 165742017us : __engine_park: __engine_park:283 GEM_BUG_ON(engine->sched_engine->queue_priority_hint != (-((int)(~0U >> 1)) - 1)) <0>[ 167.303811] --------------------------------- <4>[ 167.304722] ------------[ cut here ]------------ <2>[ 167.304725] kernel BUG at drivers/gpu/drm/i915/gt/intel_engine_pm.c:283! <4>[ 167.304731] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI <4>[ 167.304734] CPU: 11 PID: 89 Comm: kworker/11:1 Tainted: G W 6.8.0-rc2-CI_DRM_14193-gc655e0fd2804+ #1 <4>[ 167.304736] Hardware name: Intel Corporation Rocket Lake Client Platform/RocketLake S UDIMM 6L RVP, BIOS RKLSFWI1.R00.3173.A03.2204210138 04/21/2022 <4>[ 167.304738] Workqueue: i915-unordered retire_work_handler [i915] <4>[ 167.304839] RIP: 0010:__engine_park+0x3fd/0x680 [i915] <4>[ 167.304937] Code: 00 48 c7 c2 b0 e5 86 a0 48 8d 3d 00 00 00 00 e8 79 48 d4 e0 bf 01 00 00 00 e8 ef 0a d4 e0 31 f6 bf 09 00 00 00 e8 03 49 c0 e0 <0f> 0b 0f 0b be 01 00 00 00 e8 f5 61 fd ff 31 c0 e9 34 fd ff ff 48 <4>[ 167.304940] RSP: 0018:ffffc9000059fce0 EFLAGS: 00010246 <4>[ 167.304942] RAX: 0000000000000200 RBX: 0000000000000000 RCX: 0000000000000006 <4>[ 167.304944] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 <4>[ 167.304946] RBP: ffff8881330ca1b0 R08: 0000000000000001 R09: 0000000000000001 <4>[ 167.304947] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8881330ca000 <4>[ 167.304948] R13: ffff888110f02aa0 R14: ffff88812d1d0205 R15: ffff88811277d4f0 <4>[ 167.304950] FS: 0000000000000000(0000) GS:ffff88844f780000(0000) knlGS:0000000000000000 <4>[ 167.304952] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 167.304953] CR2: 00007fc362200c40 CR3: 000000013306e003 CR4: 0000000000770ef0 <4>[ 167.304955] PKRU: 55555554 <4>[ 167.304957] Call Trace: <4>[ 167.304958] <TASK> <4>[ 167.305573] ____intel_wakeref_put_last+0x1d/0x80 [i915] <4>[ 
167.305685] i915_request_retire.part.0+0x34f/0x600 [i915] <4>[ 167.305800] retire_requests+0x51/0x80 [i915] <4>[ 167.305892] intel_gt_retire_requests_timeout+0x27f/0x700 [i915] <4>[ 167.305985] process_scheduled_works+0x2db/0x530 <4>[ 167.305990] worker_thread+0x18c/0x350 <4>[ 167.305993] kthread+0xfe/0x130 <4>[ 167.305997] ret_from_fork+0x2c/0x50 <4>[ 167.306001] ret_from_fork_asm+0x1b/0x30 <4>[ 167.306004] </TASK> It is necessary for the queue_priority_hint to be lower than the next request submission upon waking up, as we rely on the hint to decide when to kick the tasklet to submit that first request. Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Closes: https://gitlab.freedesktop.org/drm/intel/issues/10154 Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik(a)linux.intel.com> Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Cc: <stable(a)vger.kernel.org> # v5.4+ Reviewed-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com> Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com> Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240318135906.716055-2-janus… (cherry picked from commit 98850e96cf811dc2d0a7d0af491caff9f5d49c1e) Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 96bdb93a948d..fb7bff27b45a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ - GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 42aade0faf2d..b061a0a0d6b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3272,6 +3272,9 @@ static void execlists_park(struct intel_engine_cs *engine) { cancel_timer(&engine->execlists.timer); cancel_timer(&engine->execlists.preempt); + + /* Reset upon idling, or we may delay the busy wakeup. */ + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void add_to_engine(struct i915_request *rq)

1 year, 8 months

3
2
0 0

FAILED: patch "[PATCH] x86/mm/pat: fix VM_PAT handling in COW mappings" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 04c35ab3bdae7fefbd7c7a7355f29fa03a035221 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040851-hamster-canary-7b07@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: 04c35ab3bdae ("x86/mm/pat: fix VM_PAT handling in COW mappings") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 04c35ab3bdae7fefbd7c7a7355f29fa03a035221 Mon Sep 17 00:00:00 2001 From: David Hildenbrand <david(a)redhat.com> Date: Wed, 3 Apr 2024 23:21:30 +0200 Subject: [PATCH] x86/mm/pat: fix VM_PAT handling in COW mappings PAT handling won't do the right thing in COW mappings: the first PTE (or, in fact, all PTEs) can be replaced during write faults to point at anon folios. Reliably recovering the correct PFN and cachemode using follow_phys() from PTEs will not work in COW mappings. Using follow_phys(), we might just get the address+protection of the anon folio (which is very wrong), or fail on swap/nonswap entries, failing follow_phys() and triggering a WARN_ON_ONCE() in untrack_pfn() and track_pfn_copy(), not properly calling free_pfn_range(). In free_pfn_range(), we either wouldn't call memtype_free() or would call it with the wrong range, possibly leaking memory. To fix that, let's update follow_phys() to refuse returning anon folios, and fallback to using the stored PFN inside vma->vm_pgoff for COW mappings if we run into that. We will now properly handle untrack_pfn() with COW mappings, where we don't need the cachemode. 
We'll have to fail fork()->track_pfn_copy() if the first page was replaced by an anon folio, though: we'd have to store the cachemode in the VMA to make this work, likely growing the VMA size. For now, lets keep it simple and let track_pfn_copy() just fail in that case: it would have failed in the past with swap/nonswap entries already, and it would have done the wrong thing with anon folios. Simple reproducer to trigger the WARN_ON_ONCE() in untrack_pfn(): <--- C reproducer ---> #include <stdio.h> #include <sys/mman.h> #include <unistd.h> #include <liburing.h> int main(void) { struct io_uring_params p = {}; int ring_fd; size_t size; char *map; ring_fd = io_uring_setup(1, &p); if (ring_fd < 0) { perror("io_uring_setup"); return 1; } size = p.sq_off.array + p.sq_entries * sizeof(unsigned); /* Map the submission queue ring MAP_PRIVATE */ map = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, ring_fd, IORING_OFF_SQ_RING); if (map == MAP_FAILED) { perror("mmap"); return 1; } /* We have at least one page. Let's COW it. 
*/ *map = 0; pause(); return 0; } <--- C reproducer ---> On a system with 16 GiB RAM and swap configured: # ./iouring & # memhog 16G # killall iouring [ 301.552930] ------------[ cut here ]------------ [ 301.553285] WARNING: CPU: 7 PID: 1402 at arch/x86/mm/pat/memtype.c:1060 untrack_pfn+0xf4/0x100 [ 301.553989] Modules linked in: binfmt_misc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_g [ 301.558232] CPU: 7 PID: 1402 Comm: iouring Not tainted 6.7.5-100.fc38.x86_64 #1 [ 301.558772] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebu4 [ 301.559569] RIP: 0010:untrack_pfn+0xf4/0x100 [ 301.559893] Code: 75 c4 eb cf 48 8b 43 10 8b a8 e8 00 00 00 3b 6b 28 74 b8 48 8b 7b 30 e8 ea 1a f7 000 [ 301.561189] RSP: 0018:ffffba2c0377fab8 EFLAGS: 00010282 [ 301.561590] RAX: 00000000ffffffea RBX: ffff9208c8ce9cc0 RCX: 000000010455e047 [ 301.562105] RDX: 07fffffff0eb1e0a RSI: 0000000000000000 RDI: ffff9208c391d200 [ 301.562628] RBP: 0000000000000000 R08: ffffba2c0377fab8 R09: 0000000000000000 [ 301.563145] R10: ffff9208d2292d50 R11: 0000000000000002 R12: 00007fea890e0000 [ 301.563669] R13: 0000000000000000 R14: ffffba2c0377fc08 R15: 0000000000000000 [ 301.564186] FS: 0000000000000000(0000) GS:ffff920c2fbc0000(0000) knlGS:0000000000000000 [ 301.564773] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 301.565197] CR2: 00007fea88ee8a20 CR3: 00000001033a8000 CR4: 0000000000750ef0 [ 301.565725] PKRU: 55555554 [ 301.565944] Call Trace: [ 301.566148] <TASK> [ 301.566325] ? untrack_pfn+0xf4/0x100 [ 301.566618] ? __warn+0x81/0x130 [ 301.566876] ? untrack_pfn+0xf4/0x100 [ 301.567163] ? report_bug+0x171/0x1a0 [ 301.567466] ? handle_bug+0x3c/0x80 [ 301.567743] ? exc_invalid_op+0x17/0x70 [ 301.568038] ? asm_exc_invalid_op+0x1a/0x20 [ 301.568363] ? untrack_pfn+0xf4/0x100 [ 301.568660] ? 
untrack_pfn+0x65/0x100 [ 301.568947] unmap_single_vma+0xa6/0xe0 [ 301.569247] unmap_vmas+0xb5/0x190 [ 301.569532] exit_mmap+0xec/0x340 [ 301.569801] __mmput+0x3e/0x130 [ 301.570051] do_exit+0x305/0xaf0 ... Link: https://lkml.kernel.org/r/20240403212131.929421-3-david@redhat.com Signed-off-by: David Hildenbrand <david(a)redhat.com> Reported-by: Wupeng Ma <mawupeng1(a)huawei.com> Closes: https://lkml.kernel.org/r/20240227122814.3781907-1-mawupeng1@huawei.com Fixes: b1a86e15dc03 ("x86, pat: remove the dependency on 'vm_pgoff' in track/untrack pfn vma routines") Fixes: 5899329b1910 ("x86: PAT: implement track/untrack of pfnmap regions for x86 - v3") Acked-by: Ingo Molnar <mingo(a)kernel.org> Cc: Dave Hansen <dave.hansen(a)linux.intel.com> Cc: Andy Lutomirski <luto(a)kernel.org> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Borislav Petkov <bp(a)alien8.de> Cc: "H. Peter Anvin" <hpa(a)zytor.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 0d72183b5dd0..36b603d0cdde 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). 
+ */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/mm/memory.c b/mm/memory.c index 904f70b99498..d2155ced45f8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5973,6 +5973,10 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = ptep_get(ptep); + /* Never return PFNs of anon folios in COW mappings. 
*/ + if (vm_normal_folio(vma, address, pte)) + goto unlock; + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock;

1 year, 8 months

3
3
0 0

FAILED: Patch "virtio: reenable config if freezing device failed" failed to apply to 6.1-stable tree

by Sasha Levin

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. Thanks, Sasha ------------------ original commit in Linus's tree ------------------ From 310227f42882c52356b523e2f4e11690eebcd2ab Mon Sep 17 00:00:00 2001 From: David Hildenbrand <david(a)redhat.com> Date: Tue, 13 Feb 2024 14:54:25 +0100 Subject: [PATCH] virtio: reenable config if freezing device failed Currently, we don't reenable the config if freezing the device failed. For example, virtio-mem currently doesn't support suspend+resume, and trying to freeze the device will always fail. Afterwards, the device will no longer respond to resize requests, because it won't get notified about config changes. Let's fix this by re-enabling the config if freezing fails. Fixes: 22b7050a024d ("virtio: defer config changed notifications") Cc: <stable(a)kernel.org> Cc: "Michael S. Tsirkin" <mst(a)redhat.com> Cc: Jason Wang <jasowang(a)redhat.com> Cc: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com> Signed-off-by: David Hildenbrand <david(a)redhat.com> Message-Id: <20240213135425.795001-1-david(a)redhat.com> Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com> --- drivers/virtio/virtio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index f4080692b3513..f513ee21b1c18 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -510,8 +510,10 @@ int virtio_device_freeze(struct virtio_device *dev) if (drv && drv->freeze) { ret = drv->freeze(dev); - if (ret) + if (ret) { + virtio_config_enable(dev); return ret; + } } if (dev->config->destroy_avq) -- 2.43.0

1 year, 8 months

3
2
0 0

[PATCH 4.19.y] drm/vkms: call drm_atomic_helper_shutdown before drm_dev_put()

by Guo Mengqi

commit 73a82b22963d ("drm/atomic: Fix potential use-after-free in nonblocking commits") introduced drm_dev_get/put() to drm_atomic_helper_shutdown(). And this cause problem in vkms driver exit process. vkms_exit() drm_dev_put() vkms_release() drm_atomic_helper_shutdown() drm_dev_get() drm_dev_put() vkms_release() ------ null pointer access Using 4.19 stable x86 image on qemu, below stacktrace can be triggered by load and unload vkms.ko. root:~ # insmod vkms.ko [ 142.135449] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013). [ 142.138713] [drm] Driver supports precise vblank timestamp query. [ 142.142390] [drm] Initialized vkms 1.0.0 20180514 for virtual device on minor 0 root:~ # rmmod vkms.ko [ 144.093710] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 [ 144.097491] PGD 800000023624e067 P4D 800000023624e067 PUD 22ab59067 PMD 0 [ 144.100802] Oops: 0000 [#1] SMP PTI [ 144.102502] CPU: 0 PID: 3615 Comm: rmmod Not tainted 4.19.310 #1 [ 144.104452] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 144.107238] RIP: 0010:device_del+0x34/0x3a0 ... [ 144.131323] Call Trace: [ 144.131962] ? __die+0x7d/0xc0 [ 144.132711] ? no_context+0x152/0x3b0 [ 144.133605] ? wake_up_q+0x70/0x70 [ 144.134436] ? __do_page_fault+0x342/0x4b0 [ 144.135445] ? __switch_to_asm+0x41/0x70 [ 144.136416] ? __switch_to_asm+0x35/0x70 [ 144.137366] ? page_fault+0x1e/0x30 [ 144.138214] ? __drm_atomic_state_free+0x51/0x60 [ 144.139331] ? 
device_del+0x34/0x3a0 [ 144.140197] platform_device_del.part.14+0x19/0x70 [ 144.141348] platform_device_unregister+0xe/0x20 [ 144.142458] vkms_release+0x10/0x30 [vkms] [ 144.143449] __drm_atomic_helper_disable_all.constprop.31+0x13b/0x150 [ 144.144980] drm_atomic_helper_shutdown+0x4b/0x90 [ 144.146102] vkms_release+0x18/0x30 [vkms] [ 144.147107] vkms_exit+0x29/0x8ec [vkms] [ 144.148053] __x64_sys_delete_module+0x155/0x220 [ 144.149168] do_syscall_64+0x43/0x100 [ 144.150056] entry_SYSCALL_64_after_hwframe+0x5c/0xc1 It seems that the proper unload sequence is: drm_atomic_helper_shutdown(); drm_dev_put(); Just put drm_atomic_helper_shutdown() before drm_dev_put() should solve the problem. Note that vkms exit code is refactored by 53d77aaa3f76 ("drm/vkms: Use devm_drm_dev_alloc") in tags/v5.10-rc1. So this bug only exists on 4.19 and 5.4. Fixes: 73a82b22963d ("drm/atomic: Fix potential use-after-free in nonblocking commits") Signed-off-by: Guo Mengqi <guomengqi3(a)huawei.com> --- drivers/gpu/drm/vkms/vkms_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index b1201c18d3eb..d32e08f17427 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -39,7 +39,6 @@ static void vkms_release(struct drm_device *dev) struct vkms_device *vkms = container_of(dev, struct vkms_device, drm); platform_device_unregister(vkms->platform); - drm_atomic_helper_shutdown(&vkms->drm); drm_mode_config_cleanup(&vkms->drm); drm_dev_fini(&vkms->drm); } @@ -137,6 +136,7 @@ static void __exit vkms_exit(void) } drm_dev_unregister(&vkms_device->drm); + drm_atomic_helper_shutdown(&vkms_device->drm); drm_dev_put(&vkms_device->drm); kfree(vkms_device); -- 2.17.1

1 year, 8 months

3
3
0 0

Re: Broken Domain Validation in 6.1.84+

by Martin K. Petersen

Dave, >> Could you please try the patch below on top of v6.1.80? > Works okay on top of v6.1.80: > > [ 30.952668] scsi 6:0:0:0: Direct-Access HP 73.4G ST373207LW HPC1 PQ: 0 ANSI: 3 > [ 31.072592] scsi target6:0:0: Beginning Domain Validation > [ 31.139334] scsi 6:0:0:0: Power-on or device reset occurred > [ 31.186227] scsi target6:0:0: Ending Domain Validation > [ 31.240482] scsi target6:0:0: FAST-160 WIDE SCSI 320.0 MB/s DT IU QAS RTI WRFLOW PCOMP (6.25 ns, offset 63) > [ 31.462587] ata5: SATA link down (SStatus 0 SControl 0) > [ 31.618798] scsi 6:0:2:0: Direct-Access HP 73.4G ST373207LW HPC1 PQ: 0 ANSI: 3 > [ 31.732588] scsi target6:0:2: Beginning Domain Validation > [ 31.799201] scsi 6:0:2:0: Power-on or device reset occurred > [ 31.846724] scsi target6:0:2: Ending Domain Validation > [ 31.900822] scsi target6:0:2: FAST-160 WIDE SCSI 320.0 MB/s DT IU QAS RTI WRFLOW PCOMP (6.25 ns, offset 63) Great, thanks for testing! Greg, please revert the following commits from linux-6.1.y: b73dd5f99972 ("scsi: sd: usb_storage: uas: Access media prior to querying device properties") cf33e6ca12d8 ("scsi: core: Add struct for args to execution functions") and include the patch below instead. Thank you! -- Martin K. Petersen Oracle Linux Engineering From 87441914d491c01b73b949663c101056a9d9b8c7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" <martin.petersen(a)oracle.com> Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: [PATCH] scsi: sd: usb_storage: uas: Access media prior to querying device properties [ Upstream commit 321da3dc1f3c92a12e3c5da934090d2992a8814c ] It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. 
Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.c… Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable(a)vger.kernel.org Reported-by: Tasos Sahanidis <tasos(a)tasossah.com> Reviewed-by: Ewan D. Milne <emilne(a)redhat.com> Reviewed-by: Bart Van Assche <bvanassche(a)acm.org> Tested-by: Tasos Sahanidis <tasos(a)tasossah.com> Signed-off-by: Martin K. 
Petersen <martin.petersen(a)oracle.com> Signed-off-by: Sasha Levin <sashal(a)kernel.org> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 31b5273f43a7..349b1455a2c6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3284,6 +3284,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_req(sdkp->device, cmd, DMA_FROM_DEVICE, buffer, buf_len, + NULL, SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3323,7 +3341,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da53..12cf9940e5b6 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. 
+ */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index de3836412bf3..ed22053b3252 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d2751ed536df..1504d3137cc6 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -204,6 +204,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */

1 year, 8 months

3
2
0 0

v5.15+ backport request

by Ard Biesheuvel

please backport e7d24c0aa8e678f41 gcc-plugins/stackleak: Avoid .head.text section to stable kernels v5.15 and newer. This addresses the regression reported here: https://lkml.kernel.org/r/dc118105-b97c-4e51-9a42-a918fa875967%40hardfalcon… On v5.15, there is a dependency that needs to be backported first: ae978009fc013e3166c9f523f8b17e41a3c0286e gcc-plugins/stackleak: Ignore .noinstr.text and .entry.text The particular issue that this patch fixes does not exist [yet] in v6.1 and v5.15, but I am working on backports that would introduce it. But even without those backports, this change is important as it prevents input sections from being instrumented by stackleak that may not tolerate this for other reasons too. Thanks, Ard.

1 year, 8 months

2
1
0 0

Hibernate stuck after recent kernel/workqueue.c changes in Stable 6.6.23

by Linux regression tracking (Thorsten Leemhuis)

Hi stable team, there is a report that the recent backport of 5797b1c18919cd ("workqueue: Implement system-wide nr_active enforcement for unbound workqueues") [from Tejun] to 6.6.y (as 5a70baec2294) broke hibernate for a user. 6.6.24-rc1 did not fix this problem; reverting the culprit does. > With kernel 6.6.23 hibernating usually hangs here: the display stays > on but the mouse pointer does not move and the keyboard does not work. > But SysRq REISUB does reboot. Sometimes it seems to hibernate: the > computer powers down and can be waked up and the previous display comes > visible, but it is stuck there. See https://bugzilla.kernel.org/show_bug.cgi?id=218658 for details. Note, you have to use bugzilla to reach the reporter, as I sadly[1] cannot CC them in mails like this. Side note: there is a mainline report about problems due to 5797b1c18919cd ("workqueue: Implement system-wide nr_active enforcement for unbound workqueues") as well, but it's about "nohz_full=0 prevents kernel from booting": https://bugzilla.kernel.org/show_bug.cgi?id=218665; will forward that separately to Tejun. Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat) -- Everything you wanna know about Linux kernel regression tracking: https://linux-regtracking.leemhuis.info/about/#tldr If I did something stupid, please tell me, as explained on that page. [1] because bugzilla.kernel.org tells users upon registration their "email address will never be displayed to logged out users" #regzbot introduced: 5a70baec2294e8a7d0fcc4558741c23e752dad #regzbot from: Petri Kaukasoina #regzbot duplicate: https://bugzilla.kernel.org/show_bug.cgi?id=218658 #regzbot title: workqueue: hibernate usually hangs when going to sleep #regzbot ignore-activity

1 year, 8 months

5
11
0 0

[REGRESSION] Loss of some SMART information in v6.1.81

by Cyril Brulebois

Hi, Munin uses the following command to get sensor-type information out of SMART-aware disks (e.g. temperature): /usr/sbin/smartctl -A --nocheck=standby -d ata /dev/sda This broke following an upgrade from v6.1.76 (as found in Debian 12) to v6.1.82 (as currently found in the proposed-updates repository for the next point release of Debian 12), with smartctl now reporting: smartctl 7.3 2022-02-28 r5338 [x86_64-linux-6.1.0-19-amd64] (local build) Copyright (C) 2002-22, Bruce Allen, Christian Franke, www.smartmontools.org Device is in SLEEP mode, exit(2) This happens on baremetal with 2 pairs of disks: - 2×ST4000VN008-2DR1 (sda, sdb) - 2×ST8000VN004-2M21 (sdc, sdd) and that's an obvious lie with one pair doing system stuff and the other one doing media stuff. This also happens within a Debian 12 QEMU VM running on a Debian 12 libvirt host, when using a SATA disk, which is what I've used to test various builds from the stable/linux-6.1.y branch and associated tags. Building stable releases, I pinpointed it as a regression between v6.1.80 and v6.1.81, then pinpointed it to commit cf33e6ca12d8. #regzbot introduced: v6.1.80..v6.1.81 #regzbot introduced: cf33e6ca12d8 This is also affecting v6.1.84 and v6.1.85 (released during my git bisect session). Reported in Debian via: https://bugs.debian.org/1068675 (which included a trace with the distribution-provided v6.1.82 package).
Most recent trace, with v6.1.85 (mainline, using the distribution's config but without any patches): [ 30.547027] ------------[ cut here ]------------ [ 30.547034] WARNING: CPU: 0 PID: 697 at drivers/scsi/scsi_lib.c:214 scsi_execute_cmd+0x42/0x2c0 [scsi_mod] [ 30.547082] Modules linked in: tls tun intel_rapl_msr intel_rapl_common kvm_intel kvm irqbypass ghash_clmulni_intel sha512_ssse3 sha512_generic sha256_ssse3 sha1_ssse3 snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi aesni_intel snd_hda_codec crypto_simd cryptd rapl snd_hda_core snd_hwdep bochs drm_vram_helper pcspkr drm_ttm_helper snd_pcm iTCO_wdt snd_timer intel_pmc_bxt ttm iTCO_vendor_support snd watchdog soundcore virtio_console virtio_balloon drm_kms_helper button joydev evdev serio_raw sg binfmt_misc fuse loop drm efi_pstore dm_mod configfs qemu_fw_cfg virtio_rng ip_tables x_tables autofs4 ext4 crc32c_generic crc16 mbcache jbd2 hid_generic usbhid hid sd_mod t10_pi crc64_rocksoft crc64 crc_t10dif crct10dif_generic ahci libahci virtio_scsi virtio_blk virtio_net net_failover failover xhci_pci crct10dif_pclmul crct10dif_common crc32_pclmul libata crc32c_intel xhci_hcd psmouse i2c_i801 i2c_smbus scsi_mod scsi_common lpc_ich virtio_pci [ 30.547194] virtio_pci_legacy_dev virtio_pci_modern_dev usbcore usb_common virtio virtio_ring [ 30.547205] CPU: 0 PID: 697 Comm: smartctl Not tainted 6.1.85 #1 [ 30.547210] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 30.547217] RIP: 0010:scsi_execute_cmd+0x42/0x2c0 [scsi_mod] [ 30.547247] Code: 55 48 89 fd 53 48 83 ec 10 4c 8b 64 24 50 48 89 0c 24 4d 85 e4 0f 84 02 02 00 00 49 83 3c 24 00 74 24 41 83 7c 24 08 60 74 1c <0f> 0b bd ea ff ff ff 48 83 c4 10 89 e8 5b 5d 41 5c 41 5d 41 5e 41 [ 30.547251] RSP: 0018:ffffa70f80defbd0 EFLAGS: 00010287 [ 30.547256] RAX: ffffa70f80defc30 RBX: ffff9ab18b085000 RCX: 0000000000000000 [ 30.547259] RDX: 0000000000000022 RSI: 0000000000000022 RDI: 
ffff9ab18b085000 [ 30.547262] RBP: ffff9ab18b085000 R08: 0000000000000000 R09: 00000000000009c4 [ 30.547265] R10: 0000000000000000 R11: 0000000000000000 R12: ffffa70f80defc30 [ 30.547268] R13: 0000000000000000 R14: 00000000000009c4 R15: ffffa70f80defc60 [ 30.547271] FS: 00007f8ee64ad840(0000) GS:ffff9ab1bec00000(0000) knlGS:0000000000000000 [ 30.547275] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 30.547278] CR2: 00007fff08df0bc0 CR3: 000000000439a003 CR4: 0000000000170ef0 [ 30.547291] Call Trace: [ 30.547296] <TASK> [ 30.547301] ? __warn+0x7d/0xc0 [ 30.547308] ? scsi_execute_cmd+0x42/0x2c0 [scsi_mod] [ 30.547338] ? report_bug+0xe2/0x150 [ 30.547348] ? handle_bug+0x41/0x70 [ 30.547354] ? exc_invalid_op+0x13/0x60 [ 30.547358] ? asm_exc_invalid_op+0x16/0x20 [ 30.547368] ? scsi_execute_cmd+0x42/0x2c0 [scsi_mod] [ 30.547397] ata_cmd_ioctl+0x144/0x2f0 [libata] [ 30.547448] scsi_ioctl+0x3f5/0x930 [scsi_mod] [ 30.547477] ? scsi_block_when_processing_errors+0x22/0x100 [scsi_mod] [ 30.547503] ? __mod_lruvec_page_state+0x93/0x140 [ 30.547508] ? scsi_ioctl_block_when_processing_errors+0x45/0x50 [scsi_mod] [ 30.547535] blkdev_ioctl+0x133/0x270 [ 30.547553] __x64_sys_ioctl+0x90/0xd0 [ 30.547564] do_syscall_64+0x55/0xb0 [ 30.547574] ? handle_mm_fault+0xdb/0x2d0 [ 30.547582] ? do_user_addr_fault+0x1b0/0x580 [ 30.547589] ? 
exit_to_user_mode_prepare+0x40/0x1e0 [ 30.547596] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 30.547608] RIP: 0033:0x7f8ee611cc5b [ 30.547617] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1c 48 8b 44 24 18 64 48 2b 04 25 28 00 00 [ 30.547621] RSP: 002b:00007fff08df0960 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 30.547626] RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f8ee611cc5b [ 30.547629] RDX: 00007fff08df0bc0 RSI: 000000000000031f RDI: 0000000000000003 [ 30.547632] RBP: 00007fff08df1040 R08: 0000000000000000 R09: 0000000000000000 [ 30.547634] R10: e7e85eefeeee1b19 R11: 0000000000000246 R12: 000056348ba28600 [ 30.547637] R13: 00007fff08df0bc0 R14: 00007fff08df12e0 R15: 0000000000000000 [ 30.547642] </TASK> [ 30.547644] ---[ end trace 0000000000000000 ]--- Cheers, -- Cyril Brulebois (kibi(a)debian.org) <https://debamax.com/> D-I release manager -- Release team member -- Freelance Consultant

1 year, 8 months

2
2
0 0

RE: [RFC] rtw88: Fix startup problems for SDIO wifi plus UART Bluetooth

by Ping-Ke Shih

Larry Finger <Larry.Finger(a)gmail.com> wrote: > As discussed in the links below, the SDIO part of RTW8821CS fails to > start correctly if such startup happens while the UART portion of > the chip is initializing. I checked with the SDIO team internally, but they haven't encountered this case, so we may take this workaround. The SDIO team wonders whether something other than BT causes this failure, and whether everything works well once the system has booted. Could you boot the system without the WiFi/BT drivers, and insmod the drivers manually after booting? > --- > drivers/net/wireless/realtek/rtw88/sdio.c | 28 +++++++++++++++++++++++ > 1 file changed, 28 insertions(+) > > diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c > index 0cae5746f540..eec0ad85be72 100644 > --- a/drivers/net/wireless/realtek/rtw88/sdio.c > +++ b/drivers/net/wireless/realtek/rtw88/sdio.c > @@ -1325,6 +1325,34 @@ int rtw_sdio_probe(struct sdio_func *sdio_func, [...] > + mdelay(500); Would it be better to use a sleep function?

1 year, 8 months

2
2
0 0

[RFC] rtw88: Fix startup problems for SDIO wifi plus UART Bluetooth

by Larry Finger

As discussed in the links below, the SDIO part of RTW8821CS fails to start correctly if such startup happens while the UART portion of the chip is initializing. The logged result for such a failure is: [ 10.230516] rtw_8821cs mmc3:0001:1: Start of rtw_sdio_probe [ 10.306569] Bluetooth: HCI UART driver ver 2.3 [ 10.306717] Bluetooth: HCI UART protocol Three-wire (H5) registered [ 10.307167] of_dma_request_slave_channel: dma-names property of node '/serial@fe650000' missing or empty [ 10.307199] dw-apb-uart fe650000.serial: failed to request DMA [ 10.543474] rtw_8821cs mmc3:0001:1: Firmware version 24.8.0, H2C version 12 [ 10.730744] rtw_8821cs mmc3:0001:1: sdio read32 failed (0x11080): -110 [ 10.730923] rtw_8821cs mmc3:0001:1: sdio write32 failed (0x11080): -110 Due to the above errors, wifi fails to work. For those instances when wifi works, the following is logged: [ 10.452861] Bluetooth: HCI UART protocol Three-wire (H5) registered [ 10.453580] of_dma_request_slave_channel: dma-names property of node '/serial@fe650000' missing or empty [ 10.453621] dw-apb-uart fe650000.serial: failed to request DMA [ 10.455741] rtw_8821cs mmc3:0001:1: Start of rtw_sdio_probe [ 10.639186] rtw_8821cs mmc3:0001:1: Firmware version 24.8.0, H2C version 12 In this case, SDIO wifi works correctly. The correct case is ensured by adding an mdelay(500) statement before the call to rtw_core_init(). No adverse effects are observed.
Link: https://1EHFQ.trk.elasticemail.com/tracking/click?d=1UfsVowwwMAM6kBoyumkHP3… Link: https://1EHFQ.trk.elasticemail.com/tracking/click?d=XUEf4t8W9xt0czASPOeeDt8… Fixes: 65371a3f14e7 ("wifi: rtw88: sdio: Add HCI implementation for SDIO based chipsets") Signed-off-by: Larry Finger <Larry.Finger(a)gmail.com> Cc: stable(a)vger.kernel.org # v6.4+ --- drivers/net/wireless/realtek/rtw88/sdio.c | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c index 0cae5746f540..eec0ad85be72 100644 --- a/drivers/net/wireless/realtek/rtw88/sdio.c +++ b/drivers/net/wireless/realtek/rtw88/sdio.c @@ -1325,6 +1325,34 @@ int rtw_sdio_probe(struct sdio_func *sdio_func, rtwdev->hci.ops = &rtw_sdio_ops; rtwdev->hci.type = RTW_HCI_TYPE_SDIO; + /* Insert a delay of 500 ms. Without the delay, the wifi part + * and the UART that controls Bluetooth interfere with one + * another resulting in the following being logged: + * + * Start of SDIO probe function. + * Bluetooth: HCI UART driver ver 2.3 + * Bluetooth: HCI UART protocol Three-wire (H5) registered + * of_dma_request_slave_channel: dma-names property of node '/serial@fe650000' + * missing or empty + * dw-apb-uart fe650000.serial: failed to request DMA + * rtw_8821cs mmc3:0001:1: Firmware version 24.8.0, H2C version 12 + * rtw_8821cs mmc3:0001:1: sdio read32 failed (0x11080): -110 + * + * If the UART is finished initializing before the SDIO probe + * function starts, the following is logged: + * + * Bluetooth: HCI UART protocol Three-wire (H5) registered + * of_dma_request_slave_channel: dma-names property of node '/serial@fe650000' + * missing or empty + * dw-apb-uart fe650000.serial: failed to request DMA + * Start of SDIO probe function. + * rtw_8821cs mmc3:0001:1: Firmware version 24.8.0, H2C version 12 + * Bluetooth: hci0: RTL: examining hci_ver=08 hci_rev=000c lmp_ver=08 lmp_subver=8821 + * SDIO wifi works correctly.
+ * + * No adverse effects are observed from the delay. + */ + mdelay(500); ret = rtw_core_init(rtwdev); if (ret) goto err_release_hw; -- 2.44.0 https://1EHFQ.trk.elasticemail.com/tracking/unsubscribe?d=XjvOA0R6jwFES_UmJ…

1 year, 8 months

1
0
0 0

Re: KASAN: slab-out-of-bounds Write in ops_init

by Eric Dumazet

On Wed, Apr 10, 2024 at 10:31 PM Cem Topcuoglu <topcuoglu.c(a)northeastern.edu> wrote: > > Hi, > > > > We encountered a bug labelled “KASAN: slab-out-of-bounds Write in ops_init” while fuzzing kernel version 5.15.124 with Syzkaller (lines exist in 5.15.154 as well). > > > > In the net_namespace.c file, we have an if condition at line 89. Subsequently, Syzkaller encounters the bug at line 90. > > > > 89 if (old_ng->s.len > id) { > > 90 old_ng->ptr[id] = data; > > 91 return 0; > > 92 } > > > > Upon inspecting the net_generic struct, we noticed that this struct uses union which puts the array and the header (including the array length information) together. > > We suspect that with this union, modifying the ng->ptr[0] is essentially modifying ng->s.len, which might fail the check in 89. This might be the cause for Syzkaller detecting this slab-out-of-bound. > Look for MIN_PERNET_OPS_ID (this should be 3) ng->ptr[0] , [1], [2] can not be overwritten. Do you have a repro ? Also please use the latest stable (5.15.154). > Since we are CS PhD students and Linux hobbyists, we do not have a full understanding of what could lead to this. We would really appreciate if you guys can share some insights into this matter : ) > > > > We attached the syzkaller’s bug report below. 
> > > > ================================================================== > > BUG: KASAN: slab-out-of-bounds in net_assign_generic > > usr/src/kernel/net/core/net_namespace.c:90 [inline] > > BUG: KASAN: slab-out-of-bounds in ops_init+0x44b/0x4d0 > > usr/src/kernel/net/core/net_namespace.c:129 > > Write of size 8 at addr ffff888043c62ae8 by task (coredump)/5424 > > CPU: 1 PID: 5424 Comm: (coredump) Not tainted 5.15.124-yocto-standard #1 > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 > > Call Trace: > > <TASK> > > __dump_stack usr/src/kernel/lib/dump_stack.c:88 [inline] > > dump_stack_lvl+0x51/0x70 usr/src/kernel/lib/dump_stack.c:106 > > print_address_description.constprop.0+0x24/0x140 usr/src/kernel/mm/kasan/report.c:248 > > __kasan_report usr/src/kernel/mm/kasan/report.c:434 [inline] > > kasan_report.cold+0x7d/0x117 usr/src/kernel/mm/kasan/report.c:451 > > __asan_report_store8_noabort+0x17/0x20 usr/src/kernel/mm/kasan/report_generic.c:314 > > net_assign_generic usr/src/kernel/net/core/net_namespace.c:90 [inline] > > ops_init+0x44b/0x4d0 usr/src/kernel/net/core/net_namespace.c:129 > > setup_net+0x40a/0x970 usr/src/kernel/net/core/net_namespace.c:329 > > copy_net_ns+0x2ac/0x680 usr/src/kernel/net/core/net_namespace.c:473 > > create_new_namespaces+0x390/0xa50 usr/src/kernel/kernel/nsproxy.c:110 > > unshare_nsproxy_namespaces+0xb0/0x1d0 usr/src/kernel/kernel/nsproxy.c:226 > > ksys_unshare+0x30c/0x850 usr/src/kernel/kernel/fork.c:3094 > > __do_sys_unshare usr/src/kernel/kernel/fork.c:3168 [inline] > > __se_sys_unshare usr/src/kernel/kernel/fork.c:3166 [inline] > > __x64_sys_unshare+0x36/0x50 usr/src/kernel/kernel/fork.c:3166 > > do_syscall_x64 usr/src/kernel/arch/x86/entry/common.c:50 [inline] > > do_syscall_64+0x40/0x90 usr/src/kernel/arch/x86/entry/common.c:80 > > entry_SYSCALL_64_after_hwframe+0x61/0xcb > > RIP: 0033:0x7fbafce1b39b > > Code: 73 01 c3 48 8b 0d 85 2a 0e 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 
00 00 > > 00 90 f3 0f 1e fa b8 10 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 55 2a 0e 00 f7 > > d8 64 89 01 48 > > RSP: 002b:00007ffddc8dfda8 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 > > RAX: ffffffffffffffda RBX: 0000557e645dd018 RCX: 00007fbafce1b39b > > RDX: 0000000000000000 RSI: 00007ffddc8dfd10 RDI: 0000000040000000 > > RBP: 00007ffddc8dfde0 R08: 0000000000000000 R09: 00007ffd00000067 > > R10: 0000000000000000 R11: 0000000000000246 R12: 00000000fffffff5 > > R13: 00007fbafd26ba60 R14: 0000000040000000 R15: 0000000000000000 > > </TASK> > > Allocated by task 5424: > > kasan_save_stack+0x26/0x60 usr/src/kernel/mm/kasan/common.c:38 > > kasan_set_track usr/src/kernel/mm/kasan/common.c:46 [inline] > > set_alloc_info usr/src/kernel/mm/kasan/common.c:434 [inline] > > ____kasan_kmalloc usr/src/kernel/mm/kasan/common.c:513 [inline] > > ____kasan_kmalloc usr/src/kernel/mm/kasan/common.c:472 [inline] > > __kasan_kmalloc+0xae/0xe0 usr/src/kernel/mm/kasan/common.c:522 > > kasan_kmalloc usr/src/kernel/include/linux/kasan.h:264 [inline] > > __kmalloc+0x308/0x560 usr/src/kernel/mm/slub.c:4407 > > kmalloc usr/src/kernel/include/linux/slab.h:596 [inline] > > kzalloc usr/src/kernel/include/linux/slab.h:721 [inline] > > net_alloc_generic+0x28/0x80 usr/src/kernel/net/core/net_namespace.c:74 > > net_alloc usr/src/kernel/net/core/net_namespace.c:401 [inline] > > copy_net_ns+0xc3/0x680 usr/src/kernel/net/core/net_namespace.c:460 > > create_new_namespaces+0x390/0xa50 usr/src/kernel/kernel/nsproxy.c:110 > > unshare_nsproxy_namespaces+0xb0/0x1d0 usr/src/kernel/kernel/nsproxy.c:226 > > ksys_unshare+0x30c/0x850 usr/src/kernel/kernel/fork.c:3094 > > __do_sys_unshare usr/src/kernel/kernel/fork.c:3168 [inline] > > __se_sys_unshare usr/src/kernel/kernel/fork.c:3166 [inline] > > __x64_sys_unshare+0x36/0x50 usr/src/kernel/kernel/fork.c:3166 > > do_syscall_x64 usr/src/kernel/arch/x86/entry/common.c:50 [inline] > > do_syscall_64+0x40/0x90 usr/src/kernel/arch/x86/entry/common.c:80 > > 
entry_SYSCALL_64_after_hwframe+0x61/0xcb > > The buggy address belongs to the object at ffff888043c62a00 > > which belongs to the cache kmalloc-256 of size 256 > > The buggy address is located 232 bytes inside of > > 256-byte region [ffff888043c62a00, ffff888043c62b00) > > The buggy address belongs to the page: > > page:000000008dd0a6b6 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x43c62 > > head:000000008dd0a6b6 order:1 compound_mapcount:0 > > flags: 0x4000000000010200(slab|head|zone=1) > > raw: 4000000000010200 ffffea0001108f00 0000000700000007 ffff888001041b40 > > raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 > > page dumped because: kasan: bad access detected > > Memory state around the buggy address: > > ffff888043c62980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > > ffff888043c62a00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > > >ffff888043c62a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc > > ^ > > ffff888043c62b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > > ffff888043c62b80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > > ================================================================== > > kmemleak: 2 new suspected memory leaks (see /sys/kernel/debug/kmemleak) > > > > Best > >

1 year, 8 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror April 2024