- Linux-stable-mirror - lists.linaro.org

FAILED: patch "[PATCH] scsi: mpi3mr: Validate SAS port assignments" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x b9e63d6c7c0e94a99e1af7c9c0c7fad13a2f2453 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102028-henchman-favorable-6119@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From b9e63d6c7c0e94a99e1af7c9c0c7fad13a2f2453 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar <ranjan.kumar(a)broadcom.com> Date: Tue, 8 Oct 2024 13:13:53 +0530 Subject: [PATCH] scsi: mpi3mr: Validate SAS port assignments A sanity check on phy_mask was added in commit 3668651def2c ("scsi: mpi3mr: Sanitise num_phys"). This causes warning messages when more than 64 phys are detected and devices connected to phys greater than 64 are dropped. The phy_mask bitmap is only needed for controller phys and not required for expander phys. Controller phys can go up to a maximum of 64 and therefore u64 is good enough to contain phy_mask bitmap. To suppress those warnings and allow devices to be discovered as before the offending commit, restrict the phy_mask setting and lowest phy setting only to the controller phys. Fixes: 3668651def2c ("scsi: mpi3mr: Sanitise num_phys") Cc: stable(a)vger.kernel.org Reported-by: kernel test robot <lkp(a)intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202410051943.Mp9o5DlF-lkp@intel.com/ Reported-by: Alexander Motin <mav(a)ixsystems.com> Signed-off-by: Ranjan Kumar <ranjan.kumar(a)broadcom.com> Link: https://lore.kernel.org/r/20241008074353.200379-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index fcb0fa31536b..16e0baeb8799 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -542,8 +542,8 @@ struct mpi3mr_hba_port { * @port_list: List of ports belonging to a SAS node * @num_phys: Number of phys associated with port * @marked_responding: used while refresing the sas ports - * @lowest_phy: lowest phy ID of current sas port - * @phy_mask: phy_mask of current sas port + * @lowest_phy: lowest phy ID of current sas port, valid for controller port + * @phy_mask: phy_mask of current sas port, valid for controller port * @hba_port: HBA port entry * @remote_identify: Attached device identification * @rphy: SAS transport layer rphy object diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index ccd23def2e0c..0ba9e6a6a13c 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -590,12 +590,13 @@ static enum sas_linkrate mpi3mr_convert_phy_link_rate(u8 link_rate) * @mrioc: Adapter instance reference * @mr_sas_port: Internal Port object * @mr_sas_phy: Internal Phy object + * @host_node: Flag to indicate this is a host_node * * Return: None. */ static void mpi3mr_delete_sas_phy(struct mpi3mr_ioc *mrioc, struct mpi3mr_sas_port *mr_sas_port, - struct mpi3mr_sas_phy *mr_sas_phy) + struct mpi3mr_sas_phy *mr_sas_phy, u8 host_node) { u64 sas_address = mr_sas_port->remote_identify.sas_address; @@ -605,9 +606,13 @@ static void mpi3mr_delete_sas_phy(struct mpi3mr_ioc *mrioc, list_del(&mr_sas_phy->port_siblings); mr_sas_port->num_phys--; - mr_sas_port->phy_mask &= ~(1 << mr_sas_phy->phy_id); - if (mr_sas_port->lowest_phy == mr_sas_phy->phy_id) - mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + + if (host_node) { + mr_sas_port->phy_mask &= ~(1 << mr_sas_phy->phy_id); + + if (mr_sas_port->lowest_phy == mr_sas_phy->phy_id) + mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + } sas_port_delete_phy(mr_sas_port->port, mr_sas_phy->phy); mr_sas_phy->phy_belongs_to_port = 0; } @@ -617,12 +622,13 @@ static void mpi3mr_delete_sas_phy(struct mpi3mr_ioc *mrioc, * @mrioc: Adapter instance reference * @mr_sas_port: Internal Port object * @mr_sas_phy: Internal Phy object + * @host_node: Flag to indicate this is a host_node * * Return: None. */ static void mpi3mr_add_sas_phy(struct mpi3mr_ioc *mrioc, struct mpi3mr_sas_port *mr_sas_port, - struct mpi3mr_sas_phy *mr_sas_phy) + struct mpi3mr_sas_phy *mr_sas_phy, u8 host_node) { u64 sas_address = mr_sas_port->remote_identify.sas_address; @@ -632,9 +638,12 @@ static void mpi3mr_add_sas_phy(struct mpi3mr_ioc *mrioc, list_add_tail(&mr_sas_phy->port_siblings, &mr_sas_port->phy_list); mr_sas_port->num_phys++; - mr_sas_port->phy_mask |= (1 << mr_sas_phy->phy_id); - if (mr_sas_phy->phy_id < mr_sas_port->lowest_phy) - mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + if (host_node) { + mr_sas_port->phy_mask |= (1 << mr_sas_phy->phy_id); + + if (mr_sas_phy->phy_id < mr_sas_port->lowest_phy) + mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + } sas_port_add_phy(mr_sas_port->port, mr_sas_phy->phy); mr_sas_phy->phy_belongs_to_port = 1; } @@ -675,7 +684,7 @@ static void mpi3mr_add_phy_to_an_existing_port(struct mpi3mr_ioc *mrioc, if (srch_phy == mr_sas_phy) return; } - mpi3mr_add_sas_phy(mrioc, mr_sas_port, mr_sas_phy); + mpi3mr_add_sas_phy(mrioc, mr_sas_port, mr_sas_phy, mr_sas_node->host_node); return; } } @@ -736,7 +745,7 @@ static void mpi3mr_del_phy_from_an_existing_port(struct mpi3mr_ioc *mrioc, mpi3mr_delete_sas_port(mrioc, mr_sas_port); else mpi3mr_delete_sas_phy(mrioc, mr_sas_port, - mr_sas_phy); + mr_sas_phy, mr_sas_node->host_node); return; } } @@ -1028,7 +1037,7 @@ mpi3mr_alloc_hba_port(struct mpi3mr_ioc *mrioc, u16 port_id) /** * mpi3mr_get_hba_port_by_id - find hba port by id * @mrioc: Adapter instance reference - * @port_id - Port ID to search + * @port_id: Port ID to search * * Return: mpi3mr_hba_port reference for the matched port */ @@ -1367,7 +1376,8 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, mpi3mr_sas_port_sanity_check(mrioc, mr_sas_node, mr_sas_port->remote_identify.sas_address, hba_port); - if (mr_sas_node->num_phys >= sizeof(mr_sas_port->phy_mask) * 8) + if (mr_sas_node->host_node && mr_sas_node->num_phys >= + sizeof(mr_sas_port->phy_mask) * 8) ioc_info(mrioc, "max port count %u could be too high\n", mr_sas_node->num_phys); @@ -1377,7 +1387,7 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, (mr_sas_node->phy[i].hba_port != hba_port)) continue; - if (i >= sizeof(mr_sas_port->phy_mask) * 8) { + if (mr_sas_node->host_node && (i >= sizeof(mr_sas_port->phy_mask) * 8)) { ioc_warn(mrioc, "skipping port %u, max allowed value is %zu\n", i, sizeof(mr_sas_port->phy_mask) * 8); goto out_fail; @@ -1385,7 +1395,8 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, list_add_tail(&mr_sas_node->phy[i].port_siblings, &mr_sas_port->phy_list); mr_sas_port->num_phys++; - mr_sas_port->phy_mask |= (1 << i); + if (mr_sas_node->host_node) + mr_sas_port->phy_mask |= (1 << i); } if (!mr_sas_port->num_phys) { @@ -1394,7 +1405,8 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, goto out_fail; } - mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + if (mr_sas_node->host_node) + mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; if (mr_sas_port->remote_identify.device_type == SAS_END_DEVICE) { tgtdev = mpi3mr_get_tgtdev_by_addr(mrioc,

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] scsi: mpi3mr: Validate SAS port assignments" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x b9e63d6c7c0e94a99e1af7c9c0c7fad13a2f2453 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102027-late-goggles-8ae6@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From b9e63d6c7c0e94a99e1af7c9c0c7fad13a2f2453 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar <ranjan.kumar(a)broadcom.com> Date: Tue, 8 Oct 2024 13:13:53 +0530 Subject: [PATCH] scsi: mpi3mr: Validate SAS port assignments A sanity check on phy_mask was added in commit 3668651def2c ("scsi: mpi3mr: Sanitise num_phys"). This causes warning messages when more than 64 phys are detected and devices connected to phys greater than 64 are dropped. The phy_mask bitmap is only needed for controller phys and not required for expander phys. Controller phys can go up to a maximum of 64 and therefore u64 is good enough to contain phy_mask bitmap. To suppress those warnings and allow devices to be discovered as before the offending commit, restrict the phy_mask setting and lowest phy setting only to the controller phys. Fixes: 3668651def2c ("scsi: mpi3mr: Sanitise num_phys") Cc: stable(a)vger.kernel.org Reported-by: kernel test robot <lkp(a)intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202410051943.Mp9o5DlF-lkp@intel.com/ Reported-by: Alexander Motin <mav(a)ixsystems.com> Signed-off-by: Ranjan Kumar <ranjan.kumar(a)broadcom.com> Link: https://lore.kernel.org/r/20241008074353.200379-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index fcb0fa31536b..16e0baeb8799 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -542,8 +542,8 @@ struct mpi3mr_hba_port { * @port_list: List of ports belonging to a SAS node * @num_phys: Number of phys associated with port * @marked_responding: used while refresing the sas ports - * @lowest_phy: lowest phy ID of current sas port - * @phy_mask: phy_mask of current sas port + * @lowest_phy: lowest phy ID of current sas port, valid for controller port + * @phy_mask: phy_mask of current sas port, valid for controller port * @hba_port: HBA port entry * @remote_identify: Attached device identification * @rphy: SAS transport layer rphy object diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index ccd23def2e0c..0ba9e6a6a13c 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -590,12 +590,13 @@ static enum sas_linkrate mpi3mr_convert_phy_link_rate(u8 link_rate) * @mrioc: Adapter instance reference * @mr_sas_port: Internal Port object * @mr_sas_phy: Internal Phy object + * @host_node: Flag to indicate this is a host_node * * Return: None. */ static void mpi3mr_delete_sas_phy(struct mpi3mr_ioc *mrioc, struct mpi3mr_sas_port *mr_sas_port, - struct mpi3mr_sas_phy *mr_sas_phy) + struct mpi3mr_sas_phy *mr_sas_phy, u8 host_node) { u64 sas_address = mr_sas_port->remote_identify.sas_address; @@ -605,9 +606,13 @@ static void mpi3mr_delete_sas_phy(struct mpi3mr_ioc *mrioc, list_del(&mr_sas_phy->port_siblings); mr_sas_port->num_phys--; - mr_sas_port->phy_mask &= ~(1 << mr_sas_phy->phy_id); - if (mr_sas_port->lowest_phy == mr_sas_phy->phy_id) - mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + + if (host_node) { + mr_sas_port->phy_mask &= ~(1 << mr_sas_phy->phy_id); + + if (mr_sas_port->lowest_phy == mr_sas_phy->phy_id) + mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + } sas_port_delete_phy(mr_sas_port->port, mr_sas_phy->phy); mr_sas_phy->phy_belongs_to_port = 0; } @@ -617,12 +622,13 @@ static void mpi3mr_delete_sas_phy(struct mpi3mr_ioc *mrioc, * @mrioc: Adapter instance reference * @mr_sas_port: Internal Port object * @mr_sas_phy: Internal Phy object + * @host_node: Flag to indicate this is a host_node * * Return: None. */ static void mpi3mr_add_sas_phy(struct mpi3mr_ioc *mrioc, struct mpi3mr_sas_port *mr_sas_port, - struct mpi3mr_sas_phy *mr_sas_phy) + struct mpi3mr_sas_phy *mr_sas_phy, u8 host_node) { u64 sas_address = mr_sas_port->remote_identify.sas_address; @@ -632,9 +638,12 @@ static void mpi3mr_add_sas_phy(struct mpi3mr_ioc *mrioc, list_add_tail(&mr_sas_phy->port_siblings, &mr_sas_port->phy_list); mr_sas_port->num_phys++; - mr_sas_port->phy_mask |= (1 << mr_sas_phy->phy_id); - if (mr_sas_phy->phy_id < mr_sas_port->lowest_phy) - mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + if (host_node) { + mr_sas_port->phy_mask |= (1 << mr_sas_phy->phy_id); + + if (mr_sas_phy->phy_id < mr_sas_port->lowest_phy) + mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + } sas_port_add_phy(mr_sas_port->port, mr_sas_phy->phy); mr_sas_phy->phy_belongs_to_port = 1; } @@ -675,7 +684,7 @@ static void mpi3mr_add_phy_to_an_existing_port(struct mpi3mr_ioc *mrioc, if (srch_phy == mr_sas_phy) return; } - mpi3mr_add_sas_phy(mrioc, mr_sas_port, mr_sas_phy); + mpi3mr_add_sas_phy(mrioc, mr_sas_port, mr_sas_phy, mr_sas_node->host_node); return; } } @@ -736,7 +745,7 @@ static void mpi3mr_del_phy_from_an_existing_port(struct mpi3mr_ioc *mrioc, mpi3mr_delete_sas_port(mrioc, mr_sas_port); else mpi3mr_delete_sas_phy(mrioc, mr_sas_port, - mr_sas_phy); + mr_sas_phy, mr_sas_node->host_node); return; } } @@ -1028,7 +1037,7 @@ mpi3mr_alloc_hba_port(struct mpi3mr_ioc *mrioc, u16 port_id) /** * mpi3mr_get_hba_port_by_id - find hba port by id * @mrioc: Adapter instance reference - * @port_id - Port ID to search + * @port_id: Port ID to search * * Return: mpi3mr_hba_port reference for the matched port */ @@ -1367,7 +1376,8 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, mpi3mr_sas_port_sanity_check(mrioc, mr_sas_node, mr_sas_port->remote_identify.sas_address, hba_port); - if (mr_sas_node->num_phys >= sizeof(mr_sas_port->phy_mask) * 8) + if (mr_sas_node->host_node && mr_sas_node->num_phys >= + sizeof(mr_sas_port->phy_mask) * 8) ioc_info(mrioc, "max port count %u could be too high\n", mr_sas_node->num_phys); @@ -1377,7 +1387,7 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, (mr_sas_node->phy[i].hba_port != hba_port)) continue; - if (i >= sizeof(mr_sas_port->phy_mask) * 8) { + if (mr_sas_node->host_node && (i >= sizeof(mr_sas_port->phy_mask) * 8)) { ioc_warn(mrioc, "skipping port %u, max allowed value is %zu\n", i, sizeof(mr_sas_port->phy_mask) * 8); goto out_fail; @@ -1385,7 +1395,8 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, list_add_tail(&mr_sas_node->phy[i].port_siblings, &mr_sas_port->phy_list); mr_sas_port->num_phys++; - mr_sas_port->phy_mask |= (1 << i); + if (mr_sas_node->host_node) + mr_sas_port->phy_mask |= (1 << i); } if (!mr_sas_port->num_phys) { @@ -1394,7 +1405,8 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, goto out_fail; } - mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; + if (mr_sas_node->host_node) + mr_sas_port->lowest_phy = ffs(mr_sas_port->phy_mask) - 1; if (mr_sas_port->remote_identify.device_type == SAS_END_DEVICE) { tgtdev = mpi3mr_get_tgtdev_by_addr(mrioc,

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x e972b08b91ef48488bae9789f03cfedb148667fb # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102019-roundness-penholder-bb3f@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e972b08b91ef48488bae9789f03cfedb148667fb Mon Sep 17 00:00:00 2001 From: Omar Sandoval <osandov(a)fb.com> Date: Tue, 15 Oct 2024 10:59:46 -0700 Subject: [PATCH] blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function race We're seeing crashes from rq_qos_wake_function that look like this: BUG: unable to handle page fault for address: ffffafe180a40084 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 100000067 P4D 100000067 PUD 10027c067 PMD 10115d067 PTE 0 Oops: Oops: 0002 [#1] PREEMPT SMP PTI CPU: 17 UID: 0 PID: 0 Comm: swapper/17 Not tainted 6.12.0-rc3-00013-geca631b8fe80 #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:_raw_spin_lock_irqsave+0x1d/0x40 Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 41 54 9c 41 5c fa 65 ff 05 62 97 30 4c 31 c0 ba 01 00 00 00 <f0> 0f b1 17 75 0a 4c 89 e0 41 5c c3 cc cc cc cc 89 c6 e8 2c 0b 00 RSP: 0018:ffffafe180580ca0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffffafe180a3f7a8 RCX: 0000000000000011 RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffffafe180a40084 RBP: 0000000000000000 R08: 00000000001e7240 R09: 0000000000000011 R10: 0000000000000028 R11: 0000000000000888 R12: 0000000000000002 R13: ffffafe180a40084 R14: 0000000000000000 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff9aaf1f280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffafe180a40084 CR3: 000000010e428002 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <IRQ> try_to_wake_up+0x5a/0x6a0 rq_qos_wake_function+0x71/0x80 __wake_up_common+0x75/0xa0 __wake_up+0x36/0x60 scale_up.part.0+0x50/0x110 wb_timer_fn+0x227/0x450 ... So rq_qos_wake_function() calls wake_up_process(data->task), which calls try_to_wake_up(), which faults in raw_spin_lock_irqsave(&p->pi_lock). p comes from data->task, and data comes from the waitqueue entry, which is stored on the waiter's stack in rq_qos_wait(). Analyzing the core dump with drgn, I found that the waiter had already woken up and moved on to a completely unrelated code path, clobbering what was previously data->task. Meanwhile, the waker was passing the clobbered garbage in data->task to wake_up_process(), leading to the crash. What's happening is that in between rq_qos_wake_function() deleting the waitqueue entry and calling wake_up_process(), rq_qos_wait() is finding that it already got a token and returning. The race looks like this: rq_qos_wait() rq_qos_wake_function() ============================================================== prepare_to_wait_exclusive() data->got_token = true; list_del_init(&curr->entry); if (data.got_token) break; finish_wait(&rqw->wait, &data.wq); ^- returns immediately because list_empty_careful(&wq_entry->entry) is true ... return, go do something else ... wake_up_process(data->task) (NO LONGER VALID!)-^ Normally, finish_wait() is supposed to synchronize against the waker. But, as noted above, it is returning immediately because the waitqueue entry has already been removed from the waitqueue. The bug is that rq_qos_wake_function() is accessing the waitqueue entry AFTER deleting it. Note that autoremove_wake_function() wakes the waiter and THEN deletes the waitqueue entry, which is the proper order. Fix it by swapping the order. We also need to use list_del_init_careful() to match the list_empty_careful() in finish_wait(). Fixes: 38cfb5a45ee0 ("blk-wbt: improve waking of tasks") Cc: stable(a)vger.kernel.org Signed-off-by: Omar Sandoval <osandov(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com> Link: https://lore.kernel.org/r/d3bee2463a67b1ee597211823bf7ad3721c26e41.17290145… Signed-off-by: Jens Axboe <axboe(a)kernel.dk> diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 2cfb297d9a62..058f92c4f9d5 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, data->got_token = true; smp_wmb(); - list_del_init(&curr->entry); wake_up_process(data->task); + list_del_init_careful(&curr->entry); return 1; }

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 2c02f7375e658ae93d57a31a66f91b62754ef8f1 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102001-badly-overvalue-6662@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt(a)goodmis.org> Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1 # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up. After: 1) <idle>-0 | 0.811 us | __enqueue_entity(); 1) <idle>-0 | 5.626 us | } /* enqueue_entity */ 1) <idle>-0 | | dl_server_update_idle_time() { 1) <idle>-0 | | dl_scaled_delta_exec() { 1) <idle>-0 | 0.450 us | arch_scale_cpu_capacity(); 1) <idle>-0 | 1.242 us | } 1) <idle>-0 | 1.908 us | } 1) <idle>-0 | | dl_server_start() { 1) <idle>-0 | | enqueue_dl_entity() { 1) <idle>-0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs. Cc: stable(a)vger.kernel.org Cc: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Cc: Mark Rutland <mark.rutland(a)arm.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if (!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 2c02f7375e658ae93d57a31a66f91b62754ef8f1 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102000-mortician-chant-190e@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt(a)goodmis.org> Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1 # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up. After: 1) <idle>-0 | 0.811 us | __enqueue_entity(); 1) <idle>-0 | 5.626 us | } /* enqueue_entity */ 1) <idle>-0 | | dl_server_update_idle_time() { 1) <idle>-0 | | dl_scaled_delta_exec() { 1) <idle>-0 | 0.450 us | arch_scale_cpu_capacity(); 1) <idle>-0 | 1.242 us | } 1) <idle>-0 | 1.908 us | } 1) <idle>-0 | | dl_server_start() { 1) <idle>-0 | | enqueue_dl_entity() { 1) <idle>-0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs. Cc: stable(a)vger.kernel.org Cc: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Cc: Mark Rutland <mark.rutland(a)arm.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if (!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 2c02f7375e658ae93d57a31a66f91b62754ef8f1 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102058-headphone-embody-6747@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt(a)goodmis.org> Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1 # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up. After: 1) <idle>-0 | 0.811 us | __enqueue_entity(); 1) <idle>-0 | 5.626 us | } /* enqueue_entity */ 1) <idle>-0 | | dl_server_update_idle_time() { 1) <idle>-0 | | dl_scaled_delta_exec() { 1) <idle>-0 | 0.450 us | arch_scale_cpu_capacity(); 1) <idle>-0 | 1.242 us | } 1) <idle>-0 | 1.908 us | } 1) <idle>-0 | | dl_server_start() { 1) <idle>-0 | | enqueue_dl_entity() { 1) <idle>-0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs. Cc: stable(a)vger.kernel.org Cc: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Cc: Mark Rutland <mark.rutland(a)arm.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if (!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 2c02f7375e658ae93d57a31a66f91b62754ef8f1 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102057-skipper-growl-db34@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt(a)goodmis.org> Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1 # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up. After: 1) <idle>-0 | 0.811 us | __enqueue_entity(); 1) <idle>-0 | 5.626 us | } /* enqueue_entity */ 1) <idle>-0 | | dl_server_update_idle_time() { 1) <idle>-0 | | dl_scaled_delta_exec() { 1) <idle>-0 | 0.450 us | arch_scale_cpu_capacity(); 1) <idle>-0 | 1.242 us | } 1) <idle>-0 | 1.908 us | } 1) <idle>-0 | | dl_server_start() { 1) <idle>-0 | | enqueue_dl_entity() { 1) <idle>-0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs. Cc: stable(a)vger.kernel.org Cc: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Cc: Mark Rutland <mark.rutland(a)arm.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if (!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 2c02f7375e658ae93d57a31a66f91b62754ef8f1 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102055-nugget-delicious-edfe@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt(a)goodmis.org> Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1 # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up. After: 1) <idle>-0 | 0.811 us | __enqueue_entity(); 1) <idle>-0 | 5.626 us | } /* enqueue_entity */ 1) <idle>-0 | | dl_server_update_idle_time() { 1) <idle>-0 | | dl_scaled_delta_exec() { 1) <idle>-0 | 0.450 us | arch_scale_cpu_capacity(); 1) <idle>-0 | 1.242 us | } 1) <idle>-0 | 1.908 us | } 1) <idle>-0 | | dl_server_start() { 1) <idle>-0 | | enqueue_dl_entity() { 1) <idle>-0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs. Cc: stable(a)vger.kernel.org Cc: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Cc: Mark Rutland <mark.rutland(a)arm.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if (!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 2c02f7375e658ae93d57a31a66f91b62754ef8f1 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102054-nineteen-exemplary-3f78@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt <rostedt(a)goodmis.org> Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1 # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up. After: 1) <idle>-0 | 0.811 us | __enqueue_entity(); 1) <idle>-0 | 5.626 us | } /* enqueue_entity */ 1) <idle>-0 | | dl_server_update_idle_time() { 1) <idle>-0 | | dl_scaled_delta_exec() { 1) <idle>-0 | 0.450 us | arch_scale_cpu_capacity(); 1) <idle>-0 | 1.242 us | } 1) <idle>-0 | 1.908 us | } 1) <idle>-0 | | dl_server_start() { 1) <idle>-0 | | enqueue_dl_entity() { 1) <idle>-0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs. Cc: stable(a)vger.kernel.org Cc: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> Cc: Mark Rutland <mark.rutland(a)arm.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if (!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)

11 months, 1 week

1
0
0 0

[PATCH 0/6] phy: core: Fix bugs for several APIs and simplify an API

by Zijun Hu

This patch series is to fix bugs for below APIs: devm_phy_put() devm_of_phy_provider_unregister() devm_phy_destroy() phy_get() of_phy_get() devm_of_phy_get_by_index() And simplify API of_phy_simple_xlate(). Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com> --- Zijun Hu (6): phy: core: Fix API devm_phy_put() can not release the phy phy: core: Fix API devm_of_phy_provider_unregister() can not unregister the phy provider phy: core: Fix API devm_phy_destroy() can not destroy the phy phy: core: Add missing of_node_put() for an error handling path of _of_phy_get() phy: core: Add missing of_node_put() in of_phy_provider_lookup() phy: core: Simplify API of_phy_simple_xlate() implementation drivers/phy/phy-core.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) --- base-commit: d8f9d6d826fc15780451802796bb88ec52978f17 change-id: 20241020-phy_core_fix-e3ad65db98f7 Best regards, -- Zijun Hu <quic_zijuhu(a)quicinc.com>

11 months, 1 week

3
10
0 0

[PATCH mm-unstable v1] mm: allow set/clear page_type again

by Yu Zhao

Some page flags (page->flags) were converted to page types (page->page_types). A recent example is PG_hugetlb. From the exclusive writer's perspective, e.g., a thread doing __folio_set_hugetlb(), there is a difference between the page flag and type APIs: the former allows the same non-atomic operation to be repeated whereas the latter does not. For example, calling __folio_set_hugetlb() twice triggers VM_BUG_ON_FOLIO(), since the second call expects the type (PG_hugetlb) not to be set previously. Using add_hugetlb_folio() as an example, it calls __folio_set_hugetlb() in the following error-handling path. And when that happens, it triggers the aforementioned VM_BUG_ON_FOLIO(). if (folio_test_hugetlb(folio)) { rc = hugetlb_vmemmap_restore_folio(h, folio); if (rc) { spin_lock_irq(&hugetlb_lock); add_hugetlb_folio(h, folio, false); ... It is possible to make hugeTLB comply with the new requirements from the page type API. However, a straightforward fix would be to just allow the same page type to be set or cleared again inside the API, to avoid any changes to its callers. Fixes: d99e3140a4d3 ("mm: turn folio_test_hugetlb into a PageType") Signed-off-by: Yu Zhao <yuzhao(a)google.com> Cc: <stable(a)vger.kernel.org> --- include/linux/page-flags.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ccf3c78faefc..e80665bc51fa 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -977,12 +977,16 @@ static __always_inline bool folio_test_##fname(const struct folio *folio) \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ + if (folio_test_##fname(folio)) \ + return; \ VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ folio); \ folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ + if (folio->page.page_type == UINT_MAX) \ + return; \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ folio->page.page_type = UINT_MAX; \ } @@ -995,11 +999,15 @@ static __always_inline int Page##uname(const struct page *page) \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ + if (Page##uname(page)) \ + return; \ VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ page->page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ + if (page->page_type == UINT_MAX) \ + return; \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type = UINT_MAX; \ } -- 2.47.0.rc1.288.g06298d1525-goog

11 months, 1 week

1
0
0 0

[PATCH net] netfilter: xtables: fix a bad copypaste in xt_nflog module

by Ignat Korchagin

For the nflog_tg_reg struct under the CONFIG_IP6_NF_IPTABLES switch family should probably be NFPROTO_IPV6 Fixes: 0bfcb7b71e73 ("netfilter: xtables: avoid NFPROTO_UNSPEC where needed") Cc: stable(a)vger.kernel.org Signed-off-by: Ignat Korchagin <ignat(a)cloudflare.com> --- net/netfilter/xt_NFLOG.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index d80abd6ccaf8..6dcf4bc7e30b 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -79,7 +79,7 @@ static struct xt_target nflog_tg_reg[] __read_mostly = { { .name = "NFLOG", .revision = 0, - .family = NFPROTO_IPV4, + .family = NFPROTO_IPV6, .checkentry = nflog_tg_check, .destroy = nflog_tg_destroy, .target = nflog_tg, -- 2.39.5

11 months, 1 week

2
1
0 0

[PATCH v6 5/5] tpm: flush the auth session only when /dev/tpm0 is open

by Jarkko Sakkinen

Instead of flushing and reloading the auth session for every single transaction, keep the session open unless /dev/tpm0 is used. In practice this means applying TPM2_SA_CONTINUE_SESSION to the session attributes. Flush the session always when /dev/tpm0 is written. Reported-by: Pengyu Ma <mapengyu(a)gmail.com> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219229 Cc: stable(a)vger.kernel.org # v6.10+ Fixes: 7ca110f2679b ("tpm: Address !chip->auth in tpm_buf_append_hmac_session*()") Tested-by: Pengyu Ma <mapengyu(a)gmail.com> Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org> --- v5: - No changes. v4: - Changed as bug. v3: - Refined the commit message. - Removed the conditional for applying TPM2_SA_CONTINUE_SESSION only when /dev/tpm0 is open. It is not required as the auth session is flushed, not saved. v2: - A new patch. --- drivers/char/tpm/tpm-chip.c | 1 + drivers/char/tpm/tpm-dev-common.c | 1 + drivers/char/tpm/tpm-interface.c | 1 + drivers/char/tpm/tpm2-sessions.c | 3 +++ 4 files changed, 6 insertions(+) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 0ea00e32f575..7a6bb30d1f32 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -680,6 +680,7 @@ void tpm_chip_unregister(struct tpm_chip *chip) rc = tpm_try_get_ops(chip); if (!rc) { if (chip->flags & TPM_CHIP_FLAG_TPM2) { + tpm2_end_auth_session(chip); tpm2_flush_context(chip, chip->null_key); chip->null_key = 0; } diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index 4bc07963e260..c6fdeb4feaef 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -29,6 +29,7 @@ static ssize_t tpm_dev_transmit(struct tpm_chip *chip, struct tpm_space *space, #ifdef CONFIG_TCG_TPM2_HMAC if (chip->flags & TPM_CHIP_FLAG_TPM2) { + tpm2_end_auth_session(chip); tpm2_flush_context(chip, chip->null_key); chip->null_key = 0; } diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index bfa47d48b0f2..2363018fa8fb 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -381,6 +381,7 @@ int tpm_pm_suspend(struct device *dev) if (!rc) { if (chip->flags & TPM_CHIP_FLAG_TPM2) { #ifdef CONFIG_TCG_TPM2_HMAC + tpm2_end_auth_session(chip); tpm2_flush_context(chip, chip->null_key); chip->null_key = 0; #endif diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 78d344607b53..844cfc338f33 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -333,6 +333,9 @@ void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, } #ifdef CONFIG_TCG_TPM2_HMAC + /* The first write to /dev/tpm{rm0} will flush the session. */ + attributes |= TPM2_SA_CONTINUE_SESSION; + /* * The Architecture Guide requires us to strip trailing zeros * before computing the HMAC -- 2.47.0

11 months, 1 week

1
1
0 0

[PATCH v6 1/5] tpm: Return on tpm2_create_null_primary() failure

by Jarkko Sakkinen

tpm2_sessions_init() does not ignore the result of tpm2_create_null_primary(). Address this by returning -ENODEV to the caller. Given that upper layers cannot help healing the situation further, deal with the TPM error here by Cc: stable(a)vger.kernel.org # v6.10+ Fixes: d2add27cf2b8 ("tpm: Add NULL primary creation") Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org> --- v6: - Address: https://lore.kernel.org/linux-integrity/69c893e7-6b87-4daa-80db-44d1120e80f… as TPM RC is taken care of at the call site. Add also the missing documentation for the return values. v5: - Do not print klog messages on error, as tpm2_save_context() already takes care of this. v4: - Fixed up stable version. v3: - Handle TPM and POSIX error separately and return -ENODEV always back to the caller. v2: - Refined the commit message. --- drivers/char/tpm/tpm2-sessions.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 511c67061728..253639767c1e 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -1347,6 +1347,11 @@ static int tpm2_create_null_primary(struct tpm_chip *chip) * * Derive and context save the null primary and allocate memory in the * struct tpm_chip for the authorizations. + * + * Return: + * * 0 - OK + * * -errno - A system error + * * TPM_RC - A TPM error */ int tpm2_sessions_init(struct tpm_chip *chip) { @@ -1354,7 +1359,7 @@ int tpm2_sessions_init(struct tpm_chip *chip) rc = tpm2_create_null_primary(chip); if (rc) - dev_err(&chip->dev, "TPM: security failed (NULL seed derivation): %d\n", rc); + return rc; chip->auth = kmalloc(sizeof(*chip->auth), GFP_KERNEL); if (!chip->auth) -- 2.47.0

11 months, 1 week

1
1
0 0

[PATCH 1/2] KVM: arm64: Don't retire aborted MMIO instruction

by Oliver Upton

Returning an abort to the guest for an unsupported MMIO access is a documented feature of the KVM UAPI. Nevertheless, it's clear that this plumbing has seen limited testing, since userspace can trivially cause a WARN in the MMIO return: WARNING: CPU: 0 PID: 30558 at arch/arm64/include/asm/kvm_emulate.h:536 kvm_handle_mmio_return+0x46c/0x5c4 arch/arm64/include/asm/kvm_emulate.h:536 Call trace: kvm_handle_mmio_return+0x46c/0x5c4 arch/arm64/include/asm/kvm_emulate.h:536 kvm_arch_vcpu_ioctl_run+0x98/0x15b4 arch/arm64/kvm/arm.c:1133 kvm_vcpu_ioctl+0x75c/0xa78 virt/kvm/kvm_main.c:4487 __do_sys_ioctl fs/ioctl.c:51 [inline] __se_sys_ioctl fs/ioctl.c:893 [inline] __arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:893 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x1e0/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x38/0x68 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x90/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 The splat is complaining that KVM is advancing PC while an exception is pending, i.e. that KVM is retiring the MMIO instruction despite a pending external abort. Womp womp. Fix the glaring UAPI bug by skipping over all the MMIO emulation in case there is a pending synchronous exception. Note that while userspace is capable of pending an asynchronous exception (SError, IRQ, or FIQ), it is still safe to retire the MMIO instruction in this case as (1) they are by definition asynchronous, and (2) KVM relies on hardware support for pending/delivering these exceptions instead of the software state machine for advancing PC. Cc: stable(a)vger.kernel.org Fixes: da345174ceca ("KVM: arm/arm64: Allow user injection of external data aborts") Reported-by: Alexander Potapenko <glider(a)google.com> Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev> --- arch/arm64/include/asm/kvm_emulate.h | 25 +++++++++++++++++++++++++ arch/arm64/kvm/mmio.c | 7 +++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index a601a9305b10..1b229099f684 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -544,6 +544,31 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) +static inline bool kvm_pending_sync_exception(struct kvm_vcpu *vcpu) +{ + if (!vcpu_get_flag(vcpu, PENDING_EXCEPTION)) + return false; + + if (vcpu_el1_is_32bit(vcpu)) { + switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) { + case unpack_vcpu_flag(EXCEPT_AA32_UND): + case unpack_vcpu_flag(EXCEPT_AA32_IABT): + case unpack_vcpu_flag(EXCEPT_AA32_DABT): + return true; + default: + return false; + } + } else { + switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) { + case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC): + case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC): + return true; + default: + return false; + } + } +} + #define __build_check_all_or_none(r, bits) \ BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits)) diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index cd6b7b83e2c3..0155ba665717 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -84,8 +84,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) unsigned int len; int mask; - /* Detect an already handled MMIO return */ - if (unlikely(!vcpu->mmio_needed)) + /* + * Detect if the MMIO return was already handled or if userspace aborted + * the MMIO access. + */ + if (unlikely(!vcpu->mmio_needed || kvm_pending_sync_exception(vcpu))) return 1; vcpu->mmio_needed = 0; -- 2.47.0.rc1.288.g06298d1525-goog

11 months, 1 week

2
2
0 0

[PATCH] xhci: Fix Link TRB DMA in command ring stopped completion event

by Faisal Hassan

During the aborting of a command, the software receives a command completion event for the command ring stopped, with the TRB pointing to the next TRB after the aborted command. If the command we abort is located just before the Link TRB in the command ring, then during the 'command ring stopped' completion event, the xHC gives the Link TRB in the event's cmd DMA, which causes a mismatch in handling command completion event. To handle this situation, an additional check has been added to ignore the mismatch error and continue the operation. Cc: stable(a)vger.kernel.org Signed-off-by: Faisal Hassan <quic_faisalh(a)quicinc.com> --- drivers/usb/host/xhci-ring.c | 38 +++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b2950c35c740..43926c378df9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -126,6 +126,32 @@ static void inc_td_cnt(struct urb *urb) urb_priv->num_tds_done++; } +/* + * Return true if the DMA is pointing to a Link TRB in the ring; + * otherwise, return false. + */ +static bool is_dma_link_trb(struct xhci_ring *ring, dma_addr_t dma) +{ + struct xhci_segment *seg; + union xhci_trb *trb; + dma_addr_t trb_dma; + int i; + + seg = ring->first_seg; + do { + for (i = 0; i < TRBS_PER_SEGMENT; i++) { + trb = &seg->trbs[i]; + trb_dma = seg->dma + (i * sizeof(union xhci_trb)); + + if (trb_is_link(trb) && trb_dma == dma) + return true; + } + seg = seg->next; + } while (seg != ring->first_seg); + + return false; +} + static void trb_to_noop(union xhci_trb *trb, u32 noop_type) { if (trb_is_link(trb)) { @@ -1718,13 +1744,21 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, trace_xhci_handle_command(xhci->cmd_ring, &cmd_trb->generic); + cmd_comp_code = GET_COMP_CODE(le32_to_cpu(event->status)); cmd_dequeue_dma = xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg, cmd_trb); /* * Check whether the completion event is for our internal kept * command. + * For the 'command ring stopped' completion event, there is a + * risk of a mismatch in dequeue pointers if we abort the command + * just before the link TRB in the command ring. In this scenario, + * the cmd_dma in the event would point to a link TRB, while the + * software dequeue pointer circles back to the start. */ - if (!cmd_dequeue_dma || cmd_dma != (u64)cmd_dequeue_dma) { + if ((!cmd_dequeue_dma || cmd_dma != (u64)cmd_dequeue_dma) && + !(cmd_comp_code == COMP_COMMAND_RING_STOPPED && + is_dma_link_trb(xhci->cmd_ring, cmd_dma))) { xhci_warn(xhci, "ERROR mismatched command completion event\n"); return; @@ -1734,8 +1768,6 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cancel_delayed_work(&xhci->cmd_timer); - cmd_comp_code = GET_COMP_CODE(le32_to_cpu(event->status)); - /* If CMD ring stopped we own the trbs between enqueue and dequeue */ if (cmd_comp_code == COMP_COMMAND_RING_STOPPED) { complete_all(&xhci->cmd_ring_stop_completion); -- 2.17.1

11 months, 1 week

3
4
0 0

FAILED: patch "[PATCH] nilfs2: propagate directory read errors from" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 08cfa12adf888db98879dbd735bc741360a34168 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101851-wok-iguana-839c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 08cfa12adf888db98879dbd735bc741360a34168 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Date: Fri, 4 Oct 2024 12:35:31 +0900 Subject: [PATCH] nilfs2: propagate directory read errors from nilfs_find_entry() Syzbot reported that a task hang occurs in vcs_open() during a fuzzing test for nilfs2. The root cause of this problem is that in nilfs_find_entry(), which searches for directory entries, ignores errors when loading a directory page/folio via nilfs_get_folio() fails. If the filesystem images is corrupted, and the i_size of the directory inode is large, and the directory page/folio is successfully read but fails the sanity check, for example when it is zero-filled, nilfs_check_folio() may continue to spit out error messages in bursts. Fix this issue by propagating the error to the callers when loading a page/folio fails in nilfs_find_entry(). The current interface of nilfs_find_entry() and its callers is outdated and cannot propagate error codes such as -EIO and -ENOMEM returned via nilfs_find_entry(), so fix it together. Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Reported-by: Lizhi Xu <lizhi.xu(a)windriver.com> Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com Reported-by: syzbot+8a192e8d090fa9a31135(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135 Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index fe5b1a30c509..a8602729586a 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -289,7 +289,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) * The folio is mapped and unlocked. When the caller is finished with * the entry, it should call folio_release_kmap(). * - * On failure, returns NULL and the caller should ignore foliop. + * On failure, returns an error pointer and the caller should ignore foliop. */ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct folio **foliop) @@ -312,22 +312,24 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, do { char *kaddr = nilfs_get_folio(dir, n, foliop); - if (!IS_ERR(kaddr)) { - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, - "zero-length directory entry"); - folio_release_kmap(*foliop, kaddr); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - folio_release_kmap(*foliop, kaddr); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; /* next folio is past the blocks we've got */ @@ -340,7 +342,7 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: ei->i_dir_start_lookup = n; @@ -384,18 +386,18 @@ fail: return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; struct folio *folio; de = nilfs_find_entry(dir, qstr, &folio); - if (de) { - res = le64_to_cpu(de->inode); - folio_release_kmap(folio, de); - } - return res; + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index c950139db6ef..4905063790c5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + } + return d_splice_alias(inode, dentry); } @@ -263,10 +271,11 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) struct folio *folio; int err; - err = -ENOENT; de = nilfs_find_entry(dir, &dentry->d_name, &folio); - if (!de) + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } inode = d_inode(dentry); err = -EIO; @@ -362,10 +371,11 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (unlikely(err)) return err; - err = -ENOENT; old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); - if (!old_de) + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } if (S_ISDIR(old_inode->i_mode)) { err = -EIO; @@ -382,10 +392,12 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (dir_de && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); goto out_dir; + } nilfs_set_link(new_dir, new_de, new_folio, old_inode); folio_release_kmap(new_folio, new_de); nilfs_mark_inode_dirty(new_dir); @@ -440,12 +452,13 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); root = NILFS_I(d_inode(child))->i_root; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index fb1c4c5bae7c..45d03826eaf1 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -254,7 +254,7 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) /* dir.c */ int nilfs_add_link(struct dentry *, struct inode *); -ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); int nilfs_make_empty(struct inode *, struct inode *); struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, struct folio **);

11 months, 1 week

2
1
0 0

FAILED: patch "[PATCH] nilfs2: propagate directory read errors from" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 08cfa12adf888db98879dbd735bc741360a34168 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101851-treble-echo-a580@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 08cfa12adf888db98879dbd735bc741360a34168 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Date: Fri, 4 Oct 2024 12:35:31 +0900 Subject: [PATCH] nilfs2: propagate directory read errors from nilfs_find_entry() Syzbot reported that a task hang occurs in vcs_open() during a fuzzing test for nilfs2. The root cause of this problem is that in nilfs_find_entry(), which searches for directory entries, ignores errors when loading a directory page/folio via nilfs_get_folio() fails. If the filesystem images is corrupted, and the i_size of the directory inode is large, and the directory page/folio is successfully read but fails the sanity check, for example when it is zero-filled, nilfs_check_folio() may continue to spit out error messages in bursts. Fix this issue by propagating the error to the callers when loading a page/folio fails in nilfs_find_entry(). The current interface of nilfs_find_entry() and its callers is outdated and cannot propagate error codes such as -EIO and -ENOMEM returned via nilfs_find_entry(), so fix it together. Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Reported-by: Lizhi Xu <lizhi.xu(a)windriver.com> Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com Reported-by: syzbot+8a192e8d090fa9a31135(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135 Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index fe5b1a30c509..a8602729586a 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -289,7 +289,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) * The folio is mapped and unlocked. When the caller is finished with * the entry, it should call folio_release_kmap(). * - * On failure, returns NULL and the caller should ignore foliop. + * On failure, returns an error pointer and the caller should ignore foliop. */ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct folio **foliop) @@ -312,22 +312,24 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, do { char *kaddr = nilfs_get_folio(dir, n, foliop); - if (!IS_ERR(kaddr)) { - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, - "zero-length directory entry"); - folio_release_kmap(*foliop, kaddr); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - folio_release_kmap(*foliop, kaddr); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; /* next folio is past the blocks we've got */ @@ -340,7 +342,7 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: ei->i_dir_start_lookup = n; @@ -384,18 +386,18 @@ fail: return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; struct folio *folio; de = nilfs_find_entry(dir, qstr, &folio); - if (de) { - res = le64_to_cpu(de->inode); - folio_release_kmap(folio, de); - } - return res; + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index c950139db6ef..4905063790c5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + } + return d_splice_alias(inode, dentry); } @@ -263,10 +271,11 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) struct folio *folio; int err; - err = -ENOENT; de = nilfs_find_entry(dir, &dentry->d_name, &folio); - if (!de) + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } inode = d_inode(dentry); err = -EIO; @@ -362,10 +371,11 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (unlikely(err)) return err; - err = -ENOENT; old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); - if (!old_de) + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } if (S_ISDIR(old_inode->i_mode)) { err = -EIO; @@ -382,10 +392,12 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (dir_de && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); goto out_dir; + } nilfs_set_link(new_dir, new_de, new_folio, old_inode); folio_release_kmap(new_folio, new_de); nilfs_mark_inode_dirty(new_dir); @@ -440,12 +452,13 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); root = NILFS_I(d_inode(child))->i_root; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index fb1c4c5bae7c..45d03826eaf1 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -254,7 +254,7 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) /* dir.c */ int nilfs_add_link(struct dentry *, struct inode *); -ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); int nilfs_make_empty(struct inode *, struct inode *); struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, struct folio **);

11 months, 1 week

2
1
0 0

[PATCH 0/2] vfs: fstatat, statx: Consistently accept AT_EMPTY_PATH and NULL path

by Xi Ruoyao

Since Linux 6.11 we support AT_EMPTY_PATH and NULL path for fstatat and statx in "some circumstances" mostly for performance and allowing seccomp audition. But to make the API easier to be documented and used, we should just treat AT_EMPTY_PATH and NULL as is AT_EMPTY_PATH and empty string even if there are no performance or seccomp benefits. Cc: Miao Wang <shankerwangmiao(a)gmail.com> Cc: linux-fsdevel(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Cc: stable(a)vger.kernel.org Xi Ruoyao (2): vfs: support fstatat(..., NULL, AT_EMPTY_PATH | AT_NO_AUTOMOUNT, ...) vfs: Make sure {statx,fstatat}(..., AT_EMPTY_PATH | ..., NULL, ...) behave as (..., AT_EMPTY_PATH | ..., "", ...) fs/stat.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) -- 2.46.2

11 months, 1 week

3
7
0 0

[PATCH net] mctp i2c: handle NULL header address

by Matt Johnston via B4 Relay

From: Matt Johnston <matt(a)codeconstruct.com.au> daddr can be NULL if there is no neighbour table entry present, in that case the tx packet should be dropped. Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Cc: stable(a)vger.kernel.org Reported-by: Dung Cao <dung(a)os.amperecomputing.com> Signed-off-by: Matt Johnston <matt(a)codeconstruct.com.au> --- drivers/net/mctp/mctp-i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git drivers/net/mctp/mctp-i2c.c drivers/net/mctp/mctp-i2c.c index 4dc057c121f5..e70fb6687994 100644 --- drivers/net/mctp/mctp-i2c.c +++ drivers/net/mctp/mctp-i2c.c @@ -588,6 +588,9 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev, if (len > MCTP_I2C_MAXMTU) return -EMSGSIZE; + if (!daddr || !saddr) + return -EINVAL; + lldst = *((u8 *)daddr); llsrc = *((u8 *)saddr); --- base-commit: cb560795c8c2ceca1d36a95f0d1b2eafc4074e37 change-id: 20241018-mctp-i2c-null-dest-a0ba271e0c48 Best regards, -- Matt Johnston <matt(a)codeconstruct.com.au>

11 months, 1 week

3
2
0 0

[PATCH 3/8] drm/i915/pmu: Fix crash due to use-after-free

by Lucas De Marchi

When an i915 PMU counter is enabled and the driver is then unbound, the PMU will be unregistered via perf_pmu_unregister(), however the event will still be alive. i915 currently tries to deal with this situation by: a) Marking the pmu as "closed" and shortcut the calls from perf b) Taking a reference from i915, that is put back when the event is destroyed. c) Setting event_init to NULL to avoid any further event (a) is ugly, but may be left as is since it protects not trying to access the HW that is now gone. Unless a pmu driver can call perf_pmu_unregister() and not receive any more calls, it's a necessary ugliness. (b) doesn't really work: when the event is destroyed and the i915 ref is put it may free the i915 object, that contains the pmu, not only the event. After event->destroy() callback, perf still expects the pmu object to be alive. Instead of pigging back on the event->destroy() to take and put the device reference, implement the new get()/put() on the pmu object for that purpose. (c) is only done to have a flag to avoid some function entrypoints when pmu is unregistered. Cc: stable(a)vger.kernel.org # 5.11+ Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com> --- drivers/gpu/drm/i915/i915_pmu.c | 36 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 4d05d98f51b8e..dc9f753369170 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -515,15 +515,6 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static void i915_pmu_event_destroy(struct perf_event *event) -{ - struct i915_pmu *pmu = event_to_pmu(event); - struct drm_i915_private *i915 = pmu_to_i915(pmu); - - drm_WARN_ON(&i915->drm, event->parent); - - drm_dev_put(&i915->drm); -} static int engine_event_status(struct intel_engine_cs *engine, @@ -629,11 +620,6 @@ static int i915_pmu_event_init(struct perf_event *event) if (ret) return ret; - if (!event->parent) { - drm_dev_get(&i915->drm); - event->destroy = i915_pmu_event_destroy; - } - return 0; } @@ -872,6 +858,24 @@ static int i915_pmu_event_event_idx(struct perf_event *event) return 0; } +static struct pmu *i915_pmu_get(struct pmu *base) +{ + struct i915_pmu *pmu = container_of(base, struct i915_pmu, base); + struct drm_i915_private *i915 = pmu_to_i915(pmu); + + drm_dev_get(&i915->drm); + + return base; +} + +static void i915_pmu_put(struct pmu *base) +{ + struct i915_pmu *pmu = container_of(base, struct i915_pmu, base); + struct drm_i915_private *i915 = pmu_to_i915(pmu); + + drm_dev_put(&i915->drm); +} + struct i915_str_attribute { struct device_attribute attr; const char *str; @@ -1154,6 +1158,8 @@ static void free_pmu(struct drm_device *dev, void *res) struct i915_pmu *pmu = res; struct drm_i915_private *i915 = pmu_to_i915(pmu); + perf_pmu_free(&pmu->base); + free_event_attributes(pmu); kfree(pmu->base.attr_groups); if (IS_DGFX(i915)) @@ -1299,6 +1305,8 @@ void i915_pmu_register(struct drm_i915_private *i915) pmu->base.stop = i915_pmu_event_stop; pmu->base.read = i915_pmu_event_read; pmu->base.event_idx = i915_pmu_event_event_idx; + pmu->base.get = i915_pmu_get; + pmu->base.put = i915_pmu_put; ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) -- 2.47.0

11 months, 1 week

2
1
0 0

[PATCH 6.11.y] mm: vmscan.c: fix OOM on swap stress test

by Chris Li

[ Upstream commit 0885ef4705607936fc36a38fd74356e1c465b023 ] I found a regression on mm-unstable during my swap stress test, using tmpfs to compile linux. The test OOM very soon after the make spawns many cc processes. It bisects down to this change: 33dfe9204f29b415bbc0abb1a50642d1ba94f5e9 (mm/gup: clear the LRU flag of a page before adding to LRU batch) Yu Zhao propose the fix: "I think this is one of the potential side effects -- Huge mentioned earlier about isolate_lru_folios():" I test that with it the swap stress test no longer OOM. Link: https://lore.kernel.org/r/CAOUHufYi9h0kz5uW3LHHS3ZrVwEq-kKp8S6N-MZUmErNAXoX… Link: https://lkml.kernel.org/r/20240905-lru-flag-v2-1-8a2d9046c594@kernel.org Fixes: 33dfe9204f29 ("mm/gup: clear the LRU flag of a page before adding to LRU batch") Signed-off-by: Chris Li <chrisl(a)kernel.org> Suggested-by: Yu Zhao <yuzhao(a)google.com> Suggested-by: Hugh Dickins <hughd(a)google.com> Closes: https://lore.kernel.org/all/CAF8kJuNP5iTj2p07QgHSGOJsiUfYpJ2f4R1Q5-3BN9JiD9… Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index bd489c1af2289..a8d61a8b68944 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4300,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* ineligible */ - if (zone > sc->reclaim_idx) { + if (!folio_test_lru(folio) || zone > sc->reclaim_idx) { gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); return true; --- base-commit: 8e24a758d14c0b1cd42ab0aea980a1030eea811f change-id: 20241015-stable-oom-fix-a6ab273b1817 Best regards, -- Chris Li <chrisl(a)kernel.org>

11 months, 1 week

3
2
0 0

+ revert-selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: Revert "selftests/mm: replace atomic_bool with pthread_barrier_t" has been added to the -mm mm-hotfixes-unstable branch. Its filename is revert-selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Edward Liaw <edliaw(a)google.com> Subject: Revert "selftests/mm: replace atomic_bool with pthread_barrier_t" Date: Fri, 18 Oct 2024 17:17:23 +0000 This reverts commit e61ef21e27e8deed8c474e9f47f4aa7bc37e138c. uffd_poll_thread may be called by other tests that do not initialize the pthread_barrier, so this approach is not correct. This will revert to using atomic_bool instead. Link: https://lkml.kernel.org/r/20241018171734.2315053-3-edliaw@google.com Fixes: e61ef21e27e8 ("selftests/mm: replace atomic_bool with pthread_barrier_t") Signed-off-by: Edward Liaw <edliaw(a)google.com> Cc: Ryan Roberts <ryan.roberts(a)arm.com> Cc: Peter Xu <peterx(a)redhat.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- tools/testing/selftests/mm/uffd-common.c | 5 ++--- tools/testing/selftests/mm/uffd-common.h | 3 ++- tools/testing/selftests/mm/uffd-unit-tests.c | 14 ++++++-------- 3 files changed, 10 insertions(+), 12 deletions(-) --- a/tools/testing/selftests/mm/uffd-common.c~revert-selftests-mm-replace-atomic_bool-with-pthread_barrier_t +++ a/tools/testing/selftests/mm/uffd-common.c @@ -18,7 +18,7 @@ bool test_uffdio_wp = true; unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; -pthread_barrier_t ready_for_fork; +atomic_bool ready_for_fork; static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -519,8 +519,7 @@ void *uffd_poll_thread(void *arg) pollfd[1].fd = pipefd[cpu*2]; pollfd[1].events = POLLIN; - /* Ready for parent thread to fork */ - pthread_barrier_wait(&ready_for_fork); + ready_for_fork = true; for (;;) { ret = poll(pollfd, 2, -1); --- a/tools/testing/selftests/mm/uffd-common.h~revert-selftests-mm-replace-atomic_bool-with-pthread_barrier_t +++ a/tools/testing/selftests/mm/uffd-common.h @@ -33,6 +33,7 @@ #include <inttypes.h> #include <stdint.h> #include <sys/random.h> +#include <stdatomic.h> #include "../kselftest.h" #include "vm_util.h" @@ -104,7 +105,7 @@ extern bool map_shared; extern bool test_uffdio_wp; extern unsigned long long *count_verify; extern volatile bool test_uffdio_copy_eexist; -extern pthread_barrier_t ready_for_fork; +extern atomic_bool ready_for_fork; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; --- a/tools/testing/selftests/mm/uffd-unit-tests.c~revert-selftests-mm-replace-atomic_bool-with-pthread_barrier_t +++ a/tools/testing/selftests/mm/uffd-unit-tests.c @@ -774,7 +774,7 @@ static void uffd_sigbus_test_common(bool char c; struct uffd_args args = { 0 }; - pthread_barrier_init(&ready_for_fork, NULL, 2); + ready_for_fork = false; fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); @@ -791,9 +791,8 @@ static void uffd_sigbus_test_common(bool if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - /* Wait for child thread to start before forking */ - pthread_barrier_wait(&ready_for_fork); - pthread_barrier_destroy(&ready_for_fork); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); if (pid < 0) @@ -834,7 +833,7 @@ static void uffd_events_test_common(bool char c; struct uffd_args args = { 0 }; - pthread_barrier_init(&ready_for_fork, NULL, 2); + ready_for_fork = false; fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); if (uffd_register(uffd, area_dst, nr_pages * page_size, @@ -845,9 +844,8 @@ static void uffd_events_test_common(bool if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - /* Wait for child thread to start before forking */ - pthread_barrier_wait(&ready_for_fork); - pthread_barrier_destroy(&ready_for_fork); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); if (pid < 0) _ Patches currently in -mm which might be from edliaw(a)google.com are revert-selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch revert-selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch selftests-mm-fix-deadlock-for-fork-after-pthread_create-with-atomic_bool.patch

11 months, 1 week

1
0
0 0

+ revert-selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: Revert "selftests/mm: fix deadlock for fork after pthread_create on ARM" has been added to the -mm mm-hotfixes-unstable branch. Its filename is revert-selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Edward Liaw <edliaw(a)google.com> Subject: Revert "selftests/mm: fix deadlock for fork after pthread_create on ARM" Date: Fri, 18 Oct 2024 17:17:22 +0000 Patch series "selftests/mm: revert pthread_barrier change" On Android arm, pthread_create followed by a fork caused a deadlock in the case where the fork required work to be completed by the created thread. The previous patches incorrectly assumed that the parent would always initialize the pthread_barrier for the child thread. This reverts the change and replaces the fix for wp-fork-with-event with the original use of atomic_bool. This patch (of 3): This reverts commit e142cc87ac4ec618f2ccf5f68aedcd6e28a59d9d. fork_event_consumer may be called by other tests that do not initialize the pthread_barrier, so this approach is not correct. The subsequent patch will revert to using atomic_bool instead. Link: https://lkml.kernel.org/r/20241018171734.2315053-1-edliaw@google.com Link: https://lkml.kernel.org/r/20241018171734.2315053-2-edliaw@google.com Fixes: e142cc87ac4e ("fix deadlock for fork after pthread_create on ARM") Signed-off-by: Edward Liaw <edliaw(a)google.com> Cc: Ryan Roberts <ryan.roberts(a)arm.com> Cc: Peter Xu <peterx(a)redhat.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- tools/testing/selftests/mm/uffd-unit-tests.c | 7 ------- 1 file changed, 7 deletions(-) --- a/tools/testing/selftests/mm/uffd-unit-tests.c~revert-selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm +++ a/tools/testing/selftests/mm/uffd-unit-tests.c @@ -241,9 +241,6 @@ static void *fork_event_consumer(void *d fork_event_args *args = data; struct uffd_msg msg = { 0 }; - /* Ready for parent thread to fork */ - pthread_barrier_wait(&ready_for_fork); - /* Read until a full msg received */ while (uffd_read_msg(args->parent_uffd, &msg)); @@ -311,12 +308,8 @@ static int pagemap_test_fork(int uffd, b /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { - pthread_barrier_init(&ready_for_fork, NULL, 2); if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); - /* Wait for child thread to start before forking */ - pthread_barrier_wait(&ready_for_fork); - pthread_barrier_destroy(&ready_for_fork); } child = fork(); _ Patches currently in -mm which might be from edliaw(a)google.com are revert-selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch revert-selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch selftests-mm-fix-deadlock-for-fork-after-pthread_create-with-atomic_bool.patch

11 months, 1 week

1
0
0 0

[PATCH v3 1/1] nfsd: fix race between laundromat and free_stateid

by Olga Kornievskaia

There is a race between laundromat handling of revoked delegations and a client sending free_stateid operation. Laundromat thread finds that delegation has expired and needs to be revoked so it marks the delegation stid revoked and it puts it on a reaper list but then it unlock the state lock and the actual delegation revocation happens without the lock. Once the stid is marked revoked a racing free_stateid processing thread does the following (1) it calls list_del_init() which removes it from the reaper list and (2) frees the delegation stid structure. The laundromat thread ends up not calling the revoke_delegation() function for this particular delegation but that means it will no release the lock lease that exists on the file. Now, a new open for this file comes in and ends up finding that lease list isn't empty and calls nfsd_breaker_owns_lease() which ends up trying to derefence a freed delegation stateid. Leading to the followint use-after-free KASAN warning: kernel: ================================================================== kernel: BUG: KASAN: slab-use-after-free in nfsd_breaker_owns_lease+0x140/0x160 [nfsd] kernel: Read of size 8 at addr ffff0000e73cd0c8 by task nfsd/6205 kernel: kernel: CPU: 2 UID: 0 PID: 6205 Comm: nfsd Kdump: loaded Not tainted 6.11.0-rc7+ #9 kernel: Hardware name: Apple Inc. Apple Virtualization Generic Platform, BIOS 2069.0.0.0.0 08/03/2024 kernel: Call trace: kernel: dump_backtrace+0x98/0x120 kernel: show_stack+0x1c/0x30 kernel: dump_stack_lvl+0x80/0xe8 kernel: print_address_description.constprop.0+0x84/0x390 kernel: print_report+0xa4/0x268 kernel: kasan_report+0xb4/0xf8 kernel: __asan_report_load8_noabort+0x1c/0x28 kernel: nfsd_breaker_owns_lease+0x140/0x160 [nfsd] kernel: nfsd_file_do_acquire+0xb3c/0x11d0 [nfsd] kernel: nfsd_file_acquire_opened+0x84/0x110 [nfsd] kernel: nfs4_get_vfs_file+0x634/0x958 [nfsd] kernel: nfsd4_process_open2+0xa40/0x1a40 [nfsd] kernel: nfsd4_open+0xa08/0xe80 [nfsd] kernel: nfsd4_proc_compound+0xb8c/0x2130 [nfsd] kernel: nfsd_dispatch+0x22c/0x718 [nfsd] kernel: svc_process_common+0x8e8/0x1960 [sunrpc] kernel: svc_process+0x3d4/0x7e0 [sunrpc] kernel: svc_handle_xprt+0x828/0xe10 [sunrpc] kernel: svc_recv+0x2cc/0x6a8 [sunrpc] kernel: nfsd+0x270/0x400 [nfsd] kernel: kthread+0x288/0x310 kernel: ret_from_fork+0x10/0x20 This patch proposes a fixed that's based on adding 2 new additional stid's sc_status values that help coordinate between the laundromat and other operations (nfsd4_free_stateid() and nfsd4_delegreturn()). First to make sure, that once the stid is marked revoked, it is not removed by the nfsd4_free_stateid(), the laundromat take a reference on the stateid. Then, coordinating whether the stid has been put on the cl_revoked list or we are processing FREE_STATEID and need to make sure to remove it from the list, each check that state and act accordingly. If laundromat has added to the cl_revoke list before the arrival of FREE_STATEID, then nfsd4_free_stateid() knows to remove it from the list. If nfsd4_free_stateid() finds that operations arrived before laundromat has placed it on cl_revoke list, it marks the state freed and then laundromat will no longer add it to the list. Also, for nfsd4_delegreturn() when looking for the specified stid, we need to access stid that are marked removed or freeable, it means the laundromat has started processing it but hasn't finished and this delegreturn needs to return nfserr_deleg_revoked and not nfserr_bad_stateid. The latter will not trigger a FREE_STATEID and the lack of it will leave this stid on the cl_revoked list indefinitely. Fixes: 2d4a532d385f ("nfsd: ensure that clp->cl_revoked list is protected by clp->cl_lock") CC: stable(a)vger.kernel.org Signed-off-by: Olga Kornievskaia <okorniev(a)redhat.com> --- v3. (1) adds refcount to nfsd4_revoke_states() (2) adds comments to revoke_delegation(), adds the WARN_ON_ONCE to make sure stid state is what is expected and changes unlock placement. --- fs/nfsd/nfs4state.c | 46 +++++++++++++++++++++++++++++++++++++-------- fs/nfsd/state.h | 2 ++ 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7905ab9d8bc6..28e9b52b01fd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1351,21 +1351,47 @@ static void destroy_delegation(struct nfs4_delegation *dp) destroy_unhashed_deleg(dp); } +/** + * revoke_delegation - perform nfs4 delegation structure cleanup + * @dp: pointer to the delegation + * + * This function assumes that it's called either from the administrative + * interface (nfsd4_revoke_states()) that's revoking a specific delegation + * stateid or it's called from a laundromat thread (nfsd4_landromat()) that + * determined that this specific state has expired and needs to be revoked + * (both mark state with the appropriate stid sc_status mode). It is also + * assumed that a reference was take on the @dp state. + * + * If this function finds that the @dp state is SC_STATUS_FREED it means + * that a FREE_STATEID operation for this stateid has been processed and + * we can proceed to removing it from recalled list. However, if @dp state + * isn't marked SC_STATUS_FREED, it means we need place it on the cl_revoked + * list and wait for the FREE_STATEID to arrive from the client. At the same + * time, we need to mark it as SC_STATUS_FREEABLE to indicate to the + * nfsd4_free_stateid() function that this stateid has already been added + * to the cl_revoked list and that nfsd4_free_stateid() is now responsible + * for removing it from the list. Inspection of where the delegation state + * in the revocation process is protected by the clp->cl_lock. + */ static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); + WARN_ON_ONCE(!(dp->dl_stid.sc_status & + (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); - if (dp->dl_stid.sc_status & - (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)) { - spin_lock(&clp->cl_lock); - refcount_inc(&dp->dl_stid.sc_count); - list_add(&dp->dl_recall_lru, &clp->cl_revoked); - spin_unlock(&clp->cl_lock); + spin_lock(&clp->cl_lock); + if (dp->dl_stid.sc_status & SC_STATUS_FREED) { + list_del_init(&dp->dl_recall_lru); + goto out; } + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + dp->dl_stid.sc_status |= SC_STATUS_FREEABLE; +out: + spin_unlock(&clp->cl_lock); destroy_unhashed_deleg(dp); } @@ -1772,6 +1798,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb) mutex_unlock(&stp->st_mutex); break; case SC_TYPE_DELEG: + refcount_inc(&stid->sc_count); dp = delegstateid(stid); spin_lock(&state_lock); if (!unhash_delegation_locked( @@ -6606,6 +6633,7 @@ nfs4_laundromat(struct nfsd_net *nn) dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (!state_expired(&lt, dp->dl_time)) break; + refcount_inc(&dp->dl_stid.sc_count); unhash_delegation_locked(dp, SC_STATUS_REVOKED); list_add(&dp->dl_recall_lru, &reaplist); } @@ -7218,7 +7246,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, s->sc_status |= SC_STATUS_CLOSED; spin_unlock(&s->sc_lock); dp = delegstateid(s); - list_del_init(&dp->dl_recall_lru); + if (s->sc_status & SC_STATUS_FREEABLE) + list_del_init(&dp->dl_recall_lru); + s->sc_status |= SC_STATUS_FREED; spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; @@ -7548,7 +7578,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, 0, &s, nn); + status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED|SC_STATUS_FREEABLE, &s, nn); if (status) goto out; dp = delegstateid(s); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6351e6eca7cc..cc00d6b64b88 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -114,6 +114,8 @@ struct nfs4_stid { /* For a deleg stateid kept around only to process free_stateid's: */ #define SC_STATUS_REVOKED BIT(1) #define SC_STATUS_ADMIN_REVOKED BIT(2) +#define SC_STATUS_FREEABLE BIT(3) +#define SC_STATUS_FREED BIT(4) unsigned short sc_status; struct list_head sc_cp_list; -- 2.43.5

11 months, 1 week

3
2
0 0

[PATCH 6.11.y 0/3] : Yu Zhao's memory fix backport

by chrisl＠kernel.org

A few commits from Yu Zhao have been merged into 6.12. They need to be backported to 6.11. - c2a967f6ab0ec ("mm/hugetlb_vmemmap: don't synchronize_rcu() without HVO") - 95599ef684d01 ("mm/codetag: fix pgalloc_tag_split()") - e0a955bf7f61c ("mm/codetag: add pgalloc_tag_copy()") --- Yu Zhao (3): mm/hugetlb_vmemmap: don't synchronize_rcu() without HVO mm/codetag: fix pgalloc_tag_split() mm/codetag: add pgalloc_tag_copy() include/linux/alloc_tag.h | 24 ++++++++----------- include/linux/mm.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pgalloc_tag.h | 31 ------------------------ mm/huge_memory.c | 2 +- mm/hugetlb_vmemmap.c | 40 +++++++++++++++---------------- mm/migrate.c | 1 + mm/page_alloc.c | 4 ++-- 7 files changed, 91 insertions(+), 68 deletions(-) --- base-commit: 8e24a758d14c0b1cd42ab0aea980a1030eea811f change-id: 20241016-stable-yuzhao-7779910482e8 Best regards, -- Chris Li <chrisl(a)kernel.org>

11 months, 1 week

5
9
0 0

[PATCH RESEND] fs/bfs: fix possible NULL pointer dereference caused by empty i_op/i_fop

by Andrew Kanner

Syzkaller reported and reproduced the following issue: loop0: detected capacity change from 0 to 64 overlayfs: fs on './file0' does not support file handles, \ falling back to index=off,nfs_export=off. BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] Comm: syz-executor169 Not tainted 6.11.0-rc5-syzkaller-00176-g20371ba12063 #0 [...] Call Trace: <TASK> __lookup_slow+0x28c/0x3f0 fs/namei.c:1718 lookup_slow fs/namei.c:1735 [inline] lookup_one_unlocked+0x1a4/0x290 fs/namei.c:2898 ovl_lookup_positive_unlocked fs/overlayfs/namei.c:210 [inline] ovl_lookup_single+0x200/0xbd0 fs/overlayfs/namei.c:240 ovl_lookup_layer+0x417/0x510 fs/overlayfs/namei.c:333 ovl_lookup+0xcf7/0x2a60 fs/overlayfs/namei.c:1124 lookup_one_qstr_excl+0x11f/0x260 fs/namei.c:1633 filename_create+0x297/0x540 fs/namei.c:3980 do_mknodat+0x18b/0x5b0 fs/namei.c:4125 __do_sys_mknod fs/namei.c:4171 [inline] __se_sys_mknod fs/namei.c:4169 [inline] __x64_sys_mknod+0x8c/0xa0 fs/namei.c:4169 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fc4b42b2839 However, the actual root cause is not related to overlayfs: (gdb) p lower.dentry->d_inode->i_op $6 = (const struct inode_operations *) 0xffffffff8242fcc0 <empty_iops> (gdb) p lower.dentry->d_inode->i_op->lookup $7 = (struct dentry *(*) \ (struct inode *, struct dentry *, unsigned int)) 0x0 The inode, which is passed to ovl_lookup(), has an empty i_op, so the following __lookup_slow() hit NULL doing it's job: old = inode->i_op->lookup(inode, dentry, flags); bfs_fill_super()->bfs_iget() are skipping i_op/i_fop initialization if vnode type is not BFS_VDIR or BFS_VREG (e.g. corrupted fs). Adding extra error handling fixes the issue and syzkaller repro doesn't trigger anything bad anymore. Issue affects kernel builds with CONFIG_BFS_FS=y or =m only. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+94891a5155abdf6821b7(a)syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/0000000000003d5bc30617238b6d@google.com/T/ Signed-off-by: Andrew Kanner <andrew.kanner(a)gmail.com> Cc: stable(a)vger.kernel.org --- fs/bfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index db81570c9637..e590b231ad20 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -70,6 +70,10 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &bfs_file_inops; inode->i_fop = &bfs_file_operations; inode->i_mapping->a_ops = &bfs_aops; + } else { + pr_err("Bad i_vtype for inode %s:%08lx\n", inode->i_sb->s_id, ino); + brelse(bh); + goto error; } BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); -- 2.39.3

11 months, 1 week

1
0
0 0

[PATCH v2] nfsd: fix race between laundromat and free_stateid

by Olga Kornievskaia

There is a race between laundromat handling of revoked delegations and a client sending free_stateid operation. Laundromat thread finds that delegation has expired and needs to be revoked so it marks the delegation stid revoked and it puts it on a reaper list but then it unlock the state lock and the actual delegation revocation happens without the lock. Once the stid is marked revoked a racing free_stateid processing thread does the following (1) it calls list_del_init() which removes it from the reaper list and (2) frees the delegation stid structure. The laundromat thread ends up not calling the revoke_delegation() function for this particular delegation but that means it will no release the lock lease that exists on the file. Now, a new open for this file comes in and ends up finding that lease list isn't empty and calls nfsd_breaker_owns_lease() which ends up trying to derefence a freed delegation stateid. Leading to the followint use-after-free KASAN warning: kernel: ================================================================== kernel: BUG: KASAN: slab-use-after-free in nfsd_breaker_owns_lease+0x140/0x160 [nfsd] kernel: Read of size 8 at addr ffff0000e73cd0c8 by task nfsd/6205 kernel: kernel: CPU: 2 UID: 0 PID: 6205 Comm: nfsd Kdump: loaded Not tainted 6.11.0-rc7+ #9 kernel: Hardware name: Apple Inc. Apple Virtualization Generic Platform, BIOS 2069.0.0.0.0 08/03/2024 kernel: Call trace: kernel: dump_backtrace+0x98/0x120 kernel: show_stack+0x1c/0x30 kernel: dump_stack_lvl+0x80/0xe8 kernel: print_address_description.constprop.0+0x84/0x390 kernel: print_report+0xa4/0x268 kernel: kasan_report+0xb4/0xf8 kernel: __asan_report_load8_noabort+0x1c/0x28 kernel: nfsd_breaker_owns_lease+0x140/0x160 [nfsd] kernel: leases_conflict+0x68/0x370 kernel: __break_lease+0x204/0xc38 kernel: nfsd_open_break_lease+0x8c/0xf0 [nfsd] kernel: nfsd_file_do_acquire+0xb3c/0x11d0 [nfsd] kernel: nfsd_file_acquire_opened+0x84/0x110 [nfsd] kernel: nfs4_get_vfs_file+0x634/0x958 [nfsd] kernel: nfsd4_process_open2+0xa40/0x1a40 [nfsd] kernel: nfsd4_open+0xa08/0xe80 [nfsd] kernel: nfsd4_proc_compound+0xb8c/0x2130 [nfsd] kernel: nfsd_dispatch+0x22c/0x718 [nfsd] kernel: svc_process_common+0x8e8/0x1960 [sunrpc] kernel: svc_process+0x3d4/0x7e0 [sunrpc] kernel: svc_handle_xprt+0x828/0xe10 [sunrpc] kernel: svc_recv+0x2cc/0x6a8 [sunrpc] kernel: nfsd+0x270/0x400 [nfsd] kernel: kthread+0x288/0x310 kernel: ret_from_fork+0x10/0x20 This patch proposes a fix that's based on adding 2 new additional stid's sc_status values that help coordinate between the laundromat and other operations (nfsd4_free_stateid() and nfsd4_delegreturn()). First to make sure, that once the stid is marked revoked, it is not removed by the nfsd4_free_stateid(), the laundromat take a reference on the stateid. Then, coordinating whether the stid has been put on the cl_revoked list or we are processing FREE_STATEID and need to make sure to remove it from the list, each check that state and act accordingly. If laundromat has added to the cl_revoke list before the arrival of FREE_STATEID, then nfsd4_free_stateid() knows to remove it from the list. If nfsd4_free_stateid() finds that operations arrived before laundromat has placed it on cl_revoke list, it marks the state freed and then laundromat will no longer add it to the list. Also, for nfsd4_delegreturn() when looking for the specified stid, we need to access stid that are marked removed or freeable, it means the laundromat has started processing it but hasn't finished and this delegreturn needs to return nfserr_deleg_revoked and not nfserr_bad_stateid. The latter will not trigger a FREE_STATEID and the lack of it will leave this stid on the cl_revoked list indefinitely. Fixes: 2d4a532d385f ("nfsd: ensure that clp->cl_revoked list is protected by clp->cl_lock") CC: stable(a)vger.kernel.org Signed-off-by: Olga Kornievskaia <okorniev(a)redhat.com> --- fs/nfsd/nfs4state.c | 15 ++++++++++++--- fs/nfsd/state.h | 2 ++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ac1859c7cc9d..cb989802e896 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1370,10 +1370,16 @@ static void revoke_delegation(struct nfs4_delegation *dp) if (dp->dl_stid.sc_status & (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)) { spin_lock(&clp->cl_lock); - refcount_inc(&dp->dl_stid.sc_count); + if (dp->dl_stid.sc_status & SC_STATUS_FREED) { + list_del_init(&dp->dl_recall_lru); + spin_unlock(&clp->cl_lock); + goto out; + } list_add(&dp->dl_recall_lru, &clp->cl_revoked); + dp->dl_stid.sc_status |= SC_STATUS_FREEABLE; spin_unlock(&clp->cl_lock); } +out: destroy_unhashed_deleg(dp); } @@ -6545,6 +6551,7 @@ nfs4_laundromat(struct nfsd_net *nn) dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (!state_expired(&lt, dp->dl_time)) break; + refcount_inc(&dp->dl_stid.sc_count); unhash_delegation_locked(dp, SC_STATUS_REVOKED); list_add(&dp->dl_recall_lru, &reaplist); } @@ -7156,7 +7163,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (s->sc_status & SC_STATUS_REVOKED) { spin_unlock(&s->sc_lock); dp = delegstateid(s); - list_del_init(&dp->dl_recall_lru); + if (s->sc_status & SC_STATUS_FREEABLE) + list_del_init(&dp->dl_recall_lru); + s->sc_status |= SC_STATUS_FREED; spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; @@ -7486,7 +7495,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, 0, &s, nn); + status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED|SC_STATUS_FREEABLE, &s, nn); if (status) goto out; dp = delegstateid(s); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 79c743c01a47..35b3564c065f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -114,6 +114,8 @@ struct nfs4_stid { /* For a deleg stateid kept around only to process free_stateid's: */ #define SC_STATUS_REVOKED BIT(1) #define SC_STATUS_ADMIN_REVOKED BIT(2) +#define SC_STATUS_FREEABLE BIT(3) +#define SC_STATUS_FREED BIT(4) unsigned short sc_status; struct list_head sc_cp_list; -- 2.43.5

11 months, 1 week

3
2
0 0

[PATCH] resource,kexec: walk_system_ram_res_rev must retain resource flags

by Gregory Price

walk_system_ram_res_rev() erroneously discards resource flags when passing the information to the callback. This causes systems with IORESOURCE_SYSRAM_DRIVER_MANAGED memory to have these resources selected during kexec to store kexec buffers if that memory happens to be at placed above normal system ram. This leads to undefined behavior after reboot. If the kexec buffer is never touched, nothing happens. If the kexec buffer is touched, it could lead to a crash (like below) or undefined behavior. Tested on a system with CXL memory expanders with driver managed memory, TPM enabled, and CONFIG_IMA_KEXEC=y. Adding printk's showed the flags were being discarded and as a result the check for IORESOURCE_SYSRAM_DRIVER_MANAGED passes. find_next_iomem_res: name(System RAM (kmem)) start(10000000000) end(1034fffffff) flags(83000200) locate_mem_hole_top_down: start(10000000000) end(1034fffffff) flags(0) [.] BUG: unable to handle page fault for address: ffff89834ffff000 [.] #PF: supervisor read access in kernel mode [.] #PF: error_code(0x0000) - not-present page [.] PGD c04c8bf067 P4D c04c8bf067 PUD c04c8be067 PMD 0 [.] Oops: 0000 [#1] SMP [.] RIP: 0010:ima_restore_measurement_list+0x95/0x4b0 [.] RSP: 0018:ffffc900000d3a80 EFLAGS: 00010286 [.] RAX: 0000000000001000 RBX: 0000000000000000 RCX: ffff89834ffff000 [.] RDX: 0000000000000018 RSI: ffff89834ffff000 RDI: ffff89834ffff018 [.] RBP: ffffc900000d3ba0 R08: 0000000000000020 R09: ffff888132b8a900 [.] R10: 4000000000000000 R11: 000000003a616d69 R12: 0000000000000000 [.] R13: ffffffff8404ac28 R14: 0000000000000000 R15: ffff89834ffff000 [.] FS: 0000000000000000(0000) GS:ffff893d44640000(0000) knlGS:0000000000000000 [.] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [.] ata5: SATA link down (SStatus 0 SControl 300) [.] CR2: ffff89834ffff000 CR3: 000001034d00f001 CR4: 0000000000770ef0 [.] PKRU: 55555554 [.] Call Trace: [.] <TASK> [.] ? __die+0x78/0xc0 [.] ? page_fault_oops+0x2a8/0x3a0 [.] ? exc_page_fault+0x84/0x130 [.] ? asm_exc_page_fault+0x22/0x30 [.] ? ima_restore_measurement_list+0x95/0x4b0 [.] ? template_desc_init_fields+0x317/0x410 [.] ? crypto_alloc_tfm_node+0x9c/0xc0 [.] ? init_ima_lsm+0x30/0x30 [.] ima_load_kexec_buffer+0x72/0xa0 [.] ima_init+0x44/0xa0 [.] __initstub__kmod_ima__373_1201_init_ima7+0x1e/0xb0 [.] ? init_ima_lsm+0x30/0x30 [.] do_one_initcall+0xad/0x200 [.] ? idr_alloc_cyclic+0xaa/0x110 [.] ? new_slab+0x12c/0x420 [.] ? new_slab+0x12c/0x420 [.] ? number+0x12a/0x430 [.] ? sysvec_apic_timer_interrupt+0xa/0x80 [.] ? asm_sysvec_apic_timer_interrupt+0x16/0x20 [.] ? parse_args+0xd4/0x380 [.] ? parse_args+0x14b/0x380 [.] kernel_init_freeable+0x1c1/0x2b0 [.] ? rest_init+0xb0/0xb0 [.] kernel_init+0x16/0x1a0 [.] ret_from_fork+0x2f/0x40 [.] ? rest_init+0xb0/0xb0 [.] ret_from_fork_asm+0x11/0x20 [.] </TASK> Link: https://lore.kernel.org/all/20231114091658.228030-1-bhe@redhat.com/ Fixes: 7acf164b259d ("resource: add walk_system_ram_res_rev()") Cc: stable(a)vger.kernel.org Signed-off-by: Gregory Price <gourry(a)gourry.net> --- kernel/resource.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index b730bd28b422..4101016e8b20 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -459,9 +459,7 @@ int walk_system_ram_res_rev(u64 start, u64 end, void *arg, rams_size += 16; } - rams[i].start = res.start; - rams[i++].end = res.end; - + rams[i++] = res; start = res.end + 1; } -- 2.43.0

11 months, 1 week

4
10
0 0

[RFC][PATCH] mm: Split locks in remap_file_pages()

by Roberto Sassu

From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com> Commit ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in remap_file_pages()") fixed a security issue, it added an LSM check when trying to remap file pages, so that LSMs have the opportunity to evaluate such action like for other memory operations such as mmap() and mprotect(). However, that commit called security_mmap_file() inside the mmap_lock lock, while the other calls do it before taking the lock, after commit 8b3ec6814c83 ("take security_mmap_file() outside of ->mmap_sem"). This caused lock inversion issue with IMA which was taking the mmap_lock and i_mutex lock in the opposite way when the remap_file_pages() system call was called. Solve the issue by splitting the critical region in remap_file_pages() in two regions: the first takes a read lock of mmap_lock and retrieves the VMA and the file associated, and calculate the 'prot' and 'flags' variable; the second takes a write lock on mmap_lock, checks that the VMA flags and the VMA file descriptor are the same as the ones obtained in the first critical region (otherwise the system call fails), and calls do_mmap(). In between, after releasing the read lock and taking the write lock, call security_mmap_file(), and solve the lock inversion issue. Cc: stable(a)vger.kernel.org Fixes: ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in remap_file_pages()") Reported-by: syzbot+91ae49e1c1a2634d20c0(a)syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-security-module/66f7b10e.050a0220.46d20.0036.… Reviewed-by: Roberto Sassu <roberto.sassu(a)huawei.com> (Calculate prot and flags earlier) Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> --- mm/mmap.c | 62 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 9c0fb43064b5..762944427e03 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1640,6 +1640,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long populate = 0; unsigned long ret = -EINVAL; struct file *file; + vm_flags_t vm_flags; pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/mm/remap_file_pages.rst.\n", current->comm, current->pid); @@ -1656,12 +1657,53 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (pgoff + (size >> PAGE_SHIFT) < pgoff) return ret; - if (mmap_write_lock_killable(mm)) + if (mmap_read_lock_killable(mm)) + return -EINTR; + + vma = vma_lookup(mm, start); + + if (!vma || !(vma->vm_flags & VM_SHARED)) { + mmap_read_unlock(mm); + return -EINVAL; + } + + prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; + prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; + prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; + + flags &= MAP_NONBLOCK; + flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + + /* Save vm_flags used to calculate prot and flags, and recheck later. */ + vm_flags = vma->vm_flags; + file = get_file(vma->vm_file); + + mmap_read_unlock(mm); + + ret = security_mmap_file(file, prot, flags); + if (ret) { + fput(file); + return ret; + } + + ret = -EINVAL; + + if (mmap_write_lock_killable(mm)) { + fput(file); return -EINTR; + } vma = vma_lookup(mm, start); - if (!vma || !(vma->vm_flags & VM_SHARED)) + if (!vma) + goto out; + + if (vma->vm_flags != vm_flags) + goto out; + + if (vma->vm_file != file) goto out; if (start + size > vma->vm_end) { @@ -1689,25 +1731,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, goto out; } - prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; - prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; - prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; - - flags &= MAP_NONBLOCK; - flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; - if (vma->vm_flags & VM_LOCKED) - flags |= MAP_LOCKED; - - file = get_file(vma->vm_file); - ret = security_mmap_file(vma->vm_file, prot, flags); - if (ret) - goto out_fput; ret = do_mmap(vma->vm_file, start, size, prot, flags, 0, pgoff, &populate, NULL); -out_fput: - fput(file); out: mmap_write_unlock(mm); + fput(file); if (populate) mm_populate(ret, populate); if (!IS_ERR_VALUE(ret)) -- 2.34.1

11 months, 1 week

3
4
0 0

[PATCH v3] comedi: Flush partial mappings in error case

by Jann Horn

If some remap_pfn_range() calls succeeded before one failed, we still have buffer pages mapped into the userspace page tables when we drop the buffer reference with comedi_buf_map_put(bm). The userspace mappings are only cleaned up later in the mmap error path. Fix it by explicitly flushing all mappings in our VMA on the error path. See commit 79a61cc3fc04 ("mm: avoid leaving partial pfn mappings around in error case"). Cc: stable(a)vger.kernel.org Fixes: ed9eccbe8970 ("Staging: add comedi core") Signed-off-by: Jann Horn <jannh(a)google.com> --- Note: compile-tested only; I don't actually have comedi hardware, and I don't know anything about comedi. --- Changes in v3: - gate zapping ptes on CONFIG_MMU (Intel kernel test robot) - Link to v2: https://lore.kernel.org/r/20241015-comedi-tlb-v2-1-cafb0e27dd9a@google.com Changes in v2: - only do the zapping in the pfnmap path (Ian Abbott) - use zap_vma_ptes() instead of zap_page_range_single() (Ian Abbott) - Link to v1: https://lore.kernel.org/r/20241014-comedi-tlb-v1-1-4b699144b438@google.com --- drivers/comedi/comedi_fops.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index 1b481731df96..b9df9b19d4bd 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c @@ -2407,6 +2407,18 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma) start += PAGE_SIZE; } + +#ifdef CONFIG_MMU + /* + * Leaving behind a partial mapping of a buffer we're about to + * drop is unsafe, see remap_pfn_range_notrack(). + * We need to zap the range here ourselves instead of relying + * on the automatic zapping in remap_pfn_range() because we call + * remap_pfn_range() in a loop. + */ + if (retval) + zap_vma_ptes(vma, vma->vm_start, size); +#endif } if (retval == 0) { --- base-commit: 6485cf5ea253d40d507cd71253c9568c5470cd27 change-id: 20241014-comedi-tlb-400246505961 -- Jann Horn <jannh(a)google.com>

11 months, 1 week

2
2
0 0

[PATCH RESEND] vdpa: solidrun: Fix UB bug with devres

by Philipp Stanner

In psnet_open_pf_bar() and snet_open_vf_bar() a string later passed to pcim_iomap_regions() is placed on the stack. Neither pcim_iomap_regions() nor the functions it calls copy that string. Should the string later ever be used, this, consequently, causes undefined behavior since the stack frame will by then have disappeared. Fix the bug by allocating the strings on the heap through devm_kasprintf(). Cc: stable(a)vger.kernel.org # v6.3 Fixes: 51a8f9d7f587 ("virtio: vdpa: new SolidNET DPU driver.") Reported-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr> Closes: https://lore.kernel.org/all/74e9109a-ac59-49e2-9b1d-d825c9c9f891@wanadoo.fr/ Suggested-by: Andy Shevchenko <andy(a)kernel.org> Signed-off-by: Philipp Stanner <pstanner(a)redhat.com> --- drivers/vdpa/solidrun/snet_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index 99428a04068d..c8b74980dbd1 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -555,7 +555,7 @@ static const struct vdpa_config_ops snet_config_ops = { static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) { - char name[50]; + char *name; int ret, i, mask = 0; /* We don't know which BAR will be used to communicate.. * We will map every bar with len > 0. @@ -573,7 +573,10 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) return -ENODEV; } - snprintf(name, sizeof(name), "psnet[%s]-bars", pci_name(pdev)); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev)); + if (!name) + return -ENOMEM; + ret = pcim_iomap_regions(pdev, mask, name); if (ret) { SNET_ERR(pdev, "Failed to request and map PCI BARs\n"); @@ -590,10 +593,13 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet) static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet) { - char name[50]; + char *name; int ret; - snprintf(name, sizeof(name), "snet[%s]-bar", pci_name(pdev)); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bars", pci_name(pdev)); + if (!name) + return -ENOMEM; + /* Request and map BAR */ ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name); if (ret) { -- 2.46.1

11 months, 1 week

5
7
0 0

FAILED: patch "[PATCH] KVM: s390: gaccess: Check if guest address is in memslot" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x e8061f06185be0a06a73760d6526b8b0feadfe52 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101827-implosion-twilight-c8e1@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e8061f06185be0a06a73760d6526b8b0feadfe52 Mon Sep 17 00:00:00 2001 From: Nico Boehr <nrb(a)linux.ibm.com> Date: Tue, 17 Sep 2024 17:18:33 +0200 Subject: [PATCH] KVM: s390: gaccess: Check if guest address is in memslot Previously, access_guest_page() did not check whether the given guest address is inside of a memslot. This is not a problem, since kvm_write_guest_page/kvm_read_guest_page return -EFAULT in this case. However, -EFAULT is also returned when copy_to/from_user fails. When emulating a guest instruction, the address being outside a memslot usually means that an addressing exception should be injected into the guest. Failure in copy_to/from_user however indicates that something is wrong in userspace and hence should be handled there. To be able to distinguish these two cases, return PGM_ADDRESSING in access_guest_page() when the guest address is outside guest memory. In access_guest_real(), populate vcpu->arch.pgm.code such that kvm_s390_inject_prog_cond() can be used in the caller for injecting into the guest (if applicable). Since this adds a new return value to access_guest_page(), we need to make sure that other callers are not confused by the new positive return value. There are the following users of access_guest_page(): - access_guest_with_key() does the checking itself (in guest_range_to_gpas()), so this case should never happen. Even if, the handling is set up properly. - access_guest_real() just passes the return code to its callers, which are: - read_guest_real() - see below - write_guest_real() - see below There are the following users of read_guest_real(): - ar_translation() in gaccess.c which already returns PGM_* - setup_apcb10(), setup_apcb00(), setup_apcb11() in vsie.c which always return -EFAULT on read_guest_read() nonzero return - no change - shadow_crycb(), handle_stfle() always present this as validity, this could be handled better but doesn't change current behaviour - no change There are the following users of write_guest_real(): - kvm_s390_store_status_unloaded() always returns -EFAULT on write_guest_real() failure. Fixes: 2293897805c2 ("KVM: s390: add architecture compliant guest access functions") Cc: stable(a)vger.kernel.org Signed-off-by: Nico Boehr <nrb(a)linux.ibm.com> Reviewed-by: Heiko Carstens <hca(a)linux.ibm.com> Link: https://lore.kernel.org/r/20240917151904.74314-2-nrb@linux.ibm.com Acked-by: Janosch Frank <frankja(a)linux.ibm.com> Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index e65f597e3044..a688351f4ab5 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -828,6 +828,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, const gfn_t gfn = gpa_to_gfn(gpa); int rc; + if (!gfn_to_memslot(kvm, gfn)) + return PGM_ADDRESSING; if (mode == GACC_STORE) rc = kvm_write_guest_page(kvm, gfn, data, offset, len); else @@ -985,6 +987,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, gra += fragment_len; data += fragment_len; } + if (rc > 0) + vcpu->arch.pgm.code = rc; return rc; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index b320d12aa049..3fde45a151f2 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, * @len: number of bytes to copy * * Copy @len bytes from @data (kernel space) to @gra (guest real address). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest low address and key protection are not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying from @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to guest memory. */ @@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, * @len: number of bytes to copy * * Copy @len bytes from @gra (guest real address) to @data (kernel space). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest key protection is not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying to @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to kernel space. */

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] KVM: s390: gaccess: Check if guest address is in memslot" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x e8061f06185be0a06a73760d6526b8b0feadfe52 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101826-gracious-singer-816f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e8061f06185be0a06a73760d6526b8b0feadfe52 Mon Sep 17 00:00:00 2001 From: Nico Boehr <nrb(a)linux.ibm.com> Date: Tue, 17 Sep 2024 17:18:33 +0200 Subject: [PATCH] KVM: s390: gaccess: Check if guest address is in memslot Previously, access_guest_page() did not check whether the given guest address is inside of a memslot. This is not a problem, since kvm_write_guest_page/kvm_read_guest_page return -EFAULT in this case. However, -EFAULT is also returned when copy_to/from_user fails. When emulating a guest instruction, the address being outside a memslot usually means that an addressing exception should be injected into the guest. Failure in copy_to/from_user however indicates that something is wrong in userspace and hence should be handled there. To be able to distinguish these two cases, return PGM_ADDRESSING in access_guest_page() when the guest address is outside guest memory. In access_guest_real(), populate vcpu->arch.pgm.code such that kvm_s390_inject_prog_cond() can be used in the caller for injecting into the guest (if applicable). Since this adds a new return value to access_guest_page(), we need to make sure that other callers are not confused by the new positive return value. There are the following users of access_guest_page(): - access_guest_with_key() does the checking itself (in guest_range_to_gpas()), so this case should never happen. Even if, the handling is set up properly. - access_guest_real() just passes the return code to its callers, which are: - read_guest_real() - see below - write_guest_real() - see below There are the following users of read_guest_real(): - ar_translation() in gaccess.c which already returns PGM_* - setup_apcb10(), setup_apcb00(), setup_apcb11() in vsie.c which always return -EFAULT on read_guest_read() nonzero return - no change - shadow_crycb(), handle_stfle() always present this as validity, this could be handled better but doesn't change current behaviour - no change There are the following users of write_guest_real(): - kvm_s390_store_status_unloaded() always returns -EFAULT on write_guest_real() failure. Fixes: 2293897805c2 ("KVM: s390: add architecture compliant guest access functions") Cc: stable(a)vger.kernel.org Signed-off-by: Nico Boehr <nrb(a)linux.ibm.com> Reviewed-by: Heiko Carstens <hca(a)linux.ibm.com> Link: https://lore.kernel.org/r/20240917151904.74314-2-nrb@linux.ibm.com Acked-by: Janosch Frank <frankja(a)linux.ibm.com> Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index e65f597e3044..a688351f4ab5 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -828,6 +828,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, const gfn_t gfn = gpa_to_gfn(gpa); int rc; + if (!gfn_to_memslot(kvm, gfn)) + return PGM_ADDRESSING; if (mode == GACC_STORE) rc = kvm_write_guest_page(kvm, gfn, data, offset, len); else @@ -985,6 +987,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, gra += fragment_len; data += fragment_len; } + if (rc > 0) + vcpu->arch.pgm.code = rc; return rc; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index b320d12aa049..3fde45a151f2 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, * @len: number of bytes to copy * * Copy @len bytes from @data (kernel space) to @gra (guest real address). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest low address and key protection are not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying from @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to guest memory. */ @@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, * @len: number of bytes to copy * * Copy @len bytes from @gra (guest real address) to @data (kernel space). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest key protection is not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying to @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to kernel space. */

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] KVM: s390: gaccess: Check if guest address is in memslot" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x e8061f06185be0a06a73760d6526b8b0feadfe52 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101824-departure-oversight-aa1e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e8061f06185be0a06a73760d6526b8b0feadfe52 Mon Sep 17 00:00:00 2001 From: Nico Boehr <nrb(a)linux.ibm.com> Date: Tue, 17 Sep 2024 17:18:33 +0200 Subject: [PATCH] KVM: s390: gaccess: Check if guest address is in memslot Previously, access_guest_page() did not check whether the given guest address is inside of a memslot. This is not a problem, since kvm_write_guest_page/kvm_read_guest_page return -EFAULT in this case. However, -EFAULT is also returned when copy_to/from_user fails. When emulating a guest instruction, the address being outside a memslot usually means that an addressing exception should be injected into the guest. Failure in copy_to/from_user however indicates that something is wrong in userspace and hence should be handled there. To be able to distinguish these two cases, return PGM_ADDRESSING in access_guest_page() when the guest address is outside guest memory. In access_guest_real(), populate vcpu->arch.pgm.code such that kvm_s390_inject_prog_cond() can be used in the caller for injecting into the guest (if applicable). Since this adds a new return value to access_guest_page(), we need to make sure that other callers are not confused by the new positive return value. There are the following users of access_guest_page(): - access_guest_with_key() does the checking itself (in guest_range_to_gpas()), so this case should never happen. Even if, the handling is set up properly. - access_guest_real() just passes the return code to its callers, which are: - read_guest_real() - see below - write_guest_real() - see below There are the following users of read_guest_real(): - ar_translation() in gaccess.c which already returns PGM_* - setup_apcb10(), setup_apcb00(), setup_apcb11() in vsie.c which always return -EFAULT on read_guest_read() nonzero return - no change - shadow_crycb(), handle_stfle() always present this as validity, this could be handled better but doesn't change current behaviour - no change There are the following users of write_guest_real(): - kvm_s390_store_status_unloaded() always returns -EFAULT on write_guest_real() failure. Fixes: 2293897805c2 ("KVM: s390: add architecture compliant guest access functions") Cc: stable(a)vger.kernel.org Signed-off-by: Nico Boehr <nrb(a)linux.ibm.com> Reviewed-by: Heiko Carstens <hca(a)linux.ibm.com> Link: https://lore.kernel.org/r/20240917151904.74314-2-nrb@linux.ibm.com Acked-by: Janosch Frank <frankja(a)linux.ibm.com> Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index e65f597e3044..a688351f4ab5 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -828,6 +828,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, const gfn_t gfn = gpa_to_gfn(gpa); int rc; + if (!gfn_to_memslot(kvm, gfn)) + return PGM_ADDRESSING; if (mode == GACC_STORE) rc = kvm_write_guest_page(kvm, gfn, data, offset, len); else @@ -985,6 +987,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, gra += fragment_len; data += fragment_len; } + if (rc > 0) + vcpu->arch.pgm.code = rc; return rc; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index b320d12aa049..3fde45a151f2 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, * @len: number of bytes to copy * * Copy @len bytes from @data (kernel space) to @gra (guest real address). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest low address and key protection are not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying from @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to guest memory. */ @@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, * @len: number of bytes to copy * * Copy @len bytes from @gra (guest real address) to @data (kernel space). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest key protection is not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying to @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to kernel space. */

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] KVM: s390: gaccess: Check if guest address is in memslot" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x e8061f06185be0a06a73760d6526b8b0feadfe52 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101823-tractor-twitter-a318@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e8061f06185be0a06a73760d6526b8b0feadfe52 Mon Sep 17 00:00:00 2001 From: Nico Boehr <nrb(a)linux.ibm.com> Date: Tue, 17 Sep 2024 17:18:33 +0200 Subject: [PATCH] KVM: s390: gaccess: Check if guest address is in memslot Previously, access_guest_page() did not check whether the given guest address is inside of a memslot. This is not a problem, since kvm_write_guest_page/kvm_read_guest_page return -EFAULT in this case. However, -EFAULT is also returned when copy_to/from_user fails. When emulating a guest instruction, the address being outside a memslot usually means that an addressing exception should be injected into the guest. Failure in copy_to/from_user however indicates that something is wrong in userspace and hence should be handled there. To be able to distinguish these two cases, return PGM_ADDRESSING in access_guest_page() when the guest address is outside guest memory. In access_guest_real(), populate vcpu->arch.pgm.code such that kvm_s390_inject_prog_cond() can be used in the caller for injecting into the guest (if applicable). Since this adds a new return value to access_guest_page(), we need to make sure that other callers are not confused by the new positive return value. There are the following users of access_guest_page(): - access_guest_with_key() does the checking itself (in guest_range_to_gpas()), so this case should never happen. Even if, the handling is set up properly. - access_guest_real() just passes the return code to its callers, which are: - read_guest_real() - see below - write_guest_real() - see below There are the following users of read_guest_real(): - ar_translation() in gaccess.c which already returns PGM_* - setup_apcb10(), setup_apcb00(), setup_apcb11() in vsie.c which always return -EFAULT on read_guest_read() nonzero return - no change - shadow_crycb(), handle_stfle() always present this as validity, this could be handled better but doesn't change current behaviour - no change There are the following users of write_guest_real(): - kvm_s390_store_status_unloaded() always returns -EFAULT on write_guest_real() failure. Fixes: 2293897805c2 ("KVM: s390: add architecture compliant guest access functions") Cc: stable(a)vger.kernel.org Signed-off-by: Nico Boehr <nrb(a)linux.ibm.com> Reviewed-by: Heiko Carstens <hca(a)linux.ibm.com> Link: https://lore.kernel.org/r/20240917151904.74314-2-nrb@linux.ibm.com Acked-by: Janosch Frank <frankja(a)linux.ibm.com> Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index e65f597e3044..a688351f4ab5 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -828,6 +828,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, const gfn_t gfn = gpa_to_gfn(gpa); int rc; + if (!gfn_to_memslot(kvm, gfn)) + return PGM_ADDRESSING; if (mode == GACC_STORE) rc = kvm_write_guest_page(kvm, gfn, data, offset, len); else @@ -985,6 +987,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, gra += fragment_len; data += fragment_len; } + if (rc > 0) + vcpu->arch.pgm.code = rc; return rc; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index b320d12aa049..3fde45a151f2 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, * @len: number of bytes to copy * * Copy @len bytes from @data (kernel space) to @gra (guest real address). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest low address and key protection are not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying from @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to guest memory. */ @@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, * @len: number of bytes to copy * * Copy @len bytes from @gra (guest real address) to @data (kernel space). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest key protection is not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying to @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to kernel space. */

11 months, 1 week

1
0
0 0

[PATCH 5.10] gfs2: Fix potential glock use-after-free on unmount

by He Zhe

From: Andreas Gruenbacher <agruenba(a)redhat.com> commit 0636b34b44589b142700ac137b5f69802cfe2e37 upstream. When a DLM lockspace is released and there ares still locks in that lockspace, DLM will unlock those locks automatically. Commit fb6791d100d1b started exploiting this behavior to speed up filesystem unmount: gfs2 would simply free glocks it didn't want to unlock and then release the lockspace. This didn't take the bast callbacks for asynchronous lock contention notifications into account, which remain active until until a lock is unlocked or its lockspace is released. To prevent those callbacks from accessing deallocated objects, put the glocks that should not be unlocked on the sd_dead_glocks list, release the lockspace, and only then free those glocks. As an additional measure, ignore unexpected ast and bast callbacks if the receiving glock is dead. Fixes: fb6791d100d1b ("GFS2: skip dlm_unlock calls in unmount") Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com> Cc: David Teigland <teigland(a)redhat.com> CVE: CVE-2024-38570 [Zhe: sd_glock_wait in gfs2_glock_free_later is not renamed to sd_kill_wait yet. So still use sd_glock_wait in gfs2_glock_free_later in this case.] Signed-off-by: He Zhe <zhe.he(a)windriver.com> --- fs/gfs2/glock.c | 35 ++++++++++++++++++++++++++++++++--- fs/gfs2/glock.h | 1 + fs/gfs2/incore.h | 1 + fs/gfs2/lock_dlm.c | 13 +++++++++++-- fs/gfs2/ops_fstype.c | 1 + fs/gfs2/super.c | 3 --- 6 files changed, 46 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b0f01a8e3776..11206d810344 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -159,19 +159,46 @@ static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) return true; } -void gfs2_glock_free(struct gfs2_glock *gl) +static void __gfs2_glock_free(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); +} + +void gfs2_glock_free(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + __gfs2_glock_free(gl); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); } +void gfs2_glock_free_later(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + spin_lock(&lru_lock); + list_add(&gl->gl_lru, &sdp->sd_dead_glocks); + spin_unlock(&lru_lock); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); +} + +static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp) +{ + struct list_head *list = &sdp->sd_dead_glocks; + + while(!list_empty(list)) { + struct gfs2_glock *gl; + + gl = list_first_entry(list, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + __gfs2_glock_free(gl); + } +} + /** * gfs2_glock_hold() - increment reference count on glock * @gl: The glock to hold @@ -2016,6 +2043,8 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) wait_event_timeout(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0, HZ * 600); + gfs2_lm_unmount(sdp); + gfs2_free_dead_glocks(sdp); glock_hash_walk(dump_glock_func, sdp); } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 53813364517b..b81b369e7485 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -253,6 +253,7 @@ extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); extern void gfs2_glock_free(struct gfs2_glock *gl); +extern void gfs2_glock_free_later(struct gfs2_glock *gl); extern int __init gfs2_glock_init(void); extern void gfs2_glock_exit(void); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index f8858d995b24..44cee9a4eef6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -863,6 +863,7 @@ struct gfs2_sbd { struct gfs2_holder sd_freeze_gh; atomic_t sd_freeze_state; struct mutex sd_freeze_mutex; + struct list_head sd_dead_glocks; char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 5564aa8b4592..9aad03f0dcdf 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -118,6 +118,11 @@ static void gdlm_ast(void *arg) struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; + /* If the glock is dead, we only react to a dlm_unlock() reply. */ + if (__lockref_is_dead(&gl->gl_lockref) && + gl->gl_lksb.sb_status != -DLM_EUNLOCK) + return; + gfs2_update_reply_times(gl); BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -168,6 +173,9 @@ static void gdlm_bast(void *arg, int mode) { struct gfs2_glock *gl = arg; + if (__lockref_is_dead(&gl->gl_lockref)) + return; + switch (mode) { case DLM_LOCK_EX: gfs2_glock_cb(gl, LM_ST_UNLOCKED); @@ -286,6 +294,8 @@ static void gdlm_put_lock(struct gfs2_glock *gl) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; + BUG_ON(!__lockref_is_dead(&gl->gl_lockref)); + if (gl->gl_lksb.sb_lkid == 0) { gfs2_glock_free(gl); return; @@ -305,7 +315,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && !gl->gl_lksb.sb_lvbptr) { - gfs2_glock_free(gl); + gfs2_glock_free_later(gl); return; } @@ -315,7 +325,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl) fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); - return; } } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 648f7336043f..4a8c070d14cf 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -141,6 +141,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_log_flush_wait); atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); mutex_init(&sdp->sd_freeze_mutex); + INIT_LIST_HEAD(&sdp->sd_dead_glocks); return sdp; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 8cf4ef61cdc4..039d678b1689 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -662,10 +662,7 @@ static void gfs2_put_super(struct super_block *sb) gfs2_gl_hash_clear(sdp); truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); - /* Unmount the locking protocol */ - gfs2_lm_unmount(sdp); - /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); free_sbd(sdp); } -- 2.25.1

11 months, 1 week

2
9
0 0

[PATCH 4.19] net: dsa: mv88e6xxx: Fix out-of-bound access

by hsimeliere.opensource＠witekio.com

From: Joseph Huang <Joseph.Huang(a)garmin.com> commit 528876d867a23b5198022baf2e388052ca67c952 upstream. If an ATU violation was caused by a CPU Load operation, the SPID could be larger than DSA_MAX_PORTS (the size of mv88e6xxx_chip.ports[] array). Fixes: 75c05a74e745 ("net: dsa: mv88e6xxx: Fix counting of ATU violations") Signed-off-by: Joseph Huang <Joseph.Huang(a)garmin.com> Reviewed-by: Andrew Lunn <andrew(a)lunn.ch> Link: https://patch.msgid.link/20240819235251.1331763-1-Joseph.Huang@garmin.com Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> Signed-off-by: Bruno VERNAY <bruno.vernay(a)se.com> Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource(a)witekio.com> --- drivers/net/dsa/mv88e6xxx/global1_atu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index ea243840ee0f..aca35c9d9fbd 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -363,7 +363,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) dev_err_ratelimited(chip->dev, "ATU full violation for %pM portvec %x spid %d\n", entry.mac, entry.portvec, spid); - chip->ports[spid].atu_full_violation++; + if (spid < ARRAY_SIZE(chip->ports)) + chip->ports[spid].atu_full_violation++; } mutex_unlock(&chip->reg_lock); -- 2.43.0

11 months, 1 week

1
0
0 0

[PATCH 05/13] media: mgb4: protect driver against spectre

by Mauro Carvalho Chehab

Frequency range is set from sysfs via frequency_range_store(), being vulnerable to spectre, as reported by smatch: drivers/media/pci/mgb4/mgb4_cmt.c:231 mgb4_cmt_set_vin_freq_range() warn: potential spectre issue 'cmt_vals_in' [r] drivers/media/pci/mgb4/mgb4_cmt.c:238 mgb4_cmt_set_vin_freq_range() warn: possible spectre second half. 'reg_set' Fix it. Fixes: 0ab13674a9bd ("media: pci: mgb4: Added Digiteq Automotive MGB4 driver") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/media/pci/mgb4/mgb4_cmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/pci/mgb4/mgb4_cmt.c b/drivers/media/pci/mgb4/mgb4_cmt.c index 70dc78ef193c..a25b68403bc6 100644 --- a/drivers/media/pci/mgb4/mgb4_cmt.c +++ b/drivers/media/pci/mgb4/mgb4_cmt.c @@ -227,6 +227,8 @@ void mgb4_cmt_set_vin_freq_range(struct mgb4_vin_dev *vindev, u32 config; size_t i; + freq_range = array_index_nospec(freq_range, ARRAY_SIZE(cmt_vals_in)); + addr = cmt_addrs_in[vindev->config->id]; reg_set = cmt_vals_in[freq_range]; -- 2.47.0

11 months, 1 week

2
3
0 0

[PATCH v2] remoteproc: qcom_q6v5_pas: disable auto boot for wpss

by Balaji Pothunoori

Currently, the rproc "atomic_t power" variable is incremented during: a. WPSS rproc auto boot. b. AHB power on for ath11k. During AHB power off (rmmod ath11k_ahb.ko), rproc_shutdown fails to unload the WPSS firmware because the rproc->power value is '2', causing the atomic_dec_and_test(&rproc->power) condition to fail. Consequently, during AHB power on (insmod ath11k_ahb.ko), QMI_WLANFW_HOST_CAP_REQ_V01 fails due to the host and firmware QMI states being out of sync. Fixes: 300ed425dfa9 ("remoteproc: qcom_q6v5_pas: Add SC7280 ADSP, CDSP & WPSS") Cc: stable(a)vger.kernel.org Signed-off-by: Balaji Pothunoori <quic_bpothuno(a)quicinc.com> --- v2: updated commit text. added Fixes/cc:stable tags. drivers/remoteproc/qcom_q6v5_pas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index ef82835e98a4..05963d7924df 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -1344,7 +1344,7 @@ static const struct adsp_data sc7280_wpss_resource = { .crash_reason_smem = 626, .firmware_name = "wpss.mdt", .pas_id = 6, - .auto_boot = true, + .auto_boot = false, .proxy_pd_names = (char*[]){ "cx", "mx", -- 2.34.1

11 months, 1 week

2
1
0 0

FAILED: patch "[PATCH] mptcp: prevent MPC handshake on port-based signal endpoints" failed to apply to 6.11-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.11-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y git checkout FETCH_HEAD git cherry-pick -x 3d041393ea8c815f773020fb4a995331a69c0139 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101809-refinish-concave-f892@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 3d041393ea8c815f773020fb4a995331a69c0139 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Mon, 14 Oct 2024 16:06:00 +0200 Subject: [PATCH] mptcp: prevent MPC handshake on port-based signal endpoints Syzkaller reported a lockdep splat: ============================================ WARNING: possible recursive locking detected 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Not tainted -------------------------------------------- syz-executor364/5113 is trying to acquire lock: ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 but task is already holding lock: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(k-slock-AF_INET); lock(k-slock-AF_INET); *** DEADLOCK *** May be due to missing lock nesting notation 7 locks held by syz-executor364/5113: #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x153/0x1b10 net/mptcp/protocol.c:1806 #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg_fastopen+0x11f/0x530 net/mptcp/protocol.c:1727 #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x5f/0x1b80 net/ipv4/ip_output.c:470 #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1390 net/ipv4/ip_output.c:228 #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: local_lock_acquire include/linux/local_lock_internal.h:29 [inline] #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x33b/0x15b0 net/core/dev.c:6104 #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x230/0x5f0 net/ipv4/ip_input.c:232 #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 stack backtrace: CPU: 0 UID: 0 PID: 5113 Comm: syz-executor364 Not tainted 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 check_deadlock kernel/locking/lockdep.c:3061 [inline] validate_chain+0x15d3/0x5900 kernel/locking/lockdep.c:3855 __lock_acquire+0x137a/0x2040 kernel/locking/lockdep.c:5142 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 mptcp_sk_clone_init+0x32/0x13c0 net/mptcp/protocol.c:3279 subflow_syn_recv_sock+0x931/0x1920 net/mptcp/subflow.c:874 tcp_check_req+0xfe4/0x1a20 net/ipv4/tcp_minisocks.c:853 tcp_v4_rcv+0x1c3e/0x37f0 net/ipv4/tcp_ipv4.c:2267 ip_protocol_deliver_rcu+0x22e/0x440 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x341/0x5f0 net/ipv4/ip_input.c:233 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x2bf/0x650 net/core/dev.c:5775 process_backlog+0x662/0x15b0 net/core/dev.c:6108 __napi_poll+0xcb/0x490 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:6963 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 do_softirq+0x11b/0x1e0 kernel/softirq.c:455 </IRQ> <TASK> __local_bh_enable_ip+0x1bb/0x200 kernel/softirq.c:382 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:908 [inline] __dev_queue_xmit+0x1763/0x3e90 net/core/dev.c:4450 dev_queue_xmit include/linux/netdevice.h:3105 [inline] neigh_hh_output include/net/neighbour.h:526 [inline] neigh_output include/net/neighbour.h:540 [inline] ip_finish_output2+0xd41/0x1390 net/ipv4/ip_output.c:235 ip_local_out net/ipv4/ip_output.c:129 [inline] __ip_queue_xmit+0x118c/0x1b80 net/ipv4/ip_output.c:535 __tcp_transmit_skb+0x2544/0x3b30 net/ipv4/tcp_output.c:1466 tcp_rcv_synsent_state_process net/ipv4/tcp_input.c:6542 [inline] tcp_rcv_state_process+0x2c32/0x4570 net/ipv4/tcp_input.c:6729 tcp_v4_do_rcv+0x77d/0xc70 net/ipv4/tcp_ipv4.c:1934 sk_backlog_rcv include/net/sock.h:1111 [inline] __release_sock+0x214/0x350 net/core/sock.c:3004 release_sock+0x61/0x1f0 net/core/sock.c:3558 mptcp_sendmsg_fastopen+0x1ad/0x530 net/mptcp/protocol.c:1733 mptcp_sendmsg+0x1884/0x1b10 net/mptcp/protocol.c:1812 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 ___sys_sendmsg net/socket.c:2651 [inline] __sys_sendmmsg+0x3b2/0x740 net/socket.c:2737 __do_sys_sendmmsg net/socket.c:2766 [inline] __se_sys_sendmmsg net/socket.c:2763 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2763 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f04fb13a6b9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 01 1a 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd651f42d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f04fb13a6b9 RDX: 0000000000000001 RSI: 0000000020000d00 RDI: 0000000000000004 RBP: 00007ffd651f4310 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000020000080 R11: 0000000000000246 R12: 00000000000f4240 R13: 00007f04fb187449 R14: 00007ffd651f42f4 R15: 00007ffd651f4300 </TASK> As noted by Cong Wang, the splat is false positive, but the code path leading to the report is an unexpected one: a client is attempting an MPC handshake towards the in-kernel listener created by the in-kernel PM for a port based signal endpoint. Such connection will be never accepted; many of them can make the listener queue full and preventing the creation of MPJ subflow via such listener - its intended role. Explicitly detect this scenario at initial-syn time and drop the incoming MPC request. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Cc: stable(a)vger.kernel.org Reported-by: syzbot+f4aacdfef2c6a6529c3e(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f4aacdfef2c6a6529c3e Cc: Cong Wang <cong.wang(a)bytedance.com> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241014-net-mptcp-mpc-port-endp-v2-1-7faea8e6b6ae… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index ad88bd3c58df..19eb9292bd60 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -17,6 +17,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), + SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 3206cdda8bb1..128282982843 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -12,6 +12,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */ + MPTCP_MIB_MPCAPABLEENDPATTEMPT, /* Prohibited MPC to port-based endp */ MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f6f0a38a0750..1a78998fe1f4 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1121,6 +1121,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); err = __inet_listen_sk(ssk, backlog); if (!err) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 74417aae08d0..568a72702b08 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -535,6 +535,7 @@ struct mptcp_subflow_context { __unused : 8; bool data_avail; bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? */ u32 remote_nonce; u64 thmac; u32 local_nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 25dde81bcb75..6170f2fff71e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -132,6 +132,13 @@ static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason) } } +static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb) +{ + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT); + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + return -EPERM; +} + /* Init mptcp request socket. * * Returns an error code if a JOIN has failed and a TCP reset @@ -165,6 +172,8 @@ static int subflow_check_req(struct request_sock *req, if (opt_mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); + if (unlikely(listener->pm_listener)) + return subflow_reset_req_endp(req, skb); if (opt_mp_join) return 0; } else if (opt_mp_join) { @@ -172,6 +181,8 @@ static int subflow_check_req(struct request_sock *req, if (mp_opt.backup) SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); + } else if (unlikely(listener->pm_listener)) { + return subflow_reset_req_endp(req, skb); } if (opt_mp_capable && listener->request_mptcp) {

11 months, 1 week

3
2
0 0

[PATCH 5.10 5.15 1/3] io_uring/sqpoll: do not allow pinning outside of cpuset

by Felix Moessbauer

commit f011c9cf04c06f16b24f583d313d3c012e589e50 upstream. The submit queue polling threads are userland threads that just never exit to the userland. When creating the thread with IORING_SETUP_SQ_AFF, the affinity of the poller thread is set to the cpu specified in sq_thread_cpu. However, this CPU can be outside of the cpuset defined by the cgroup cpuset controller. This violates the rules defined by the cpuset controller and is a potential issue for realtime applications. In b7ed6d8ffd6 we fixed the default affinity of the poller thread, in case no explicit pinning is required by inheriting the one of the creating task. In case of explicit pinning, the check is more complicated, as also a cpu outside of the parent cpumask is allowed. We implemented this by using cpuset_cpus_allowed (that has support for cgroup cpusets) and testing if the requested cpu is in the set. Fixes: 37d1e2e3642e ("io_uring: move SQPOLL thread io-wq forked worker") Signed-off-by: Felix Moessbauer <felix.moessbauer(a)siemens.com> Link: https://lore.kernel.org/r/20240909150036.55921-1-felix.moessbauer@siemens.c… Signed-off-by: Jens Axboe <axboe(a)kernel.dk> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- io_uring/io_uring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8ed2c65529714..6b6fd244233f8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -56,6 +56,7 @@ #include <linux/mm.h> #include <linux/mman.h> #include <linux/percpu.h> +#include <linux/cpuset.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/bvec.h> @@ -8746,10 +8747,12 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, return 0; if (p->flags & IORING_SETUP_SQ_AFF) { + struct cpumask allowed_mask; int cpu = p->sq_thread_cpu; ret = -EINVAL; - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + cpuset_cpus_allowed(current, &allowed_mask); + if (!cpumask_test_cpu(cpu, &allowed_mask)) goto err_sqpoll; sqd->sq_cpu = cpu; } else { -- 2.39.5

11 months, 1 week

2
3
0 0

[PATCH 5.10 0/1] wifi: rtl8xxxu: Fix the error handling of the probe function

by Daniil Dulov

Svacer reports a NULL-pointer dereference in rtl8xxxu_probe(). After having been compared to a NULL value, pointer hw is passed as 1st parameter in call to ieee80211_free_hw(), where it is dereferenced. The problem is present in 5.10 stable release and can be fixed by the following upstream patch that can be cleanly applied to 5.10 branch.

11 months, 1 week

1
1
0 0

FAILED: patch "[PATCH] tcp: fix mptcp DSS corruption due to large pmtu xmit" failed to apply to 6.11-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.11-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y git checkout FETCH_HEAD git cherry-pick -x 4dabcdf581217e60690467a37c956a5b8dbc6bd9 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101432-shucking-snagged-7c42@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^.. Possible dependencies: 4dabcdf58121 ("tcp: fix mptcp DSS corruption due to large pmtu xmit") 65249feb6b3d ("net: add support for skbs with unreadable frags") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 4dabcdf581217e60690467a37c956a5b8dbc6bd9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Tue, 8 Oct 2024 13:04:53 +0200 Subject: [PATCH] tcp: fix mptcp DSS corruption due to large pmtu xmit Syzkaller was able to trigger a DSS corruption: TCP: request_sock_subflow_v4: Possible SYN flooding on port [::]:20002. Sending cookies. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 5227 at net/mptcp/protocol.c:695 __mptcp_move_skbs_from_subflow+0x20a9/0x21f0 net/mptcp/protocol.c:695 Modules linked in: CPU: 0 UID: 0 PID: 5227 Comm: syz-executor350 Not tainted 6.11.0-syzkaller-08829-gaf9c191ac2a0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:__mptcp_move_skbs_from_subflow+0x20a9/0x21f0 net/mptcp/protocol.c:695 Code: 0f b6 dc 31 ff 89 de e8 b5 dd ea f5 89 d8 48 81 c4 50 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc e8 98 da ea f5 90 <0f> 0b 90 e9 47 ff ff ff e8 8a da ea f5 90 0f 0b 90 e9 99 e0 ff ff RSP: 0018:ffffc90000006db8 EFLAGS: 00010246 RAX: ffffffff8ba9df18 RBX: 00000000000055f0 RCX: ffff888030023c00 RDX: 0000000000000100 RSI: 00000000000081e5 RDI: 00000000000055f0 RBP: 1ffff110062bf1ae R08: ffffffff8ba9cf12 R09: 1ffff110062bf1b8 R10: dffffc0000000000 R11: ffffed10062bf1b9 R12: 0000000000000000 R13: dffffc0000000000 R14: 00000000700cec61 R15: 00000000000081e5 FS: 000055556679c380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020287000 CR3: 0000000077892000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <IRQ> move_skbs_to_msk net/mptcp/protocol.c:811 [inline] mptcp_data_ready+0x29c/0xa90 net/mptcp/protocol.c:854 subflow_data_ready+0x34a/0x920 net/mptcp/subflow.c:1490 tcp_data_queue+0x20fd/0x76c0 net/ipv4/tcp_input.c:5283 tcp_rcv_established+0xfba/0x2020 net/ipv4/tcp_input.c:6237 tcp_v4_do_rcv+0x96d/0xc70 net/ipv4/tcp_ipv4.c:1915 tcp_v4_rcv+0x2dc0/0x37f0 net/ipv4/tcp_ipv4.c:2350 ip_protocol_deliver_rcu+0x22e/0x440 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x341/0x5f0 net/ipv4/ip_input.c:233 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 __netif_receive_skb_one_core net/core/dev.c:5662 [inline] __netif_receive_skb+0x2bf/0x650 net/core/dev.c:5775 process_backlog+0x662/0x15b0 net/core/dev.c:6107 __napi_poll+0xcb/0x490 net/core/dev.c:6771 napi_poll net/core/dev.c:6840 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:6962 handle_softirqs+0x2c5/0x980 kernel/softirq.c:554 do_softirq+0x11b/0x1e0 kernel/softirq.c:455 </IRQ> <TASK> __local_bh_enable_ip+0x1bb/0x200 kernel/softirq.c:382 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:919 [inline] __dev_queue_xmit+0x1764/0x3e80 net/core/dev.c:4451 dev_queue_xmit include/linux/netdevice.h:3094 [inline] neigh_hh_output include/net/neighbour.h:526 [inline] neigh_output include/net/neighbour.h:540 [inline] ip_finish_output2+0xd41/0x1390 net/ipv4/ip_output.c:236 ip_local_out net/ipv4/ip_output.c:130 [inline] __ip_queue_xmit+0x118c/0x1b80 net/ipv4/ip_output.c:536 __tcp_transmit_skb+0x2544/0x3b30 net/ipv4/tcp_output.c:1466 tcp_transmit_skb net/ipv4/tcp_output.c:1484 [inline] tcp_mtu_probe net/ipv4/tcp_output.c:2547 [inline] tcp_write_xmit+0x641d/0x6bf0 net/ipv4/tcp_output.c:2752 __tcp_push_pending_frames+0x9b/0x360 net/ipv4/tcp_output.c:3015 tcp_push_pending_frames include/net/tcp.h:2107 [inline] tcp_data_snd_check net/ipv4/tcp_input.c:5714 [inline] tcp_rcv_established+0x1026/0x2020 net/ipv4/tcp_input.c:6239 tcp_v4_do_rcv+0x96d/0xc70 net/ipv4/tcp_ipv4.c:1915 sk_backlog_rcv include/net/sock.h:1113 [inline] __release_sock+0x214/0x350 net/core/sock.c:3072 release_sock+0x61/0x1f0 net/core/sock.c:3626 mptcp_push_release net/mptcp/protocol.c:1486 [inline] __mptcp_push_pending+0x6b5/0x9f0 net/mptcp/protocol.c:1625 mptcp_sendmsg+0x10bb/0x1b10 net/mptcp/protocol.c:1903 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2603 ___sys_sendmsg net/socket.c:2657 [inline] __sys_sendmsg+0x2aa/0x390 net/socket.c:2686 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fb06e9317f9 Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe2cfd4f98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fb06e97f468 RCX: 00007fb06e9317f9 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000005 RBP: 00007fb06e97f446 R08: 0000555500000000 R09: 0000555500000000 R10: 0000555500000000 R11: 0000000000000246 R12: 00007fb06e97f406 R13: 0000000000000001 R14: 00007ffe2cfd4fe0 R15: 0000000000000003 </TASK> Additionally syzkaller provided a nice reproducer. The repro enables pmtu on the loopback device, leading to tcp_mtu_probe() generating very large probe packets. tcp_can_coalesce_send_queue_head() currently does not check for mptcp-level invariants, and allowed the creation of cross-DSS probes, leading to the mentioned corruption. Address the issue teaching tcp_can_coalesce_send_queue_head() about mptcp using the tcp_skb_can_collapse(), also reducing the code duplication. Fixes: 85712484110d ("tcp: coalesce/collapse must respect MPTCP extensions") Cc: stable(a)vger.kernel.org Reported-by: syzbot+d1bff73460e33101f0e7(a)syzkaller.appspotmail.com Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/513 Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Acked-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-2-c6fb8e93e55… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4fd746bd4d54..68804fd01daf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2342,10 +2342,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor) || - tcp_has_tx_tstamp(skb) || - !skb_pure_zcopy_same(skb, next) || - skb_frags_readable(skb) != skb_frags_readable(next)) + if (tcp_has_tx_tstamp(skb) || !tcp_skb_can_collapse(skb, next)) return false; len -= skb->len;

11 months, 1 week

3
2
0 0

[PATCH 6.1.y 5.15.y] drm/shmem-helper: Fix BUG_ON() on mmap(PROT_WRITE, MAP_PRIVATE)

by Sherry Yang

From: "Wachowski, Karol" <karol.wachowski(a)intel.com> commit 39bc27bd688066a63e56f7f64ad34fae03fbe3b8 upstream. Lack of check for copy-on-write (COW) mapping in drm_gem_shmem_mmap allows users to call mmap with PROT_WRITE and MAP_PRIVATE flag causing a kernel panic due to BUG_ON in vmf_insert_pfn_prot: BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); Return -EINVAL early if COW mapping is detected. This bug affects all drm drivers using default shmem helpers. It can be reproduced by this simple example: void *ptr = mmap(0, size, PROT_WRITE, MAP_PRIVATE, fd, mmap_offset); ptr[0] = 0; Fixes: 2194a63a818d ("drm: Add library for shmem backed GEM objects") Cc: Noralf Trønnes <noralf(a)tronnes.org> Cc: Eric Anholt <eric(a)anholt.net> Cc: Rob Herring <robh(a)kernel.org> Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com> Cc: Maxime Ripard <mripard(a)kernel.org> Cc: Thomas Zimmermann <tzimmermann(a)suse.de> Cc: David Airlie <airlied(a)gmail.com> Cc: Daniel Vetter <daniel(a)ffwll.ch> Cc: dri-devel(a)lists.freedesktop.org Cc: <stable(a)vger.kernel.org> # v5.2+ Signed-off-by: Wachowski, Karol <karol.wachowski(a)intel.com> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz(a)linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20240520100514.925681-1-jacek… Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> [Sherry: bp to fix CVE-2024-39497, ignore context change due to missing commit 21aa27ddc582 ("drm/shmem-helper: Switch to reservation lock")] Signed-off-by: Sherry Yang <sherry.yang(a)oracle.com> --- drivers/gpu/drm/drm_gem_shmem_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index e33f06bb66eb..fb8093577245 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -638,6 +638,9 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct return ret; } + if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + ret = drm_gem_shmem_get_pages(shmem); if (ret) return ret; -- 2.46.0

11 months, 1 week

2
1
0 0

[PATCH 5.15, 5.10, 5.4, 4.19 1/1] KVM: Fix a data race on last_boosted_vcpu in kvm_vcpu_on_spin()

by Saeed Mirzamohammadi

From: Breno Leitao <leitao(a)debian.org> commit 49f683b41f28918df3e51ddc0d928cb2e934ccdb upstream. Use {READ,WRITE}_ONCE() to access kvm->last_boosted_vcpu to ensure the loads and stores are atomic. In the extremely unlikely scenario the compiler tears the stores, it's theoretically possible for KVM to attempt to get a vCPU using an out-of-bounds index, e.g. if the write is split into multiple 8-bit stores, and is paired with a 32-bit load on a VM with 257 vCPUs: CPU0 CPU1 last_boosted_vcpu = 0xff; (last_boosted_vcpu = 0x100) last_boosted_vcpu[15:8] = 0x01; i = (last_boosted_vcpu = 0x1ff) last_boosted_vcpu[7:0] = 0x00; vcpu = kvm->vcpu_array[0x1ff]; As detected by KCSAN: BUG: KCSAN: data-race in kvm_vcpu_on_spin [kvm] / kvm_vcpu_on_spin [kvm] write to 0xffffc90025a92344 of 4 bytes by task 4340 on cpu 16: kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4112) kvm handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:? arch/x86/kvm/vmx/vmx.c:6606) kvm_intel vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890) __x64_sys_ioctl (fs/ioctl.c:890) x64_sys_call (arch/x86/entry/syscall_64.c:33) do_syscall_64 (arch/x86/entry/common.c:?) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) read to 0xffffc90025a92344 of 4 bytes by task 4342 on cpu 4: kvm_vcpu_on_spin (arch/x86/kvm/../../../virt/kvm/kvm_main.c:4069) kvm handle_pause (arch/x86/kvm/vmx/vmx.c:5929) kvm_intel vmx_handle_exit (arch/x86/kvm/vmx/vmx.c:? arch/x86/kvm/vmx/vmx.c:6606) kvm_intel vcpu_run (arch/x86/kvm/x86.c:11107 arch/x86/kvm/x86.c:11211) kvm kvm_arch_vcpu_ioctl_run (arch/x86/kvm/x86.c:?) kvm kvm_vcpu_ioctl (arch/x86/kvm/../../../virt/kvm/kvm_main.c:?) kvm __se_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:904 fs/ioctl.c:890) __x64_sys_ioctl (fs/ioctl.c:890) x64_sys_call (arch/x86/entry/syscall_64.c:33) do_syscall_64 (arch/x86/entry/common.c:?) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) value changed: 0x00000012 -> 0x00000000 Fixes: 217ece6129f2 ("KVM: use yield_to instead of sleep in kvm_vcpu_on_spin") Cc: stable(a)vger.kernel.org # 4.19+ Signed-off-by: Breno Leitao <leitao(a)debian.org> Link: https://lore.kernel.org/r/20240510092353.2261824-1-leitao@debian.org Signed-off-by: Sean Christopherson <seanjc(a)google.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> (cherry picked from commit 92c77807d938145c7c3350c944ef9f39d7f6017c) Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi(a)oracle.com> --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 801ab0635bc32..8734d5bbd951e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3525,12 +3525,13 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; struct kvm_vcpu *vcpu; - int last_boosted_vcpu = me->kvm->last_boosted_vcpu; + int last_boosted_vcpu; int yielded = 0; int try = 3; int pass; int i; + last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu); kvm_vcpu_set_in_spin_loop(me, true); /* * We boost the priority of a VCPU that is runnable but not @@ -3561,7 +3562,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) yielded = kvm_vcpu_yield_to(vcpu); if (yielded > 0) { - kvm->last_boosted_vcpu = i; + WRITE_ONCE(kvm->last_boosted_vcpu, i); break; } else if (yielded < 0) { try--; -- 2.46.0

11 months, 1 week

2
1
0 0

FAILED: patch "[PATCH] maple_tree: correct tree corruption on spanning store" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x bea07fd63192b61209d48cbb81ef474cc3ee4c62 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101840-army-handstand-92f8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From bea07fd63192b61209d48cbb81ef474cc3ee4c62 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Date: Mon, 7 Oct 2024 16:28:32 +0100 Subject: [PATCH] maple_tree: correct tree corruption on spanning store Patch series "maple_tree: correct tree corruption on spanning store", v3. There has been a nasty yet subtle maple tree corruption bug that appears to have been in existence since the inception of the algorithm. This bug seems far more likely to happen since commit f8d112a4e657 ("mm/mmap: avoid zeroing vma tree in mmap_region()"), which is the point at which reports started to be submitted concerning this bug. We were made definitely aware of the bug thanks to the kind efforts of Bert Karwatzki who helped enormously in my being able to track this down and identify the cause of it. The bug arises when an attempt is made to perform a spanning store across two leaf nodes, where the right leaf node is the rightmost child of the shared parent, AND the store completely consumes the right-mode node. This results in mas_wr_spanning_store() mitakenly duplicating the new and existing entries at the maximum pivot within the range, and thus maple tree corruption. The fix patch corrects this by detecting this scenario and disallowing the mistaken duplicate copy. The fix patch commit message goes into great detail as to how this occurs. This series also includes a test which reliably reproduces the issue, and asserts that the fix works correctly. Bert has kindly tested the fix and confirmed it resolved his issues. Also Mikhail Gavrilov kindly reported what appears to be precisely the same bug, which this fix should also resolve. This patch (of 2): There has been a subtle bug present in the maple tree implementation from its inception. This arises from how stores are performed - when a store occurs, it will overwrite overlapping ranges and adjust the tree as necessary to accommodate this. A range may always ultimately span two leaf nodes. In this instance we walk the two leaf nodes, determine which elements are not overwritten to the left and to the right of the start and end of the ranges respectively and then rebalance the tree to contain these entries and the newly inserted one. This kind of store is dubbed a 'spanning store' and is implemented by mas_wr_spanning_store(). In order to reach this stage, mas_store_gfp() invokes mas_wr_preallocate(), mas_wr_store_type() and mas_wr_walk() in turn to walk the tree and update the object (mas) to traverse to the location where the write should be performed, determining its store type. When a spanning store is required, this function returns false stopping at the parent node which contains the target range, and mas_wr_store_type() marks the mas->store_type as wr_spanning_store to denote this fact. When we go to perform the store in mas_wr_spanning_store(), we first determine the elements AFTER the END of the range we wish to store (that is, to the right of the entry to be inserted) - we do this by walking to the NEXT pivot in the tree (i.e. r_mas.last + 1), starting at the node we have just determined contains the range over which we intend to write. We then turn our attention to the entries to the left of the entry we are inserting, whose state is represented by l_mas, and copy these into a 'big node', which is a special node which contains enough slots to contain two leaf node's worth of data. We then copy the entry we wish to store immediately after this - the copy and the insertion of the new entry is performed by mas_store_b_node(). After this we copy the elements to the right of the end of the range which we are inserting, if we have not exceeded the length of the node (i.e. r_mas.offset <= r_mas.end). Herein lies the bug - under very specific circumstances, this logic can break and corrupt the maple tree. Consider the following tree: Height 0 Root Node / \ pivot = 0xffff / \ pivot = ULONG_MAX / \ 1 A [-----] ... / \ pivot = 0x4fff / \ pivot = 0xffff / \ 2 (LEAVES) B [-----] [-----] C ^--- Last pivot 0xffff. Now imagine we wish to store an entry in the range [0x4000, 0xffff] (note that all ranges expressed in maple tree code are inclusive): 1. mas_store_gfp() descends the tree, finds node A at <=0xffff, then determines that this is a spanning store across nodes B and C. The mas state is set such that the current node from which we traverse further is node A. 2. In mas_wr_spanning_store() we try to find elements to the right of pivot 0xffff by searching for an index of 0x10000: - mas_wr_walk_index() invokes mas_wr_walk_descend() and mas_wr_node_walk() in turn. - mas_wr_node_walk() loops over entries in node A until EITHER it finds an entry whose pivot equals or exceeds 0x10000 OR it reaches the final entry. - Since no entry has a pivot equal to or exceeding 0x10000, pivot 0xffff is selected, leading to node C. - mas_wr_walk_traverse() resets the mas state to traverse node C. We loop around and invoke mas_wr_walk_descend() and mas_wr_node_walk() in turn once again. - Again, we reach the last entry in node C, which has a pivot of 0xffff. 3. We then copy the elements to the left of 0x4000 in node B to the big node via mas_store_b_node(), and insert the new [0x4000, 0xffff] entry too. 4. We determine whether we have any entries to copy from the right of the end of the range via - and with r_mas set up at the entry at pivot 0xffff, r_mas.offset <= r_mas.end, and then we DUPLICATE the entry at pivot 0xffff. 5. BUG! The maple tree is corrupted with a duplicate entry. This requires a very specific set of circumstances - we must be spanning the last element in a leaf node, which is the last element in the parent node. spanning store across two leaf nodes with a range that ends at that shared pivot. A potential solution to this problem would simply be to reset the walk each time we traverse r_mas, however given the rarity of this situation it seems that would be rather inefficient. Instead, this patch detects if the right hand node is populated, i.e. has anything we need to copy. We do so by only copying elements from the right of the entry being inserted when the maximum value present exceeds the last, rather than basing this on offset position. The patch also updates some comments and eliminates the unused bool return value in mas_wr_walk_index(). The work performed in commit f8d112a4e657 ("mm/mmap: avoid zeroing vma tree in mmap_region()") seems to have made the probability of this event much more likely, which is the point at which reports started to be submitted concerning this bug. The motivation for this change arose from Bert Karwatzki's report of encountering mm instability after the release of kernel v6.12-rc1 which, after the use of CONFIG_DEBUG_VM_MAPLE_TREE and similar configuration options, was identified as maple tree corruption. After Bert very generously provided his time and ability to reproduce this event consistently, I was able to finally identify that the issue discussed in this commit message was occurring for him. Link: https://lkml.kernel.org/r/cover.1728314402.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/48b349a2a0f7c76e18772712d0997a5e12ab0a3b.17283144… Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Reported-by: Bert Karwatzki <spasswolf(a)web.de> Closes: https://lore.kernel.org/all/20241001023402.3374-1-spasswolf@web.de/ Tested-by: Bert Karwatzki <spasswolf(a)web.de> Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov(a)gmail.com> Closes: https://lore.kernel.org/all/CABXGCsOPwuoNOqSMmAvWO2Fz4TEmPnjFj-b7iF+XFRu1h7… Acked-by: Vlastimil Babka <vbabka(a)suse.cz> Reviewed-by: Liam R. Howlett <Liam.Howlett(a)Oracle.com> Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov(a)gmail.com> Reviewed-by: Wei Yang <richard.weiyang(a)gmail.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Sidhartha Kumar <sidhartha.kumar(a)oracle.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ce7c7a7a8258..3619301dda2e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -2196,6 +2196,8 @@ static inline void mas_node_or_none(struct ma_state *mas, /* * mas_wr_node_walk() - Find the correct offset for the index in the @mas. + * If @mas->index cannot be found within the containing + * node, we traverse to the last entry in the node. * @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. @@ -3532,7 +3534,7 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas) return true; } -static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) +static void mas_wr_walk_index(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; @@ -3541,11 +3543,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) - return true; + return; mas_wr_walk_traverse(wr_mas); - } - return true; } /* * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. @@ -3765,8 +3765,8 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) memset(&b_node, 0, sizeof(struct maple_big_node)); /* Copy l_mas and store the value in b_node. */ mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); - /* Copy r_mas into b_node. */ - if (r_mas.offset <= r_mas.end) + /* Copy r_mas into b_node if there is anything to copy. */ + if (r_mas.max > r_mas.last) mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, &b_node, b_node.b_end + 1); else

11 months, 1 week

3
2
0 0

[PATCH 2/4] xhci: Mitigate failed set dequeue pointer commands

by Mathias Nyman

Avoid xHC host from processing a cancelled URB by always turning cancelled URB TDs into no-op TRBs before queuing a 'Set TR Deq' command. If the command fails then xHC will start processing the cancelled TD instead of skipping it once endpoint is restarted, causing issues like Babble error. This is not a complete solution as a failed 'Set TR Deq' command does not guarantee xHC TRB caches are cleared. Fixes: 4db356924a50 ("xhci: turn cancelled td cleanup to its own function") Cc: stable(a)vger.kernel.org Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com> --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 4d664ba53fe9..7dedf31bbddd 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1023,7 +1023,7 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) td_to_noop(xhci, ring, cached_td, false); cached_td->cancel_status = TD_CLEARED; } - + td_to_noop(xhci, ring, td, false); td->cancel_status = TD_CLEARING_CACHE; cached_td = td; break; -- 2.25.1

11 months, 1 week

2
4
0 0

[PATCH v2 08/13] media: ar0521: don't overflow when checking PLL values

by Mauro Carvalho Chehab

The PLL checks are comparing 64 bit integers with 32 bit ones, as reported by Coverity. Depending on the values of the variables, this may underflow. Fix it ensuring that both sides of the expression are u64. Fixes: 852b50aeed15 ("media: On Semi AR0521 sensor driver") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> Acked-by: Sakari Ailus <sakari.ailus(a)linux.intel.com> --- drivers/media/i2c/ar0521.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/ar0521.c b/drivers/media/i2c/ar0521.c index fc27238dd4d3..24873149096c 100644 --- a/drivers/media/i2c/ar0521.c +++ b/drivers/media/i2c/ar0521.c @@ -255,10 +255,10 @@ static u32 calc_pll(struct ar0521_dev *sensor, u32 freq, u16 *pre_ptr, u16 *mult continue; /* Minimum value */ if (new_mult > 254) break; /* Maximum, larger pre won't work either */ - if (sensor->extclk_freq * (u64)new_mult < AR0521_PLL_MIN * + if (sensor->extclk_freq * (u64)new_mult < (u64)AR0521_PLL_MIN * new_pre) continue; - if (sensor->extclk_freq * (u64)new_mult > AR0521_PLL_MAX * + if (sensor->extclk_freq * (u64)new_mult > (u64)AR0521_PLL_MAX * new_pre) break; /* Larger pre won't work either */ new_pll = div64_round_up(sensor->extclk_freq * (u64)new_mult, -- 2.47.0

11 months, 1 week

2
1
0 0

[PATCH 5.10 0/1] iommu/amd: Prepare for multiple DMA domain types

by Daniil Dulov

Svacer reports possible dereference of a NULL-pointer in amd_iommu_probe_finalize(). The problem is present in 5.10 stable release and can be fixed by the following upstream patch. In order to apply this patch, the incoming changes had to be manually accepted. This action was necessary due to some differences in the code of amd_iommu_probe_finalize() of the upstream version and 5.10 version of the kernel.

11 months, 1 week

2
2
0 0

[PATCH 6.6 000/211] 6.6.57-rc2 review

by Greg Kroah-Hartman

This is the start of the stable review cycle for the 6.6.57 release. There are 211 patches in this series, all will be posted as a response to this one. If anyone has any issues with these being applied, please let me know. Responses should be made by Thu, 17 Oct 2024 11:22:41 +0000. Anything received after that time might be too late. The whole patch series can be found in one patch at: https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.6.57-rc2… or in the git tree and branch at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.6.y and the diffstat can be found below. thanks, greg k-h ------------- Pseudo-Shortlog of commits: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Linux 6.6.57-rc2 Johan Hovold <johan+linaro(a)kernel.org> scsi: Revert "scsi: sd: Do not repeat the starting disk message" Vitaly Lifshits <vitaly.lifshits(a)intel.com> e1000e: fix force smbus during suspend flow Linus Walleij <linus.walleij(a)linaro.org> net: ethernet: cortina: Restore TSO support Patrick Roy <roypat(a)amazon.co.uk> secretmem: disable memfd_secret() if arch cannot set direct map Alexander Gordeev <agordeev(a)linux.ibm.com> fs/proc/kcore.c: allow translation of physical memory addresses Frederic Weisbecker <frederic(a)kernel.org> kthread: unpark only parked kthread Luca Stefani <luca.stefani.ge1(a)gmail.com> btrfs: split remaining space to discard in chunks Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> selftests/rseq: Fix mm_cid test failure Donet Tom <donettom(a)linux.ibm.com> selftests/mm: fix incorrect buffer->mirror size in hmm2 double_map test Zhang Rui <rui.zhang(a)intel.com> powercap: intel_rapl_tpmi: Fix bogus register reading Yonatan Maman <Ymaman(a)Nvidia.com> nouveau/dmem: Fix vulnerability in migrate_to_ram upon copy error Kun(llfl) <llfl(a)linux.alibaba.com> device-dax: correct pgoff align in dax_set_mapping() Matthieu Baerts (NGI0) <matttbe(a)kernel.org> mptcp: pm: do not remove closing subflows Paolo Abeni <pabeni(a)redhat.com> mptcp: handle consistently DSS corruption Christian Marangi <ansuelsmth(a)gmail.com> net: phy: Remove LED entry from LEDs list on unregister Anatolij Gustschin <agust(a)denx.de> net: dsa: lan9303: ensure chip reset and wait for READY status Anastasia Kovaleva <a.kovaleva(a)yadro.com> net: Fix an unsafe loop on the list Ignat Korchagin <ignat(a)cloudflare.com> net: explicitly clear the sk pointer, when pf->create fails Niklas Cassel <cassel(a)kernel.org> ata: libata: avoid superfluous disk spin down + spin up during hibernation Matthieu Baerts (NGI0) <matttbe(a)kernel.org> mptcp: fallback when MPTCP opts are dropped after 1st data Avri Altman <avri.altman(a)wdc.com> scsi: ufs: Use pre-calculated offsets in ufshcd_init_lrb() Daniel Palmer <daniel(a)0x0f.com> scsi: wd33c93: Don't use stale scsi_pointer value Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com> Bluetooth: hci_conn: Fix UAF in hci_enhanced_setup_sync Jani Nikula <jani.nikula(a)intel.com> drm/i915/hdcp: fix connector refcounting Maíra Canal <mcanal(a)igalia.com> drm/vc4: Stop the active perfmon before being destroyed Maíra Canal <mcanal(a)igalia.com> drm/v3d: Stop the active perfmon before being destroyed SurajSonawane2415 <surajsonawane0215(a)gmail.com> hid: intel-ish-hid: Fix uninitialized variable 'rv' in ish_fw_xfer_direct_dma John Keeping <jkeeping(a)inmusicbrands.com> usb: gadget: core: force synchronous registration Icenowy Zheng <uwu(a)icenowy.me> usb: storage: ignore bogus device raised by JieLi BR21 USB sound chip Jose Alberto Reguero <jose.alberto.reguero(a)gmail.com> usb: xhci: Fix problem with xhci resume from suspend Selvarasu Ganesan <selvarasu.g(a)samsung.com> usb: dwc3: core: Stop processing of pending events if controller is halted Oliver Neukum <oneukum(a)suse.com> Revert "usb: yurex: Replace snprintf() with the safer scnprintf() variant" Wade Wang <wade.wang(a)hp.com> HID: plantronics: Workaround for an unexcepted opposite volume key He Lugang <helugang(a)uniontech.com> HID: multitouch: Add support for lenovo Y9000P Touchpad Basavaraj Natikar <Basavaraj.Natikar(a)amd.com> HID: amd_sfh: Switch to device-managed dmam_alloc_coherent() Javier Carrasco <javier.carrasco.cruz(a)gmail.com> hwmon: (adt7470) Add missing dependency on REGMAP_I2C Javier Carrasco <javier.carrasco.cruz(a)gmail.com> hwmon: (adm9240) Add missing dependency on REGMAP_I2C Javier Carrasco <javier.carrasco.cruz(a)gmail.com> hwmon: (mc34vr500) Add missing dependency on REGMAP_I2C Guenter Roeck <linux(a)roeck-us.net> hwmon: (tmp513) Add missing dependency on REGMAP_I2C Peter Colberg <peter.colberg(a)intel.com> hwmon: intel-m10-bmc-hwmon: relabel Columbiaville to CVL Die Temperature Kenton Groombridge <concord(a)gentoo.org> wifi: mac80211: Avoid address calculations via out of bounds array indexing Luke D. Jones <luke(a)ljones.dev> hid-asus: add ROG Ally X prod ID to quirk list Luke D. Jones <luke(a)ljones.dev> HID: asus: add ROG Z13 lightbar Luke D. Jones <luke(a)ljones.dev> HID: asus: add ROG Ally N-Key ID and keycodes Kai-Heng Feng <kai.heng.feng(a)canonical.com> HID: i2c-hid: Skip SET_POWER SLEEP for Cirque touchpad on system suspend Hans de Goede <hdegoede(a)redhat.com> HID: i2c-hid: Renumber I2C_HID_QUIRK_ defines Hans de Goede <hdegoede(a)redhat.com> HID: i2c-hid: Remove I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV quirk Johannes Roith <johannes(a)gnu-linux.rocks> HID: mcp2200: added driver for GPIOs of MCP2200 Frederic Weisbecker <frederic(a)kernel.org> rcu/nocb: Fix rcuog wake-up from offline softirq Frederic Weisbecker <frederic(a)kernel.org> rcu/nocb: Make IRQs disablement symmetric Eric Dumazet <edumazet(a)google.com> slip: make slhc_remember() more robust against malicious packets Eric Dumazet <edumazet(a)google.com> ppp: fix ppp_async_encode() illegal access Kuniyuki Iwashima <kuniyu(a)amazon.com> phonet: Handle error of rtnl_register_module(). Eric Dumazet <edumazet(a)google.com> phonet: no longer hold RTNL in route_dumpit() Kuniyuki Iwashima <kuniyu(a)amazon.com> mpls: Handle error of rtnl_register_module(). Eric Dumazet <edumazet(a)google.com> mpls: no longer hold RTNL in mpls_netconf_dump_devconf() Eric Dumazet <edumazet(a)google.com> rtnetlink: add RTNL_FLAG_DUMP_UNLOCKED flag Eric Dumazet <edumazet(a)google.com> rtnetlink: change nlk->cb_mutex role Kuniyuki Iwashima <kuniyu(a)amazon.com> mctp: Handle error of rtnl_register_module(). Kuniyuki Iwashima <kuniyu(a)amazon.com> bridge: Handle error of rtnl_register_module(). Kuniyuki Iwashima <kuniyu(a)amazon.com> vxlan: Handle error of rtnl_register_module(). Kuniyuki Iwashima <kuniyu(a)amazon.com> rtnetlink: Add bulk registration helpers for rtnetlink message handlers. Eric Dumazet <edumazet(a)google.com> net: do not delay dst_entries_add() in dst_release() Florian Westphal <fw(a)strlen.de> netfilter: fib: check correct rtable in vrf setups Florian Westphal <fw(a)strlen.de> netfilter: xtables: avoid NFPROTO_UNSPEC where needed Xin Long <lucien.xin(a)gmail.com> sctp: ensure sk_state is set to CLOSED if hashing fails in sctp_listen_start Filipe Manana <fdmanana(a)suse.com> btrfs: zoned: fix missing RCU locking in error message when loading zone info Rosen Penev <rosenp(a)gmail.com> net: ibm: emac: mal: fix wrong goto Eric Dumazet <edumazet(a)google.com> net/sched: accept TCA_STAB only for root qdisc Vitaly Lifshits <vitaly.lifshits(a)intel.com> e1000e: change I219 (19) devices to ADP Mohamed Khalfella <mkhalfella(a)purestorage.com> igb: Do not bring the device up after non-fatal error Aleksandr Loktionov <aleksandr.loktionov(a)intel.com> i40e: Fix macvlan leak by synchronizing access to mac_filter_hash Wojciech Drewek <wojciech.drewek(a)intel.com> ice: Flush FDB entries before reset Michal Swiatkowski <michal.swiatkowski(a)linux.intel.com> ice: rename switchdev to eswitch Marcin Szycik <marcin.szycik(a)linux.intel.com> ice: Fix netif_is_ice() in Safe Mode Zhang Rui <rui.zhang(a)intel.com> powercap: intel_rapl_tpmi: Ignore minor version change Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com> platform/x86/intel/tpmi: Add defines to get version information Billy Tsai <billy_tsai(a)aspeedtech.com> gpio: aspeed: Use devm_clk api to manage clock source Billy Tsai <billy_tsai(a)aspeedtech.com> gpio: aspeed: Add the flush write to ensure the write complete. Yonatan Maman <Ymaman(a)Nvidia.com> nouveau/dmem: Fix privileged error in copy engine channel Ben Skeggs <bskeggs(a)nvidia.com> drm/nouveau: pass cli to nouveau_channel_new() instead of drm+device Jonas Gorski <jonas.gorski(a)gmail.com> net: dsa: b53: fix jumbo frames on 10/100 ports Jonas Gorski <jonas.gorski(a)gmail.com> net: dsa: b53: allow lower MTUs on BCM5325/5365 Jonas Gorski <jonas.gorski(a)gmail.com> net: dsa: b53: fix max MTU for BCM5325/BCM5365 Jonas Gorski <jonas.gorski(a)gmail.com> net: dsa: b53: fix max MTU for 1g switches Jonas Gorski <jonas.gorski(a)gmail.com> net: dsa: b53: fix jumbo frame mtu check Christophe JAILLET <christophe.jaillet(a)wanadoo.fr> net: ethernet: adi: adin1110: Fix some error handling path in adin1110_read_fifo() Jakub Kicinski <kuba(a)kernel.org> Revert "net: stmmac: set PP_FLAG_DMA_SYNC_DEV only if XDP is enabled" Zhang Rui <rui.zhang(a)intel.com> thermal: intel: int340x: processor: Fix warning during module unload Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com> thermal: int340x: processor_thermal: Set feature mask before proc_thermal_add Christophe JAILLET <christophe.jaillet(a)wanadoo.fr> net: phy: bcm84881: Fix some error handling paths Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com> Bluetooth: RFCOMM: FIX possible deadlock in rfcomm_sk_state_change Kacper Ludwinski <kac.ludwinski(a)icloud.com> selftests: net: no_forwarding: fix VID for $swp2 in one_bridge_two_pvids() test Andy Roulin <aroulin(a)nvidia.com> netfilter: br_netfilter: fix panic with metadata_dst skb David Howells <dhowells(a)redhat.com> rxrpc: Fix uninitialised variable in rxrpc_send_data() Neal Cardwell <ncardwell(a)google.com> tcp: fix TFO SYN_RECV to not zero retrans_stamp with retransmits out Aananth V <aananthv(a)google.com> tcp: new TCP_INFO stats for RTO events Neal Cardwell <ncardwell(a)google.com> tcp: fix tcp_enter_recovery() to zero retrans_stamp when it's safe Neal Cardwell <ncardwell(a)google.com> tcp: fix to allow timestamp undo if no retransmits were sent Ingo van Lil <inguin(a)gmx.de> net: phy: dp83869: fix memory corruption when enabling fiber Yanjun Zhang <zhangyanjun(a)cestc.cn> NFSv4: Prevent NULL-pointer dereference in nfs42_complete_copies() Dan Carpenter <dan.carpenter(a)linaro.org> SUNRPC: Fix integer overflow in decode_rc_list() Dave Ertman <david.m.ertman(a)intel.com> ice: fix VLAN replay after reset Michal Swiatkowski <michal.swiatkowski(a)linux.intel.com> ice: set correct dst VSI in only LAN filters Chuck Lever <chuck.lever(a)oracle.com> NFSD: Mark filecache "down" if init fails Shyam Sundar S K <Shyam-sundar.S-k(a)amd.com> x86/amd_nb: Add new PCI IDs for AMD family 1Ah model 60h Shyam Sundar S K <Shyam-sundar.S-k(a)amd.com> x86/amd_nb: Add new PCI IDs for AMD family 0x1a Andrey Shumilin <shum.sdl(a)nppct.ru> fbdev: sisfb: Fix strbuf array overflow Enzo Matsumiya <ematsumiya(a)suse.de> smb: client: fix UAF in async decryption Qianqiang Liu <qianqiang.liu(a)163.com> fbcon: Fix a NULL pointer dereference issue in fbcon_putcs Alex Hung <alex.hung(a)amd.com> drm/amd/display: Check null pointer before dereferencing se Justin Tee <justin.tee(a)broadcom.com> scsi: lpfc: Ensure DA_ID handling completion before deleting an NPIV instance Justin Tee <justin.tee(a)broadcom.com> scsi: lpfc: Add ELS_RSP cmd to the list of WQEs to flush in lpfc_els_flush_cmd() Zijun Hu <quic_zijuhu(a)quicinc.com> driver core: bus: Return -EIO instead of 0 when show/store invalid bus attribute Zijun Hu <quic_zijuhu(a)quicinc.com> driver core: bus: Fix double free in driver API bus_register() Riyan Dhiman <riyandhiman14(a)gmail.com> staging: vme_user: added bound check to geoid Zhu Jun <zhujun2(a)cmss.chinamobile.com> tools/iio: Add memory allocation failure check for trigger_name Philip Chen <philipchen(a)chromium.org> virtio_pmem: Check device status before requesting flush Simon Horman <horms(a)kernel.org> netfilter: nf_reject: Fix build warning when CONFIG_BRIDGE_NETFILTER=n Florian Westphal <fw(a)strlen.de> netfilter: nf_nat: don't try nat source port reallocation for reverse dir clash Wentao Guan <guanwentao(a)uniontech.com> LoongArch: Fix memleak in pci_acpi_scan_root() Ruffalo Lavoisier <ruffalolavoisier(a)gmail.com> comedi: ni_routing: tools: Check when the file could not be opened Shawn Shao <shawn.shao(a)jaguarmicro.com> usb: dwc2: Adjust the timing of USB Driver Interrupt Registration in the Crashkernel Scenario Xu Yang <xu.yang_2(a)nxp.com> usb: chipidea: udc: enable suspend interrupt after usb reset Wadim Egorov <w.egorov(a)phytec.de> usb: typec: tipd: Free IRQ only if it was requested before Jiri Slaby (SUSE) <jirislaby(a)kernel.org> serial: protect uart_port_dtr_rts() in uart_shutdown() too Peng Fan <peng.fan(a)nxp.com> clk: imx: Remove CLK_SET_PARENT_GATE for DRAM mux for i.MX7D Peng Fan <peng.fan(a)nxp.com> remoteproc: imx_rproc: Use imx specific hook for find_loaded_rsc_table Yunke Cao <yunkec(a)chromium.org> media: videobuf2-core: clear memory related fields in __vb2_plane_dmabuf_put() Ying Sun <sunying(a)isrc.iscas.ac.cn> riscv/kexec_file: Fix relocation type R_RISCV_ADD16 and R_RISCV_SUB16 unknown Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com> soundwire: cadence: re-check Peripheral status with delayed_work Michael Guralnik <michaelgur(a)nvidia.com> RDMA/mlx5: Enforce umem boundaries for explicit ODP page faults Jisheng Zhang <jszhang(a)kernel.org> riscv: avoid Imbalance in RAS Hans de Goede <hdegoede(a)redhat.com> mfd: intel_soc_pmic_chtwc: Make Lenovo Yoga Tab 3 X90F DMI match less strict Kaixin Wang <kxwang23(a)m.fudan.edu.cn> ntb: ntb_hw_switchtec: Fix use after free vulnerability in switchtec_ntb_remove due to race condition Jens Axboe <axboe(a)kernel.dk> io_uring: check if we need to reschedule during overflow flush Palmer Dabbelt <palmer(a)rivosinc.com> RISC-V: Don't have MAX_PHYSMEM_BITS exceed phys_addr_t Kaixin Wang <kxwang23(a)m.fudan.edu.cn> i3c: master: cdns: Fix use after free vulnerability in cdns_i3c_master Driver Due to Race Condition Alex Williamson <alex.williamson(a)redhat.com> PCI: Mark Creative Labs EMU20k2 INTx masking as broken Hans de Goede <hdegoede(a)redhat.com> i2c: i801: Use a different adapter-name for IDF adapters Subramanian Ananthanarayanan <quic_skananth(a)quicinc.com> PCI: Add ACS quirk for Qualcomm SA8775P Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org> clk: bcm: bcm53573: fix OF node leak in init Md Haris Iqbal <haris.iqbal(a)ionos.com> RDMA/rtrs-srv: Avoid null pointer deref during path establishment WangYuli <wangyuli(a)uniontech.com> PCI: Add function 0 DMA alias quirk for Glenfly Arise chip Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com> soundwire: intel_bus_common: enable interrupts before exiting reset Saravanan Vajravel <saravanan.vajravel(a)broadcom.com> RDMA/mad: Improve handling of timed out WRs of mad agent Daniel Jordan <daniel.m.jordan(a)oracle.com> ktest.pl: Avoid false positives with grub2 skip regex Xu Kuohai <xukuohai(a)huawei.com> bpf: Prevent tail call between progs attached to different hooks Thomas Richter <tmricht(a)linux.ibm.com> s390/cpum_sf: Remove WARN_ON_ONCE statements Wojciech Gładysz <wojciech.gladysz(a)infogain.com> ext4: nested locking for xattr inode Jan Kara <jack(a)suse.cz> ext4: don't set SB_RDONLY after filesystem errors Yonghong Song <yonghong.song(a)linux.dev> bpf, x64: Fix a jit convergence issue Gerald Schaefer <gerald.schaefer(a)linux.ibm.com> s390/mm: Add cond_resched() to cmm_alloc/free_pages() Heiko Carstens <hca(a)linux.ibm.com> s390/facility: Disable compile time optimization for decompressor code Tao Chen <chen.dylane(a)gmail.com> bpf: Check percpu map value size first Daniel Borkmann <daniel(a)iogearbox.net> selftests/bpf: Fix ARG_PTR_TO_LONG {half-,}uninitialized test Mathias Krause <minipli(a)grsecurity.net> Input: synaptics-rmi4 - fix UAF of IRQ domain on driver removal Andrey Skvortsov <andrej.skvortzov(a)gmail.com> zram: don't free statically defined names Sergey Senozhatsky <senozhatsky(a)chromium.org> zram: free secondary algorithms names Diogo Jahchan Koike <djahchankoike(a)gmail.com> ntfs3: Change to non-blocking allocation in ntfs_d_hash Michael S. Tsirkin <mst(a)redhat.com> virtio_console: fix misc probe bugs Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com> fs/ntfs3: Refactor enum_rstbl to suppress static checker Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com> fs/ntfs3: Fix sparse warning in ni_fiemap Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com> fs/ntfs3: Do not call file_modified if collapse range failed Alex Hung <alex.hung(a)amd.com> drm/amd/display: Revert "Check HDCP returned status" Wenjing Liu <wenjing.liu(a)amd.com> drm/amd/display: Remove a redundant check in authenticated_dp Paul Menzel <pmenzel(a)molgen.mpg.de> lib/build_OID_registry: avoid non-destructive substitution for Perl < 5.13.2 compat Randy Dunlap <rdunlap(a)infradead.org> jbd2: fix kernel-doc for j_transaction_overhead_buffers Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com> Bluetooth: Fix usage of __hci_cmd_sync_status Benjamin Poirier <bpoirier(a)nvidia.com> selftests: Introduce Makefile variable to list shared bash scripts Benjamin Poirier <bpoirier(a)nvidia.com> selftests: net: Remove executable bits from library scripts Aditya Gupta <adityag(a)linux.ibm.com> libsubcmd: Don't free the usage string Yang Jihong <yangjihong1(a)huawei.com> perf sched: Move curr_pid and cpu_last_switched initialization to perf_sched__{lat|map|replay}() Yang Jihong <yangjihong1(a)huawei.com> perf sched: Move curr_thread initialization to perf_sched__map() Yang Jihong <yangjihong1(a)huawei.com> perf sched: Fix memory leak in perf_sched__map() Yang Jihong <yangjihong1(a)huawei.com> perf sched: Move start_work_mutex and work_done_wait_mutex initialization to perf_sched__replay() Masami Hiramatsu (Google) <mhiramat(a)kernel.org> bootconfig: Fix the kerneldoc of _xbc_exit() Hui Wang <hui.wang(a)canonical.com> e1000e: move force SMBUS near the end of enable_ulp function Tony Nguyen <anthony.l.nguyen(a)intel.com> i40e: Include types.h to some headers Ivan Vecera <ivecera(a)redhat.com> i40e: Fix ST code value for Clause 45 Damien Le Moal <dlemoal(a)kernel.org> scsi: sd: Do not repeat the starting disk message Damien Le Moal <dlemoal(a)kernel.org> scsi: Remove scsi device no_start_on_resume flag Gergo Koteles <soyer(a)irl.hu> ASoC: tas2781: mark dvc_tlv with __maybe_unused Damien Le Moal <dlemoal(a)kernel.org> ata: ahci: Add mask_port_map module parameter Carlos Song <carlos.song(a)nxp.com> spi: spi-fsl-lpspi: remove redundant spi_controller_put call Charlie Jenkins <charlie(a)rivosinc.com> riscv: cpufeature: Fix thead vector hwcap removal Steven Rostedt (Google) <rostedt(a)goodmis.org> tracing: Have saved_cmdlines arrays all in one allocation Xiubo Li <xiubli(a)redhat.com> libceph: init the cursor when preparing sparse read in msgr2 Shannon Nelson <shannon.nelson(a)amd.com> pds_core: no health-thread in VF path Geoff Levand <geoff(a)infradead.org> Revert "powerpc/ps3_defconfig: Disable PPC64_BIG_ENDIAN_ELF_ABI_V2" Manivannan Sadhasivam <mani(a)kernel.org> bus: mhi: ep: Do not allocate memory for MHI objects from DMA zone Manivannan Sadhasivam <mani(a)kernel.org> bus: mhi: ep: Add support for async DMA read operation Manivannan Sadhasivam <mani(a)kernel.org> bus: mhi: ep: Add support for async DMA write operation Manivannan Sadhasivam <mani(a)kernel.org> bus: mhi: ep: Introduce async read/write callbacks Manivannan Sadhasivam <mani(a)kernel.org> bus: mhi: ep: Rename read_from_host() and write_to_host() APIs Rob Clark <robdclark(a)chromium.org> drm/crtc: fix uninitialized variable use even harder Jean-Loïc Charroud <lagiraudiere+linux(a)free.fr> ALSA: hda/realtek: cs35l41: Fix device ID / model name Jean-Loïc Charroud <lagiraudiere+linux(a)free.fr> ALSA: hda/realtek: cs35l41: Fix order and duplicates in quirks table Steven Rostedt (Google) <rostedt(a)goodmis.org> tracing: Remove precision vsnprintf() check from print event Cong Yang <yangcong5(a)huaqin.corp-partner.google.com> drm/panel: boe-tv101wum-nl6: Fine tune Himax83102-j02 panel HFP and HBP (again) Linus Walleij <linus.walleij(a)linaro.org> net: ethernet: cortina: Drop TSO support Song Shuai <songshuaishuai(a)tinylab.org> riscv: Remove SHADOW_OVERFLOW_STACK_SIZE macro Andreas Gruenbacher <agruenba(a)redhat.com> gfs2: Revert "ignore negated quota changes" Andreas Gruenbacher <agruenba(a)redhat.com> gfs2: qd_check_sync cleanups Andreas Gruenbacher <agruenba(a)redhat.com> gfs2: Revert "introduce qd_bh_get_or_undo" Abel Vesa <abel.vesa(a)linaro.org> phy: qualcomm: eusb2-repeater: Rework init to drop redundant zero-out loop Konrad Dybcio <konrad.dybcio(a)linaro.org> phy: qualcomm: phy-qcom-eusb2-repeater: Add tuning overrides Richard Fitzgerald <rf(a)opensource.cirrus.com> ASoC: cs35l56: Load tunings for the correct speaker models Bjorn Helgaas <bhelgaas(a)google.com> Revert "PCI/MSI: Provide stubs for IMS functions" Wei Fang <wei.fang(a)nxp.com> net: fec: don't save PTP state if PTP is unsupported Gabriel Krisman Bertazi <krisman(a)suse.de> unicode: Don't special case ignorable code points ------------- Diffstat: Documentation/dev-tools/kselftest.rst | 12 + Makefile | 4 +- arch/loongarch/pci/acpi.c | 1 + arch/powerpc/configs/ps3_defconfig | 1 - arch/riscv/include/asm/sbi.h | 2 + arch/riscv/include/asm/sparsemem.h | 2 +- arch/riscv/include/asm/thread_info.h | 1 - arch/riscv/kernel/cpu.c | 40 +- arch/riscv/kernel/cpufeature.c | 8 +- arch/riscv/kernel/elf_kexec.c | 6 + arch/riscv/kernel/entry.S | 4 +- arch/s390/include/asm/facility.h | 6 +- arch/s390/include/asm/io.h | 2 + arch/s390/kernel/perf_cpum_sf.c | 12 +- arch/s390/mm/cmm.c | 18 +- arch/x86/kernel/amd_nb.c | 4 + arch/x86/net/bpf_jit_comp.c | 54 +- drivers/ata/ahci.c | 85 + drivers/ata/libata-eh.c | 18 +- drivers/base/bus.c | 8 +- drivers/block/zram/zram_drv.c | 7 + drivers/bus/mhi/ep/internal.h | 1 + drivers/bus/mhi/ep/main.c | 248 +- drivers/bus/mhi/ep/ring.c | 8 +- drivers/char/virtio_console.c | 18 +- drivers/clk/bcm/clk-bcm53573-ilp.c | 2 +- drivers/clk/imx/clk-imx7d.c | 4 +- .../drivers/ni_routing/tools/convert_c_to_py.c | 5 + drivers/dax/device.c | 2 +- drivers/gpio/gpio-aspeed.c | 4 +- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- .../drm/amd/display/modules/hdcp/hdcp1_execution.c | 27 +- drivers/gpu/drm/drm_crtc.c | 1 + drivers/gpu/drm/i915/display/intel_hdcp.c | 10 +- drivers/gpu/drm/nouveau/dispnv04/crtc.c | 2 +- drivers/gpu/drm/nouveau/nouveau_abi16.c | 2 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_chan.c | 21 +- drivers/gpu/drm/nouveau/nouveau_chan.h | 3 +- drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 4 +- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 8 +- drivers/gpu/drm/v3d/v3d_perfmon.c | 9 +- drivers/gpu/drm/vc4/vc4_perfmon.c | 7 +- drivers/hid/Kconfig | 9 + drivers/hid/Makefile | 1 + drivers/hid/amd-sfh-hid/amd_sfh_client.c | 14 +- drivers/hid/hid-asus.c | 14 +- drivers/hid/hid-ids.h | 10 + drivers/hid/hid-mcp2200.c | 392 ++ drivers/hid/hid-multitouch.c | 8 +- drivers/hid/hid-plantronics.c | 23 + drivers/hid/i2c-hid/i2c-hid-core.c | 22 +- drivers/hid/intel-ish-hid/ishtp-fw-loader.c | 2 +- drivers/hwmon/Kconfig | 4 + drivers/hwmon/intel-m10-bmc-hwmon.c | 2 +- drivers/hwmon/k10temp.c | 1 + drivers/i2c/busses/i2c-i801.c | 9 +- drivers/i3c/master/i3c-master-cdns.c | 1 + drivers/infiniband/core/mad.c | 14 +- drivers/infiniband/hw/mlx5/odp.c | 25 +- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 13 +- drivers/input/rmi4/rmi_driver.c | 6 +- drivers/media/common/videobuf2/videobuf2-core.c | 8 +- drivers/mfd/intel_soc_pmic_chtwc.c | 1 - drivers/net/dsa/b53/b53_common.c | 17 +- drivers/net/dsa/lan9303-core.c | 29 + drivers/net/ethernet/adi/adin1110.c | 4 +- drivers/net/ethernet/amd/pds_core/main.c | 6 + drivers/net/ethernet/cortina/gemini.c | 32 +- drivers/net/ethernet/freescale/fec_main.c | 6 +- drivers/net/ethernet/ibm/emac/mal.c | 2 +- drivers/net/ethernet/intel/e1000e/hw.h | 4 +- drivers/net/ethernet/intel/e1000e/ich8lan.c | 55 + drivers/net/ethernet/intel/e1000e/netdev.c | 22 +- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 1 + drivers/net/ethernet/intel/i40e/i40e_diag.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_register.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_type.h | 4 +- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 + drivers/net/ethernet/intel/ice/ice.h | 6 +- drivers/net/ethernet/intel/ice/ice_eswitch.c | 63 +- drivers/net/ethernet/intel/ice/ice_eswitch_br.c | 17 +- drivers/net/ethernet/intel/ice/ice_eswitch_br.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 27 +- drivers/net/ethernet/intel/ice/ice_switch.c | 2 - drivers/net/ethernet/intel/ice/ice_tc_lib.c | 15 +- drivers/net/ethernet/intel/igb/igb_main.c | 4 + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/phy/bcm84881.c | 4 +- drivers/net/phy/dp83869.c | 1 - drivers/net/phy/phy_device.c | 5 +- drivers/net/ppp/ppp_async.c | 2 +- drivers/net/slip/slhc.c | 57 +- drivers/net/vxlan/vxlan_core.c | 6 +- drivers/net/vxlan/vxlan_private.h | 2 +- drivers/net/vxlan/vxlan_vnifilter.c | 19 +- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 1 + drivers/nvdimm/nd_virtio.c | 9 + drivers/pci/endpoint/functions/pci-epf-mhi.c | 8 +- drivers/pci/quirks.c | 8 + drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c | 145 +- drivers/powercap/intel_rapl_tpmi.c | 19 +- drivers/remoteproc/imx_rproc.c | 13 +- drivers/scsi/lpfc/lpfc_ct.c | 12 + drivers/scsi/lpfc/lpfc_disc.h | 7 + drivers/scsi/lpfc/lpfc_els.c | 7 +- drivers/scsi/lpfc/lpfc_vport.c | 43 +- drivers/scsi/sd.c | 9 +- drivers/scsi/wd33c93.c | 2 +- drivers/soundwire/cadence_master.c | 39 +- drivers/soundwire/cadence_master.h | 5 + drivers/soundwire/intel.h | 2 + drivers/soundwire/intel_auxdevice.c | 1 + drivers/soundwire/intel_bus_common.c | 35 +- drivers/spi/spi-fsl-lpspi.c | 14 +- drivers/staging/vme_user/vme_fake.c | 6 + drivers/staging/vme_user/vme_tsi148.c | 6 + .../int340x_thermal/processor_thermal_device_pci.c | 23 +- drivers/tty/serial/serial_core.c | 16 +- drivers/ufs/core/ufshcd.c | 5 +- drivers/usb/chipidea/udc.c | 8 +- drivers/usb/dwc2/platform.c | 26 +- drivers/usb/dwc3/core.c | 22 +- drivers/usb/dwc3/core.h | 4 - drivers/usb/dwc3/gadget.c | 11 - drivers/usb/gadget/udc/core.c | 1 + drivers/usb/host/xhci-pci.c | 5 + drivers/usb/misc/yurex.c | 19 +- drivers/usb/storage/unusual_devs.h | 11 + drivers/usb/typec/tipd/core.c | 2 + drivers/video/fbdev/core/fbcon.c | 2 + drivers/video/fbdev/sis/sis_main.c | 2 +- fs/btrfs/extent-tree.c | 19 +- fs/btrfs/volumes.h | 6 + fs/btrfs/zoned.c | 2 +- fs/ext4/super.c | 9 +- fs/ext4/xattr.c | 4 +- fs/gfs2/quota.c | 59 +- fs/nfs/callback_xdr.c | 2 + fs/nfs/client.c | 1 + fs/nfs/nfs42proc.c | 2 +- fs/nfs/nfs4state.c | 2 +- fs/nfsd/filecache.c | 4 +- fs/ntfs3/file.c | 4 +- fs/ntfs3/frecord.c | 21 +- fs/ntfs3/fslog.c | 19 +- fs/ntfs3/namei.c | 4 +- fs/proc/kcore.c | 36 +- fs/smb/client/smb2ops.c | 47 +- fs/smb/client/smb2pdu.c | 6 + fs/unicode/mkutf8data.c | 70 - fs/unicode/utf8data.c_shipped | 6703 ++++++++++---------- include/linux/bpf.h | 1 + include/linux/intel_tpmi.h | 6 + include/linux/jbd2.h | 2 +- include/linux/mhi_ep.h | 21 +- include/linux/netlink.h | 2 + include/linux/nfs_fs_sb.h | 1 + include/linux/pci.h | 34 +- include/linux/pci_ids.h | 4 + include/linux/tcp.h | 8 + include/net/mctp.h | 2 +- include/net/rtnetlink.h | 18 + include/net/sch_generic.h | 1 - include/net/sock.h | 2 + include/scsi/scsi_device.h | 1 - include/sound/cs35l56.h | 1 + include/sound/tas2781-tlv.h | 2 +- include/uapi/linux/tcp.h | 12 + io_uring/io_uring.c | 15 + kernel/bpf/arraymap.c | 3 + kernel/bpf/core.c | 21 +- kernel/bpf/hashtab.c | 3 + kernel/kthread.c | 2 + kernel/rcu/tree.c | 9 +- kernel/rcu/tree_nocb.h | 28 +- kernel/trace/trace.c | 18 +- kernel/trace/trace_output.c | 6 +- lib/bootconfig.c | 3 +- lib/build_OID_registry | 4 +- mm/secretmem.c | 4 +- net/bluetooth/hci_conn.c | 3 + net/bluetooth/hci_core.c | 27 +- net/bluetooth/rfcomm/sock.c | 2 - net/bridge/br_netfilter_hooks.c | 5 + net/bridge/br_netlink.c | 6 +- net/bridge/br_private.h | 5 +- net/bridge/br_vlan.c | 19 +- net/ceph/messenger_v2.c | 3 + net/core/dst.c | 17 +- net/core/rtnetlink.c | 31 + net/ipv4/netfilter/nf_reject_ipv4.c | 10 +- net/ipv4/netfilter/nft_fib_ipv4.c | 4 +- net/ipv4/tcp.c | 9 + net/ipv4/tcp_input.c | 57 +- net/ipv4/tcp_minisocks.c | 4 + net/ipv4/tcp_timer.c | 17 +- net/ipv6/netfilter/nf_reject_ipv6.c | 5 +- net/ipv6/netfilter/nft_fib_ipv6.c | 5 +- net/mac80211/scan.c | 17 +- net/mctp/af_mctp.c | 6 +- net/mctp/device.c | 32 +- net/mctp/neigh.c | 29 +- net/mctp/route.c | 33 +- net/mpls/af_mpls.c | 87 +- net/mptcp/mib.c | 2 + net/mptcp/mib.h | 2 + net/mptcp/pm_netlink.c | 3 +- net/mptcp/protocol.c | 24 +- net/mptcp/subflow.c | 6 +- net/netfilter/nf_nat_core.c | 120 +- net/netfilter/xt_CHECKSUM.c | 33 +- net/netfilter/xt_CLASSIFY.c | 16 +- net/netfilter/xt_CONNSECMARK.c | 36 +- net/netfilter/xt_CT.c | 148 +- net/netfilter/xt_IDLETIMER.c | 59 +- net/netfilter/xt_LED.c | 39 +- net/netfilter/xt_NFLOG.c | 36 +- net/netfilter/xt_RATEEST.c | 39 +- net/netfilter/xt_SECMARK.c | 27 +- net/netfilter/xt_TRACE.c | 35 +- net/netfilter/xt_addrtype.c | 15 +- net/netfilter/xt_cluster.c | 33 +- net/netfilter/xt_connbytes.c | 4 +- net/netfilter/xt_connlimit.c | 39 +- net/netfilter/xt_connmark.c | 28 +- net/netfilter/xt_mark.c | 42 +- net/netlink/af_netlink.c | 38 +- net/netlink/af_netlink.h | 5 +- net/phonet/pn_netlink.c | 43 +- net/rxrpc/sendmsg.c | 10 +- net/sched/sch_api.c | 7 +- net/sctp/socket.c | 18 +- net/socket.c | 7 +- sound/pci/hda/hda_intel.c | 2 +- sound/pci/hda/patch_realtek.c | 7 +- sound/soc/codecs/cs35l56-shared.c | 36 + sound/soc/codecs/cs35l56.c | 32 +- sound/soc/codecs/cs35l56.h | 1 + tools/iio/iio_generic_buffer.c | 4 + tools/lib/subcmd/parse-options.c | 8 +- tools/perf/builtin-kmem.c | 2 + tools/perf/builtin-kvm.c | 3 + tools/perf/builtin-kwork.c | 3 + tools/perf/builtin-lock.c | 3 + tools/perf/builtin-mem.c | 3 + tools/perf/builtin-sched.c | 164 +- tools/testing/ktest/ktest.pl | 2 +- tools/testing/selftests/Makefile | 7 +- .../testing/selftests/bpf/progs/verifier_int_ptr.c | 5 +- tools/testing/selftests/lib.mk | 19 + tools/testing/selftests/mm/hmm-tests.c | 2 +- .../selftests/net/forwarding/no_forwarding.sh | 2 +- tools/testing/selftests/net/setup_loopback.sh | 0 tools/testing/selftests/rseq/rseq.c | 110 +- tools/testing/selftests/rseq/rseq.h | 10 +- 258 files changed, 6561 insertions(+), 4700 deletions(-)

11 months, 1 week

10
9
0 0

[PATCH v2 2/7] serial: qcom-geni: fix shutdown race

by Johan Hovold

A commit adding back the stopping of tx on port shutdown failed to add back the locking which had also been removed by commit e83766334f96 ("tty: serial: qcom_geni_serial: No need to stop tx/rx on UART shutdown"). Holding the port lock is needed to serialise against the console code, which may update the interrupt enable register and access the port state. Fixes: d8aca2f96813 ("tty: serial: qcom-geni-serial: stop operations in progress at shutdown") Fixes: 947cc4ecc06c ("serial: qcom-geni: fix soft lockup on sw flow control and suspend") Cc: stable(a)vger.kernel.org # 6.3 Cc: Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- drivers/tty/serial/qcom_geni_serial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 9ea6bd09e665..b6a8729cee6d 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1096,10 +1096,12 @@ static void qcom_geni_serial_shutdown(struct uart_port *uport) { disable_irq(uport->irq); + uart_port_lock_irq(uport); qcom_geni_serial_stop_tx(uport); qcom_geni_serial_stop_rx(uport); qcom_geni_serial_cancel_tx_cmd(uport); + uart_port_unlock_irq(uport); } static void qcom_geni_serial_flush_buffer(struct uart_port *uport) -- 2.45.2

11 months, 1 week

4
8
0 0

FAILED: patch "[PATCH] maple_tree: correct tree corruption on spanning store" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x bea07fd63192b61209d48cbb81ef474cc3ee4c62 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101818-ducky-dallying-2814@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From bea07fd63192b61209d48cbb81ef474cc3ee4c62 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Date: Mon, 7 Oct 2024 16:28:32 +0100 Subject: [PATCH] maple_tree: correct tree corruption on spanning store Patch series "maple_tree: correct tree corruption on spanning store", v3. There has been a nasty yet subtle maple tree corruption bug that appears to have been in existence since the inception of the algorithm. This bug seems far more likely to happen since commit f8d112a4e657 ("mm/mmap: avoid zeroing vma tree in mmap_region()"), which is the point at which reports started to be submitted concerning this bug. We were made definitely aware of the bug thanks to the kind efforts of Bert Karwatzki who helped enormously in my being able to track this down and identify the cause of it. The bug arises when an attempt is made to perform a spanning store across two leaf nodes, where the right leaf node is the rightmost child of the shared parent, AND the store completely consumes the right-mode node. This results in mas_wr_spanning_store() mitakenly duplicating the new and existing entries at the maximum pivot within the range, and thus maple tree corruption. The fix patch corrects this by detecting this scenario and disallowing the mistaken duplicate copy. The fix patch commit message goes into great detail as to how this occurs. This series also includes a test which reliably reproduces the issue, and asserts that the fix works correctly. Bert has kindly tested the fix and confirmed it resolved his issues. Also Mikhail Gavrilov kindly reported what appears to be precisely the same bug, which this fix should also resolve. This patch (of 2): There has been a subtle bug present in the maple tree implementation from its inception. This arises from how stores are performed - when a store occurs, it will overwrite overlapping ranges and adjust the tree as necessary to accommodate this. A range may always ultimately span two leaf nodes. In this instance we walk the two leaf nodes, determine which elements are not overwritten to the left and to the right of the start and end of the ranges respectively and then rebalance the tree to contain these entries and the newly inserted one. This kind of store is dubbed a 'spanning store' and is implemented by mas_wr_spanning_store(). In order to reach this stage, mas_store_gfp() invokes mas_wr_preallocate(), mas_wr_store_type() and mas_wr_walk() in turn to walk the tree and update the object (mas) to traverse to the location where the write should be performed, determining its store type. When a spanning store is required, this function returns false stopping at the parent node which contains the target range, and mas_wr_store_type() marks the mas->store_type as wr_spanning_store to denote this fact. When we go to perform the store in mas_wr_spanning_store(), we first determine the elements AFTER the END of the range we wish to store (that is, to the right of the entry to be inserted) - we do this by walking to the NEXT pivot in the tree (i.e. r_mas.last + 1), starting at the node we have just determined contains the range over which we intend to write. We then turn our attention to the entries to the left of the entry we are inserting, whose state is represented by l_mas, and copy these into a 'big node', which is a special node which contains enough slots to contain two leaf node's worth of data. We then copy the entry we wish to store immediately after this - the copy and the insertion of the new entry is performed by mas_store_b_node(). After this we copy the elements to the right of the end of the range which we are inserting, if we have not exceeded the length of the node (i.e. r_mas.offset <= r_mas.end). Herein lies the bug - under very specific circumstances, this logic can break and corrupt the maple tree. Consider the following tree: Height 0 Root Node / \ pivot = 0xffff / \ pivot = ULONG_MAX / \ 1 A [-----] ... / \ pivot = 0x4fff / \ pivot = 0xffff / \ 2 (LEAVES) B [-----] [-----] C ^--- Last pivot 0xffff. Now imagine we wish to store an entry in the range [0x4000, 0xffff] (note that all ranges expressed in maple tree code are inclusive): 1. mas_store_gfp() descends the tree, finds node A at <=0xffff, then determines that this is a spanning store across nodes B and C. The mas state is set such that the current node from which we traverse further is node A. 2. In mas_wr_spanning_store() we try to find elements to the right of pivot 0xffff by searching for an index of 0x10000: - mas_wr_walk_index() invokes mas_wr_walk_descend() and mas_wr_node_walk() in turn. - mas_wr_node_walk() loops over entries in node A until EITHER it finds an entry whose pivot equals or exceeds 0x10000 OR it reaches the final entry. - Since no entry has a pivot equal to or exceeding 0x10000, pivot 0xffff is selected, leading to node C. - mas_wr_walk_traverse() resets the mas state to traverse node C. We loop around and invoke mas_wr_walk_descend() and mas_wr_node_walk() in turn once again. - Again, we reach the last entry in node C, which has a pivot of 0xffff. 3. We then copy the elements to the left of 0x4000 in node B to the big node via mas_store_b_node(), and insert the new [0x4000, 0xffff] entry too. 4. We determine whether we have any entries to copy from the right of the end of the range via - and with r_mas set up at the entry at pivot 0xffff, r_mas.offset <= r_mas.end, and then we DUPLICATE the entry at pivot 0xffff. 5. BUG! The maple tree is corrupted with a duplicate entry. This requires a very specific set of circumstances - we must be spanning the last element in a leaf node, which is the last element in the parent node. spanning store across two leaf nodes with a range that ends at that shared pivot. A potential solution to this problem would simply be to reset the walk each time we traverse r_mas, however given the rarity of this situation it seems that would be rather inefficient. Instead, this patch detects if the right hand node is populated, i.e. has anything we need to copy. We do so by only copying elements from the right of the entry being inserted when the maximum value present exceeds the last, rather than basing this on offset position. The patch also updates some comments and eliminates the unused bool return value in mas_wr_walk_index(). The work performed in commit f8d112a4e657 ("mm/mmap: avoid zeroing vma tree in mmap_region()") seems to have made the probability of this event much more likely, which is the point at which reports started to be submitted concerning this bug. The motivation for this change arose from Bert Karwatzki's report of encountering mm instability after the release of kernel v6.12-rc1 which, after the use of CONFIG_DEBUG_VM_MAPLE_TREE and similar configuration options, was identified as maple tree corruption. After Bert very generously provided his time and ability to reproduce this event consistently, I was able to finally identify that the issue discussed in this commit message was occurring for him. Link: https://lkml.kernel.org/r/cover.1728314402.git.lorenzo.stoakes@oracle.com Link: https://lkml.kernel.org/r/48b349a2a0f7c76e18772712d0997a5e12ab0a3b.17283144… Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Reported-by: Bert Karwatzki <spasswolf(a)web.de> Closes: https://lore.kernel.org/all/20241001023402.3374-1-spasswolf@web.de/ Tested-by: Bert Karwatzki <spasswolf(a)web.de> Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov(a)gmail.com> Closes: https://lore.kernel.org/all/CABXGCsOPwuoNOqSMmAvWO2Fz4TEmPnjFj-b7iF+XFRu1h7… Acked-by: Vlastimil Babka <vbabka(a)suse.cz> Reviewed-by: Liam R. Howlett <Liam.Howlett(a)Oracle.com> Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov(a)gmail.com> Reviewed-by: Wei Yang <richard.weiyang(a)gmail.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Sidhartha Kumar <sidhartha.kumar(a)oracle.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/lib/maple_tree.c b/lib/maple_tree.c index ce7c7a7a8258..3619301dda2e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -2196,6 +2196,8 @@ static inline void mas_node_or_none(struct ma_state *mas, /* * mas_wr_node_walk() - Find the correct offset for the index in the @mas. + * If @mas->index cannot be found within the containing + * node, we traverse to the last entry in the node. * @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. @@ -3532,7 +3534,7 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas) return true; } -static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) +static void mas_wr_walk_index(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; @@ -3541,11 +3543,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) - return true; + return; mas_wr_walk_traverse(wr_mas); - } - return true; } /* * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. @@ -3765,8 +3765,8 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) memset(&b_node, 0, sizeof(struct maple_big_node)); /* Copy l_mas and store the value in b_node. */ mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); - /* Copy r_mas into b_node. */ - if (r_mas.offset <= r_mas.end) + /* Copy r_mas into b_node if there is anything to copy. */ + if (r_mas.max > r_mas.last) mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, &b_node, b_node.b_end + 1); else

11 months, 1 week

3
2
0 0

[PATCH 5.10 0/1] soundwire: cadence: Fix error check in cdns_xfer_msg()

by Daniil Dulov

Svacer reports redundant comparison in cdns_xfer_msg(). The problem is present in 5.10 stable release and can be fixed by the following upstream patch that can be cleanly applied to 5.10 stable branch.

11 months, 1 week

1
1
0 0

[PATCH 5.15.y 5.10.y 5.4.y] wifi: mac80211: fix potential key use-after-free

by Sherry Yang

From: Johannes Berg <johannes.berg(a)intel.com> [ Upstream commit 31db78a4923ef5e2008f2eed321811ca79e7f71b ] When ieee80211_key_link() is called by ieee80211_gtk_rekey_add() but returns 0 due to KRACK protection (identical key reinstall), ieee80211_gtk_rekey_add() will still return a pointer into the key, in a potential use-after-free. This normally doesn't happen since it's only called by iwlwifi in case of WoWLAN rekey offload which has its own KRACK protection, but still better to fix, do that by returning an error code and converting that to success on the cfg80211 boundary only, leaving the error for bad callers of ieee80211_gtk_rekey_add(). Reported-by: Dan Carpenter <dan.carpenter(a)linaro.org> Fixes: fdf7cb4185b6 ("mac80211: accept key reinstall without changing anything") Signed-off-by: Johannes Berg <johannes.berg(a)intel.com> Signed-off-by: Sasha Levin <sashal(a)kernel.org> [Sherry: bp to fix CVE-2023-52530, resolved minor conflicts in net/mac80211/cfg.c because of context change due to missing commit 23a5f0af6ff4 ("wifi: mac80211: remove cipher scheme support") ccdde7c74ffd ("wifi: mac80211: properly implement MLO key handling")] Signed-off-by: Sherry Yang <sherry.yang(a)oracle.com> --- net/mac80211/cfg.c | 3 +++ net/mac80211/key.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f652982a106b..c54b3be62c0a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -511,6 +511,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, sta->cipher_scheme = cs; err = ieee80211_key_link(key, sdata, sta); + /* KRACK protection, shouldn't happen but just silently accept key */ + if (err == -EALREADY) + err = 0; out_unlock: mutex_unlock(&local->sta_mtx); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index f695fc80088b..7b427e39831b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -843,7 +843,7 @@ int ieee80211_key_link(struct ieee80211_key *key, */ if (ieee80211_key_identical(sdata, old_key, key)) { ieee80211_key_free_unused(key); - ret = 0; + ret = -EALREADY; goto out; } -- 2.46.0

11 months, 1 week

2
1
0 0

[PATCH 6.1.y] nvme: fix metadata handling in nvme-passthrough

by Puranjay Mohan

[ Upstream commit 7c2fd76048e95dd267055b5f5e0a48e6e7c81fd9 ] On an NVMe namespace that does not support metadata, it is possible to send an IO command with metadata through io-passthru. This allows issues like [1] to trigger in the completion code path. nvme_map_user_request() doesn't check if the namespace supports metadata before sending it forward. It also allows admin commands with metadata to be processed as it ignores metadata when bdev == NULL and may report success. Reject an IO command with metadata when the NVMe namespace doesn't support it and reject an admin command if it has metadata. [1] https://lore.kernel.org/all/mb61pcylvnym8.fsf@amazon.com/ Suggested-by: Christoph Hellwig <hch(a)lst.de> Reviewed-by: Christoph Hellwig <hch(a)lst.de> Reviewed-by: Sagi Grimberg <sagi(a)grimberg.me> Reviewed-by: Anuj Gupta <anuj20.g(a)samsung.com> Signed-off-by: Keith Busch <kbusch(a)kernel.org> [ Minor changes to make it work on 6.1 ] Signed-off-by: Puranjay Mohan <pjy(a)amazon.com> --- drivers/nvme/host/ioctl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index b3e322e4ade38..a02873792890e 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -3,6 +3,7 @@ * Copyright (c) 2011-2014, Intel Corporation. * Copyright (c) 2017-2021 Christoph Hellwig. */ +#include <linux/blk-integrity.h> #include <linux/ptrace.h> /* for force_successful_syscall_return */ #include <linux/nvme_ioctl.h> #include <linux/io_uring.h> @@ -95,10 +96,15 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct request_queue *q = req->q; struct nvme_ns *ns = q->queuedata; struct block_device *bdev = ns ? ns->disk->part0 : NULL; + bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk); + bool has_metadata = meta_buffer && meta_len; struct bio *bio = NULL; void *meta = NULL; int ret; + if (has_metadata && !supports_metadata) + return -EINVAL; + if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) { struct iov_iter iter; @@ -122,7 +128,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, if (bdev) bio_set_dev(bio, bdev); - if (bdev && meta_buffer && meta_len) { + if (has_metadata) { meta = nvme_add_user_metadata(req, meta_buffer, meta_len, meta_seed); if (IS_ERR(meta)) { -- 2.40.1

11 months, 1 week

2
1
0 0

[PATCH stable 6.1 0/2] devlink: Fix RCU stall when unregistering a devlink instance

by Ido Schimmel

Upstream commit c2368b19807a ("net: devlink: introduce "unregistering" mark and use it during devlinks iteration") in v6.0 introduced a race when unregistering a devlink instance that can result in RCU stalls and in the system completely locking up. Exact details and reproducer can be found here [1]. The bug was inadvertently fixed in v6.3 by upstream commit d77278196441 ("devlink: bump the instance index directly when iterating"). This patchset fixes the bug by backporting the second commit and a related dependency from v6.3 to v6.1.y while adjusting them to the devlink file structure in v6.1.y (net/devlink/{core.c,devl_internal.h} -> net/devlink/leftover.c). Tested by running the devlink tests under tools/testing/selftests/drivers/net/netdevsim/ and the reproducer mentioned in [1]. [1] https://lore.kernel.org/stable/20241001112035.973187-1-idosch@nvidia.com/ Jakub Kicinski (2): devlink: drop the filter argument from devlinks_xa_find_get devlink: bump the instance index directly when iterating net/devlink/leftover.c | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) -- 2.47.0

11 months, 1 week

2
3
0 0

FAILED: patch "[PATCH] secretmem: disable memfd_secret() if arch cannot set direct" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101412-prowling-snowflake-9fe0@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: 532b53cebe58 ("secretmem: disable memfd_secret() if arch cannot set direct map") f7c5b1aab5ef ("mm/secretmem: remove reduntant return value") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 532b53cebe58f34ce1c0f34d866f5c0e335c53c6 Mon Sep 17 00:00:00 2001 From: Patrick Roy <roypat(a)amazon.co.uk> Date: Tue, 1 Oct 2024 09:00:41 +0100 Subject: [PATCH] secretmem: disable memfd_secret() if arch cannot set direct map Return -ENOSYS from memfd_secret() syscall if !can_set_direct_map(). This is the case for example on some arm64 configurations, where marking 4k PTEs in the direct map not present can only be done if the direct map is set up at 4k granularity in the first place (as ARM's break-before-make semantics do not easily allow breaking apart large/gigantic pages). More precisely, on arm64 systems with !can_set_direct_map(), set_direct_map_invalid_noflush() is a no-op, however it returns success (0) instead of an error. This means that memfd_secret will seemingly "work" (e.g. syscall succeeds, you can mmap the fd and fault in pages), but it does not actually achieve its goal of removing its memory from the direct map. Note that with this patch, memfd_secret() will start erroring on systems where can_set_direct_map() returns false (arm64 with CONFIG_RODATA_FULL_DEFAULT_ENABLED=n, CONFIG_DEBUG_PAGEALLOC=n and CONFIG_KFENCE=n), but that still seems better than the current silent failure. Since CONFIG_RODATA_FULL_DEFAULT_ENABLED defaults to 'y', most arm64 systems actually have a working memfd_secret() and aren't be affected. From going through the iterations of the original memfd_secret patch series, it seems that disabling the syscall in these scenarios was the intended behavior [1] (preferred over having set_direct_map_invalid_noflush return an error as that would result in SIGBUSes at page-fault time), however the check for it got dropped between v16 [2] and v17 [3], when secretmem moved away from CMA allocations. [1]: https://lore.kernel.org/lkml/20201124164930.GK8537@kernel.org/ [2]: https://lore.kernel.org/lkml/20210121122723.3446-11-rppt@kernel.org/#t [3]: https://lore.kernel.org/lkml/20201125092208.12544-10-rppt@kernel.org/ Link: https://lkml.kernel.org/r/20241001080056.784735-1-roypat@amazon.co.uk Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") Signed-off-by: Patrick Roy <roypat(a)amazon.co.uk> Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org> Cc: Alexander Graf <graf(a)amazon.com> Cc: David Hildenbrand <david(a)redhat.com> Cc: James Gowans <jgowans(a)amazon.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/secretmem.c b/mm/secretmem.c index 3afb5ad701e1..399552814fd0 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -238,7 +238,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) /* make sure local flags do not confict with global fcntl.h */ BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); - if (!secretmem_enable) + if (!secretmem_enable || !can_set_direct_map()) return -ENOSYS; if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) @@ -280,7 +280,7 @@ static struct file_system_type secretmem_fs = { static int __init secretmem_init(void) { - if (!secretmem_enable) + if (!secretmem_enable || !can_set_direct_map()) return 0; secretmem_mnt = kern_mount(&secretmem_fs);

11 months, 1 week

3
4
0 0

[PATCH 6.6 00/21] xfs backports for 6.6.y (from 6.10)

by Catherine Hoang

Hello, This series contains backports for 6.6 from the 6.10 release. This patchset has gone through xfs testing and review. Christoph Hellwig (4): xfs: fix error returns from xfs_bmapi_write xfs: fix xfs_bmap_add_extent_delay_real for partial conversions xfs: remove a racy if_bytes check in xfs_reflink_end_cow_extent xfs: fix freeing speculative preallocations for preallocated files Darrick J. Wong (11): xfs: require XFS_SB_FEAT_INCOMPAT_LOG_XATTRS for attr log intent item recovery xfs: check opcode and iovec count match in xlog_recover_attri_commit_pass2 xfs: fix missing check for invalid attr flags xfs: check shortform attr entry flags specifically xfs: validate recovered name buffers when recovering xattr items xfs: enforce one namespace per attribute xfs: revert commit 44af6c7e59b12 xfs: use dontcache for grabbing inodes during scrub xfs: allow symlinks with short remote targets xfs: restrict when we try to align cow fork delalloc to cowextsz hints xfs: allow unlinked symlinks and dirs with zero size Dave Chinner (1): xfs: fix unlink vs cluster buffer instantiation race Wengang Wang (1): xfs: make sure sb_fdblocks is non-negative Zhang Yi (4): xfs: match lock mode in xfs_buffered_write_iomap_begin() xfs: make the seq argument to xfs_bmapi_convert_delalloc() optional xfs: make xfs_bmapi_convert_delalloc() to allocate the target offset xfs: convert delayed extents to unwritten when zeroing post eof blocks fs/xfs/libxfs/xfs_attr.c | 11 +++ fs/xfs/libxfs/xfs_attr.h | 4 +- fs/xfs/libxfs/xfs_attr_leaf.c | 6 +- fs/xfs/libxfs/xfs_attr_remote.c | 1 - fs/xfs/libxfs/xfs_bmap.c | 130 ++++++++++++++++++++++++++------ fs/xfs/libxfs/xfs_da_btree.c | 20 ++--- fs/xfs/libxfs/xfs_da_format.h | 5 ++ fs/xfs/libxfs/xfs_inode_buf.c | 47 ++++++++++-- fs/xfs/libxfs/xfs_sb.c | 7 +- fs/xfs/scrub/attr.c | 47 +++++++----- fs/xfs/scrub/common.c | 12 +-- fs/xfs/scrub/scrub.h | 7 ++ fs/xfs/xfs_aops.c | 54 ++++--------- fs/xfs/xfs_attr_item.c | 98 ++++++++++++++++++++---- fs/xfs/xfs_attr_list.c | 11 ++- fs/xfs/xfs_bmap_util.c | 61 +++++++++------ fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_dquot.c | 1 - fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_inode.c | 37 +++++---- fs/xfs/xfs_iomap.c | 81 +++++++++++--------- fs/xfs/xfs_reflink.c | 20 ----- fs/xfs/xfs_rtalloc.c | 2 - 23 files changed, 433 insertions(+), 233 deletions(-) -- 2.39.3

11 months, 1 week

2
22
0 0

FAILED: patch "[PATCH] mm: don't install PMD mappings when THPs are disabled by the" failed to apply to 6.11-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.11-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.11.y git checkout FETCH_HEAD git cherry-pick -x 2b0f922323ccfa76219bcaacd35cd50aeaa1359 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101841-keep-coma-4963@gregkh' --subject-prefix 'PATCH 6.11.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2b0f922323ccfa76219bcaacd35cd50aeaa13592 Mon Sep 17 00:00:00 2001 From: David Hildenbrand <david(a)redhat.com> Date: Fri, 11 Oct 2024 12:24:45 +0200 Subject: [PATCH] mm: don't install PMD mappings when THPs are disabled by the hw/process/vma We (or rather, readahead logic :) ) might be allocating a THP in the pagecache and then try mapping it into a process that explicitly disabled THP: we might end up installing PMD mappings. This is a problem for s390x KVM, which explicitly remaps all PMD-mapped THPs to be PTE-mapped in s390_enable_sie()->thp_split_mm(), before starting the VM. For example, starting a VM backed on a file system with large folios supported makes the VM crash when the VM tries accessing such a mapping using KVM. Is it also a problem when the HW disabled THP using TRANSPARENT_HUGEPAGE_UNSUPPORTED? At least on x86 this would be the case without X86_FEATURE_PSE. In the future, we might be able to do better on s390x and only disallow PMD mappings -- what s390x and likely TRANSPARENT_HUGEPAGE_UNSUPPORTED really wants. For now, fix it by essentially performing the same check as would be done in __thp_vma_allowable_orders() or in shmem code, where this works as expected, and disallow PMD mappings, making us fallback to PTE mappings. Link: https://lkml.kernel.org/r/20241011102445.934409-3-david@redhat.com Fixes: 793917d997df ("mm/readahead: Add large folio readahead") Signed-off-by: David Hildenbrand <david(a)redhat.com> Reported-by: Leo Fu <bfu(a)redhat.com> Tested-by: Thomas Huth <thuth(a)redhat.com> Cc: Thomas Huth <thuth(a)redhat.com> Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org> Cc: Ryan Roberts <ryan.roberts(a)arm.com> Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com> Cc: Janosch Frank <frankja(a)linux.ibm.com> Cc: Claudio Imbrenda <imbrenda(a)linux.ibm.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/memory.c b/mm/memory.c index c0869a962ddd..30feedabc932 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4920,6 +4920,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) pmd_t entry; vm_fault_t ret = VM_FAULT_FALLBACK; + /* + * It is too late to allocate a small folio, we already have a large + * folio in the pagecache: especially s390 KVM cannot tolerate any + * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any + * PMD mappings if THPs are disabled. + */ + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) + return ret; + if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) return ret;

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] mm: huge_memory: add vma_thp_disabled() and" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 963756aac1f011d904ddd9548ae82286d3a91f96 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101848-lucid-mountain-2cdf@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 963756aac1f011d904ddd9548ae82286d3a91f96 Mon Sep 17 00:00:00 2001 From: Kefeng Wang <wangkefeng.wang(a)huawei.com> Date: Fri, 11 Oct 2024 12:24:44 +0200 Subject: [PATCH] mm: huge_memory: add vma_thp_disabled() and thp_disabled_by_hw() Patch series "mm: don't install PMD mappings when THPs are disabled by the hw/process/vma". During testing, it was found that we can get PMD mappings in processes where THP (and more precisely, PMD mappings) are supposed to be disabled. While it works as expected for anon+shmem, the pagecache is the problematic bit. For s390 KVM this currently means that a VM backed by a file located on filesystem with large folio support can crash when KVM tries accessing the problematic page, because the readahead logic might decide to use a PMD-sized THP and faulting it into the page tables will install a PMD mapping, something that s390 KVM cannot tolerate. This might also be a problem with HW that does not support PMD mappings, but I did not try reproducing it. Fix it by respecting the ways to disable THPs when deciding whether we can install a PMD mapping. khugepaged should already be taking care of not collapsing if THPs are effectively disabled for the hw/process/vma. This patch (of 2): Add vma_thp_disabled() and thp_disabled_by_hw() helpers to be shared by shmem_allowable_huge_orders() and __thp_vma_allowable_orders(). [david(a)redhat.com: rename to vma_thp_disabled(), split out thp_disabled_by_hw() ] Link: https://lkml.kernel.org/r/20241011102445.934409-2-david@redhat.com Fixes: 793917d997df ("mm/readahead: Add large folio readahead") Signed-off-by: Kefeng Wang <wangkefeng.wang(a)huawei.com> Signed-off-by: David Hildenbrand <david(a)redhat.com> Reported-by: Leo Fu <bfu(a)redhat.com> Tested-by: Thomas Huth <thuth(a)redhat.com> Reviewed-by: Ryan Roberts <ryan.roberts(a)arm.com> Cc: Boqiao Fu <bfu(a)redhat.com> Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com> Cc: Claudio Imbrenda <imbrenda(a)linux.ibm.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Janosch Frank <frankja(a)linux.ibm.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 67d0ab3c3bba..ef5b80e48599 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -322,6 +322,24 @@ struct thpsize { (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) +static inline bool vma_thp_disabled(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + /* + * Explicitly disabled through madvise or prctl, or some + * architectures may disable THP for some mappings, for + * example, s390 kvm. + */ + return (vm_flags & VM_NOHUGEPAGE) || + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags); +} + +static inline bool thp_disabled_by_hw(void) +{ + /* If the hardware/firmware marked hugepage support disabled. */ + return transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED); +} + unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 87b49ecc7b1e..2fb328880b50 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -109,18 +109,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, if (!vma->vm_mm) /* vdso */ return 0; - /* - * Explicitly disabled through madvise or prctl, or some - * architectures may disable THP for some mappings, for - * example, s390 kvm. - * */ - if ((vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) - return 0; - /* - * If the hardware/firmware marked hugepage support disabled. - */ - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED)) + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags)) return 0; /* khugepaged doesn't collapse DAX vma, but page fault is fine. */ diff --git a/mm/shmem.c b/mm/shmem.c index 4f11b5506363..c5adb987b23c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1664,12 +1664,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, loff_t i_size; int order; - if (vma && ((vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))) - return 0; - - /* If the hardware/firmware marked hugepage support disabled. */ - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED)) + if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags))) return 0; global_huge = shmem_huge_global_enabled(inode, index, write_end,

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] mm: huge_memory: add vma_thp_disabled() and" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 963756aac1f011d904ddd9548ae82286d3a91f96 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101842-flatness-osmosis-b08e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 963756aac1f011d904ddd9548ae82286d3a91f96 Mon Sep 17 00:00:00 2001 From: Kefeng Wang <wangkefeng.wang(a)huawei.com> Date: Fri, 11 Oct 2024 12:24:44 +0200 Subject: [PATCH] mm: huge_memory: add vma_thp_disabled() and thp_disabled_by_hw() Patch series "mm: don't install PMD mappings when THPs are disabled by the hw/process/vma". During testing, it was found that we can get PMD mappings in processes where THP (and more precisely, PMD mappings) are supposed to be disabled. While it works as expected for anon+shmem, the pagecache is the problematic bit. For s390 KVM this currently means that a VM backed by a file located on filesystem with large folio support can crash when KVM tries accessing the problematic page, because the readahead logic might decide to use a PMD-sized THP and faulting it into the page tables will install a PMD mapping, something that s390 KVM cannot tolerate. This might also be a problem with HW that does not support PMD mappings, but I did not try reproducing it. Fix it by respecting the ways to disable THPs when deciding whether we can install a PMD mapping. khugepaged should already be taking care of not collapsing if THPs are effectively disabled for the hw/process/vma. This patch (of 2): Add vma_thp_disabled() and thp_disabled_by_hw() helpers to be shared by shmem_allowable_huge_orders() and __thp_vma_allowable_orders(). [david(a)redhat.com: rename to vma_thp_disabled(), split out thp_disabled_by_hw() ] Link: https://lkml.kernel.org/r/20241011102445.934409-2-david@redhat.com Fixes: 793917d997df ("mm/readahead: Add large folio readahead") Signed-off-by: Kefeng Wang <wangkefeng.wang(a)huawei.com> Signed-off-by: David Hildenbrand <david(a)redhat.com> Reported-by: Leo Fu <bfu(a)redhat.com> Tested-by: Thomas Huth <thuth(a)redhat.com> Reviewed-by: Ryan Roberts <ryan.roberts(a)arm.com> Cc: Boqiao Fu <bfu(a)redhat.com> Cc: Christian Borntraeger <borntraeger(a)linux.ibm.com> Cc: Claudio Imbrenda <imbrenda(a)linux.ibm.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Janosch Frank <frankja(a)linux.ibm.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 67d0ab3c3bba..ef5b80e48599 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -322,6 +322,24 @@ struct thpsize { (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) +static inline bool vma_thp_disabled(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + /* + * Explicitly disabled through madvise or prctl, or some + * architectures may disable THP for some mappings, for + * example, s390 kvm. + */ + return (vm_flags & VM_NOHUGEPAGE) || + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags); +} + +static inline bool thp_disabled_by_hw(void) +{ + /* If the hardware/firmware marked hugepage support disabled. */ + return transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED); +} + unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 87b49ecc7b1e..2fb328880b50 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -109,18 +109,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, if (!vma->vm_mm) /* vdso */ return 0; - /* - * Explicitly disabled through madvise or prctl, or some - * architectures may disable THP for some mappings, for - * example, s390 kvm. - * */ - if ((vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) - return 0; - /* - * If the hardware/firmware marked hugepage support disabled. - */ - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED)) + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags)) return 0; /* khugepaged doesn't collapse DAX vma, but page fault is fine. */ diff --git a/mm/shmem.c b/mm/shmem.c index 4f11b5506363..c5adb987b23c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1664,12 +1664,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, loff_t i_size; int order; - if (vma && ((vm_flags & VM_NOHUGEPAGE) || - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))) - return 0; - - /* If the hardware/firmware marked hugepage support disabled. */ - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED)) + if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags))) return 0; global_huge = shmem_huge_global_enabled(inode, index, write_end,

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] mm/swapfile: skip HugeTLB pages for unuse_vma" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 7528c4fb1237512ee18049f852f014eba80bbe8d # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101858-rewire-vocation-c981@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 7528c4fb1237512ee18049f852f014eba80bbe8d Mon Sep 17 00:00:00 2001 From: Liu Shixin <liushixin2(a)huawei.com> Date: Tue, 15 Oct 2024 09:45:21 +0800 Subject: [PATCH] mm/swapfile: skip HugeTLB pages for unuse_vma I got a bad pud error and lost a 1GB HugeTLB when calling swapoff. The problem can be reproduced by the following steps: 1. Allocate an anonymous 1GB HugeTLB and some other anonymous memory. 2. Swapout the above anonymous memory. 3. run swapoff and we will get a bad pud error in kernel message: mm/pgtable-generic.c:42: bad pud 00000000743d215d(84000001400000e7) We can tell that pud_clear_bad is called by pud_none_or_clear_bad in unuse_pud_range() by ftrace. And therefore the HugeTLB pages will never be freed because we lost it from page table. We can skip HugeTLB pages for unuse_vma to fix it. Link: https://lkml.kernel.org/r/20241015014521.570237-1-liushixin2@huawei.com Fixes: 0fe6e20b9c4c ("hugetlb, rmap: add reverse mapping for hugepage") Signed-off-by: Liu Shixin <liushixin2(a)huawei.com> Acked-by: Muchun Song <muchun.song(a)linux.dev> Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/swapfile.c b/mm/swapfile.c index eb782fcd5627..b0915f3fab31 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2313,7 +2313,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type) mmap_read_lock(mm); for_each_vma(vmi, vma) { - if (vma->anon_vma) { + if (vma->anon_vma && !is_vm_hugetlb_page(vma)) { ret = unuse_vma(vma, type); if (ret) break;

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] mm: khugepaged: fix the arguments order in" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 37f0b47c5143c2957909ced44fc09ffb118c99f7 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101803-cage-smokiness-cb8b@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 37f0b47c5143c2957909ced44fc09ffb118c99f7 Mon Sep 17 00:00:00 2001 From: Yang Shi <yang(a)os.amperecomputing.com> Date: Fri, 11 Oct 2024 18:17:02 -0700 Subject: [PATCH] mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point The "addr" and "is_shmem" arguments have different order in TP_PROTO and TP_ARGS. This resulted in the incorrect trace result: text-hugepage-644429 [276] 392092.878683: mm_khugepaged_collapse_file: mm=0xffff20025d52c440, hpage_pfn=0x200678c00, index=512, addr=1, is_shmem=0, filename=text-hugepage, nr=512, result=failed The value of "addr" is wrong because it was treated as bool value, the type of is_shmem. Fix the order in TP_PROTO to keep "addr" is before "is_shmem" since the original patch review suggested this order to achieve best packing. And use "lx" for "addr" instead of "ld" in TP_printk because address is typically shown in hex. After the fix, the trace result looks correct: text-hugepage-7291 [004] 128.627251: mm_khugepaged_collapse_file: mm=0xffff0001328f9500, hpage_pfn=0x20016ea00, index=512, addr=0x400000, is_shmem=0, filename=text-hugepage, nr=512, result=failed Link: https://lkml.kernel.org/r/20241012011702.1084846-1-yang@os.amperecomputing.… Fixes: 4c9473e87e75 ("mm/khugepaged: add tracepoint to collapse_file()") Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Gautam Menghani <gautammenghani201(a)gmail.com> Cc: Steven Rostedt (Google) <rostedt(a)goodmis.org> Cc: <stable(a)vger.kernel.org> [6.2+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index b5f5369b6300..9d5c00b0285c 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -208,7 +208,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TRACE_EVENT(mm_khugepaged_collapse_file, TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, - bool is_shmem, unsigned long addr, struct file *file, + unsigned long addr, bool is_shmem, struct file *file, int nr, int result), TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), TP_STRUCT__entry( @@ -233,7 +233,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, __entry->result = result; ), - TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s", + TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%lx, is_shmem=%d, filename=%s, nr=%d, result=%s", __entry->mm, __entry->hpfn, __entry->index, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f9c39898eaff..a420eff92011 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2227,7 +2227,7 @@ rollback: folio_put(new_folio); out: VM_BUG_ON(!list_empty(&pagelist)); - trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result); + trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem, file, HPAGE_PMD_NR, result); return result; }

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] nilfs2: propagate directory read errors from" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 08cfa12adf888db98879dbd735bc741360a34168 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101855-landowner-harness-1529@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 08cfa12adf888db98879dbd735bc741360a34168 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Date: Fri, 4 Oct 2024 12:35:31 +0900 Subject: [PATCH] nilfs2: propagate directory read errors from nilfs_find_entry() Syzbot reported that a task hang occurs in vcs_open() during a fuzzing test for nilfs2. The root cause of this problem is that in nilfs_find_entry(), which searches for directory entries, ignores errors when loading a directory page/folio via nilfs_get_folio() fails. If the filesystem images is corrupted, and the i_size of the directory inode is large, and the directory page/folio is successfully read but fails the sanity check, for example when it is zero-filled, nilfs_check_folio() may continue to spit out error messages in bursts. Fix this issue by propagating the error to the callers when loading a page/folio fails in nilfs_find_entry(). The current interface of nilfs_find_entry() and its callers is outdated and cannot propagate error codes such as -EIO and -ENOMEM returned via nilfs_find_entry(), so fix it together. Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Reported-by: Lizhi Xu <lizhi.xu(a)windriver.com> Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com Reported-by: syzbot+8a192e8d090fa9a31135(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135 Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index fe5b1a30c509..a8602729586a 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -289,7 +289,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) * The folio is mapped and unlocked. When the caller is finished with * the entry, it should call folio_release_kmap(). * - * On failure, returns NULL and the caller should ignore foliop. + * On failure, returns an error pointer and the caller should ignore foliop. */ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct folio **foliop) @@ -312,22 +312,24 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, do { char *kaddr = nilfs_get_folio(dir, n, foliop); - if (!IS_ERR(kaddr)) { - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, - "zero-length directory entry"); - folio_release_kmap(*foliop, kaddr); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - folio_release_kmap(*foliop, kaddr); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; /* next folio is past the blocks we've got */ @@ -340,7 +342,7 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: ei->i_dir_start_lookup = n; @@ -384,18 +386,18 @@ fail: return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; struct folio *folio; de = nilfs_find_entry(dir, qstr, &folio); - if (de) { - res = le64_to_cpu(de->inode); - folio_release_kmap(folio, de); - } - return res; + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index c950139db6ef..4905063790c5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + } + return d_splice_alias(inode, dentry); } @@ -263,10 +271,11 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) struct folio *folio; int err; - err = -ENOENT; de = nilfs_find_entry(dir, &dentry->d_name, &folio); - if (!de) + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } inode = d_inode(dentry); err = -EIO; @@ -362,10 +371,11 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (unlikely(err)) return err; - err = -ENOENT; old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); - if (!old_de) + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } if (S_ISDIR(old_inode->i_mode)) { err = -EIO; @@ -382,10 +392,12 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (dir_de && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); goto out_dir; + } nilfs_set_link(new_dir, new_de, new_folio, old_inode); folio_release_kmap(new_folio, new_de); nilfs_mark_inode_dirty(new_dir); @@ -440,12 +452,13 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); root = NILFS_I(d_inode(child))->i_root; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index fb1c4c5bae7c..45d03826eaf1 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -254,7 +254,7 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) /* dir.c */ int nilfs_add_link(struct dentry *, struct inode *); -ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); int nilfs_make_empty(struct inode *, struct inode *); struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, struct folio **);

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] nilfs2: propagate directory read errors from" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 08cfa12adf888db98879dbd735bc741360a34168 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101854-exterior-cufflink-ccf4@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 08cfa12adf888db98879dbd735bc741360a34168 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Date: Fri, 4 Oct 2024 12:35:31 +0900 Subject: [PATCH] nilfs2: propagate directory read errors from nilfs_find_entry() Syzbot reported that a task hang occurs in vcs_open() during a fuzzing test for nilfs2. The root cause of this problem is that in nilfs_find_entry(), which searches for directory entries, ignores errors when loading a directory page/folio via nilfs_get_folio() fails. If the filesystem images is corrupted, and the i_size of the directory inode is large, and the directory page/folio is successfully read but fails the sanity check, for example when it is zero-filled, nilfs_check_folio() may continue to spit out error messages in bursts. Fix this issue by propagating the error to the callers when loading a page/folio fails in nilfs_find_entry(). The current interface of nilfs_find_entry() and its callers is outdated and cannot propagate error codes such as -EIO and -ENOMEM returned via nilfs_find_entry(), so fix it together. Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Reported-by: Lizhi Xu <lizhi.xu(a)windriver.com> Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com Reported-by: syzbot+8a192e8d090fa9a31135(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135 Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index fe5b1a30c509..a8602729586a 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -289,7 +289,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) * The folio is mapped and unlocked. When the caller is finished with * the entry, it should call folio_release_kmap(). * - * On failure, returns NULL and the caller should ignore foliop. + * On failure, returns an error pointer and the caller should ignore foliop. */ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct folio **foliop) @@ -312,22 +312,24 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, do { char *kaddr = nilfs_get_folio(dir, n, foliop); - if (!IS_ERR(kaddr)) { - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, - "zero-length directory entry"); - folio_release_kmap(*foliop, kaddr); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - folio_release_kmap(*foliop, kaddr); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; /* next folio is past the blocks we've got */ @@ -340,7 +342,7 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: ei->i_dir_start_lookup = n; @@ -384,18 +386,18 @@ fail: return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; struct folio *folio; de = nilfs_find_entry(dir, qstr, &folio); - if (de) { - res = le64_to_cpu(de->inode); - folio_release_kmap(folio, de); - } - return res; + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index c950139db6ef..4905063790c5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + } + return d_splice_alias(inode, dentry); } @@ -263,10 +271,11 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) struct folio *folio; int err; - err = -ENOENT; de = nilfs_find_entry(dir, &dentry->d_name, &folio); - if (!de) + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } inode = d_inode(dentry); err = -EIO; @@ -362,10 +371,11 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (unlikely(err)) return err; - err = -ENOENT; old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); - if (!old_de) + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } if (S_ISDIR(old_inode->i_mode)) { err = -EIO; @@ -382,10 +392,12 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (dir_de && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); goto out_dir; + } nilfs_set_link(new_dir, new_de, new_folio, old_inode); folio_release_kmap(new_folio, new_de); nilfs_mark_inode_dirty(new_dir); @@ -440,12 +452,13 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); root = NILFS_I(d_inode(child))->i_root; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index fb1c4c5bae7c..45d03826eaf1 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -254,7 +254,7 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) /* dir.c */ int nilfs_add_link(struct dentry *, struct inode *); -ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); int nilfs_make_empty(struct inode *, struct inode *); struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, struct folio **);

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] nilfs2: propagate directory read errors from" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 08cfa12adf888db98879dbd735bc741360a34168 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101852-giggly-ipod-3b07@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 08cfa12adf888db98879dbd735bc741360a34168 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Date: Fri, 4 Oct 2024 12:35:31 +0900 Subject: [PATCH] nilfs2: propagate directory read errors from nilfs_find_entry() Syzbot reported that a task hang occurs in vcs_open() during a fuzzing test for nilfs2. The root cause of this problem is that in nilfs_find_entry(), which searches for directory entries, ignores errors when loading a directory page/folio via nilfs_get_folio() fails. If the filesystem images is corrupted, and the i_size of the directory inode is large, and the directory page/folio is successfully read but fails the sanity check, for example when it is zero-filled, nilfs_check_folio() may continue to spit out error messages in bursts. Fix this issue by propagating the error to the callers when loading a page/folio fails in nilfs_find_entry(). The current interface of nilfs_find_entry() and its callers is outdated and cannot propagate error codes such as -EIO and -ENOMEM returned via nilfs_find_entry(), so fix it together. Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Reported-by: Lizhi Xu <lizhi.xu(a)windriver.com> Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com Reported-by: syzbot+8a192e8d090fa9a31135(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135 Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index fe5b1a30c509..a8602729586a 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -289,7 +289,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) * The folio is mapped and unlocked. When the caller is finished with * the entry, it should call folio_release_kmap(). * - * On failure, returns NULL and the caller should ignore foliop. + * On failure, returns an error pointer and the caller should ignore foliop. */ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct folio **foliop) @@ -312,22 +312,24 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, do { char *kaddr = nilfs_get_folio(dir, n, foliop); - if (!IS_ERR(kaddr)) { - de = (struct nilfs_dir_entry *)kaddr; - kaddr += nilfs_last_byte(dir, n) - reclen; - while ((char *) de <= kaddr) { - if (de->rec_len == 0) { - nilfs_error(dir->i_sb, - "zero-length directory entry"); - folio_release_kmap(*foliop, kaddr); - goto out; - } - if (nilfs_match(namelen, name, de)) - goto found; - de = nilfs_next_entry(de); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); + + de = (struct nilfs_dir_entry *)kaddr; + kaddr += nilfs_last_byte(dir, n) - reclen; + while ((char *)de <= kaddr) { + if (de->rec_len == 0) { + nilfs_error(dir->i_sb, + "zero-length directory entry"); + folio_release_kmap(*foliop, kaddr); + goto out; } - folio_release_kmap(*foliop, kaddr); + if (nilfs_match(namelen, name, de)) + goto found; + de = nilfs_next_entry(de); } + folio_release_kmap(*foliop, kaddr); + if (++n >= npages) n = 0; /* next folio is past the blocks we've got */ @@ -340,7 +342,7 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, } } while (n != start); out: - return NULL; + return ERR_PTR(-ENOENT); found: ei->i_dir_start_lookup = n; @@ -384,18 +386,18 @@ fail: return NULL; } -ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { - ino_t res = 0; struct nilfs_dir_entry *de; struct folio *folio; de = nilfs_find_entry(dir, qstr, &folio); - if (de) { - res = le64_to_cpu(de->inode); - folio_release_kmap(folio, de); - } - return res; + if (IS_ERR(de)) + return PTR_ERR(de); + + *ino = le64_to_cpu(de->inode); + folio_release_kmap(folio, de); + return 0; } void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index c950139db6ef..4905063790c5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; + int res; if (dentry->d_name.len > NILFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; + res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); + if (res) { + if (res != -ENOENT) + return ERR_PTR(res); + inode = NULL; + } else { + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); + } + return d_splice_alias(inode, dentry); } @@ -263,10 +271,11 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) struct folio *folio; int err; - err = -ENOENT; de = nilfs_find_entry(dir, &dentry->d_name, &folio); - if (!de) + if (IS_ERR(de)) { + err = PTR_ERR(de); goto out; + } inode = d_inode(dentry); err = -EIO; @@ -362,10 +371,11 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (unlikely(err)) return err; - err = -ENOENT; old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); - if (!old_de) + if (IS_ERR(old_de)) { + err = PTR_ERR(old_de); goto out; + } if (S_ISDIR(old_inode->i_mode)) { err = -EIO; @@ -382,10 +392,12 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (dir_de && !nilfs_empty_dir(new_inode)) goto out_dir; - err = -ENOENT; - new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio); - if (!new_de) + new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, + &new_folio); + if (IS_ERR(new_de)) { + err = PTR_ERR(new_de); goto out_dir; + } nilfs_set_link(new_dir, new_de, new_folio, old_inode); folio_release_kmap(new_folio, new_de); nilfs_mark_inode_dirty(new_dir); @@ -440,12 +452,13 @@ out: */ static struct dentry *nilfs_get_parent(struct dentry *child) { - unsigned long ino; + ino_t ino; + int res; struct nilfs_root *root; - ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); - if (!ino) - return ERR_PTR(-ENOENT); + res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); + if (res) + return ERR_PTR(res); root = NILFS_I(d_inode(child))->i_root; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index fb1c4c5bae7c..45d03826eaf1 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -254,7 +254,7 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) /* dir.c */ int nilfs_add_link(struct dentry *, struct inode *); -ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); +int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); int nilfs_make_empty(struct inode *, struct inode *); struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, struct folio **);

11 months, 1 week

1
0
0 0

[PATCH 4.19/5.4] PCI: Add function 0 DMA alias quirk for Glenfly Arise chip

by WangYuli

[ Upstream commit 9246b487ab3c3b5993aae7552b7a4c541cc14a49 ] Add DMA support for audio function of Glenfly Arise chip, which uses Requester ID of function 0. Link: https://lore.kernel.org/r/CA2BBD087345B6D1+20240823095708.3237375-1-wangyul… Signed-off-by: SiyuLi <siyuli(a)glenfly.com> Signed-off-by: WangYuli <wangyuli(a)uniontech.com> [bhelgaas: lower-case hex to match local code, drop unused Device IDs] Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com> Reviewed-by: Takashi Iwai <tiwai(a)suse.de> --- drivers/pci/quirks.c | 4 ++++ include/linux/pci_ids.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index bb5182089096..566bedc5836b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4042,6 +4042,10 @@ static void quirk_dma_func0_alias(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe832, quirk_dma_func0_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe476, quirk_dma_func0_alias); +/* Some Glenfly chips use function 0 as the PCIe Requester ID for DMA */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_GLENFLY, 0x3d40, quirk_dma_func0_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_GLENFLY, 0x3d41, quirk_dma_func0_alias); + static void quirk_dma_func1_alias(struct pci_dev *dev) { if (PCI_FUNC(dev->devfn) != 1) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 91193284710f..5f996cf93c13 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2647,6 +2647,8 @@ #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 #define PCI_DEVICE_ID_DCI_PCCOM2 0x0004 +#define PCI_VENDOR_ID_GLENFLY 0x6766 + #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 -- 2.45.2

11 months, 1 week

2
1
0 0

Request to backport "irqchip/gic-v3-its: Fix VSYNC referencing an unmapped VPE on GIC v4.1"

by Zenghui Yu

Hi Greg, Sasha, Could you please help to backport the upstream commit 80e9963fb3b5509dfcabe9652d56bf4b35542055 ("irqchip/gic-v3-its: Fix VSYNC referencing an unmapped VPE on GIC v4.1") to * 5.10 * 5.15 * 6.1 * 6.6 trees? It can be applied and built (with arm64's defconfig) cleanly on top of the mentioned stable branches. Thanks, Zenghui

11 months, 1 week

2
1
0 0

FAILED: patch "[PATCH] selftests: mptcp: join: test for prohibited MPC to port-based" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 5afca7e996c42aed1b4a42d4712817601ba42aff # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101827-regulate-lining-6c3e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 5afca7e996c42aed1b4a42d4712817601ba42aff Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Mon, 14 Oct 2024 16:06:01 +0200 Subject: [PATCH] selftests: mptcp: join: test for prohibited MPC to port-based endp Explicitly verify that MPC connection attempts towards a port-based signal endpoint fail with a reset. Note that this new test is a bit different from the other ones, not using 'run_tests'. It is then needed to add the capture capability, and the picking the right port which have been extracted into three new helpers. The info about the capture can also be printed from a single point, which simplifies the exit paths in do_transfer(). The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Cc: stable(a)vger.kernel.org Co-developed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241014-net-mptcp-mpc-port-endp-v2-2-7faea8e6b6ae… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e8d0a01b4144..c07e2bd3a315 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -23,6 +23,7 @@ tmpfile="" cout="" err="" capout="" +cappid="" ns1="" ns2="" iptables="iptables" @@ -887,6 +888,44 @@ check_cestab() fi } +cond_start_capture() +{ + local ns="$1" + + :> "$capout" + + if $capture; then + local capuser capfile + if [ -z $SUDO_USER ]; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns") + + echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" + ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 & + cappid=$! + + sleep 1 + fi +} + +cond_stop_capture() +{ + if $capture; then + sleep 1 + kill $cappid + cat "$capout" + fi +} + +get_port() +{ + echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))" +} + do_transfer() { local listener_ns="$1" @@ -894,33 +933,17 @@ do_transfer() local cl_proto="$3" local srv_proto="$4" local connect_addr="$5" + local port - local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) - local cappid local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} local speed=${speed:-"fast"} + port=$(get_port) :> "$cout" :> "$sout" - :> "$capout" - if $capture; then - local capuser - if [ -z $SUDO_USER ] ; then - capuser="" - else - capuser="-Z $SUDO_USER" - fi - - capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") - - echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - cappid=$! - - sleep 1 - fi + cond_start_capture ${listener_ns} NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat -n @@ -1007,10 +1030,7 @@ do_transfer() wait $spid local rets=$? - if $capture; then - sleep 1 - kill $cappid - fi + cond_stop_capture NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out @@ -1026,7 +1046,6 @@ do_transfer() ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" cat /tmp/${connector_ns}.out - cat "$capout" return 1 fi @@ -1043,13 +1062,7 @@ do_transfer() fi rets=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - cat "$capout" - return 0 - fi - - cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -2873,6 +2886,32 @@ verify_listener_events() fail_test } +chk_mpc_endp_attempt() +{ + local retl=$1 + local attempts=$2 + + print_check "Connect" + + if [ ${retl} = 124 ]; then + fail_test "timeout on connect" + elif [ ${retl} = 0 ]; then + fail_test "unexpected successful connect" + else + print_ok + + print_check "Attempts" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$attempts" ]; then + fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}" + else + print_ok + fi + fi +} + add_addr_ports_tests() { # signal address with port @@ -2963,6 +3002,22 @@ add_addr_ports_tests() chk_join_nr 2 2 2 chk_add_nr 2 2 2 fi + + if reset "port-based signal endpoint must not accept mpc"; then + local port retl count + port=$(get_port) + + cond_start_capture ${ns1} + pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port} + mptcp_lib_wait_local_port_listen ${ns1} ${port} + + timeout 1 ip netns exec ${ns2} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1 + retl=$? + cond_stop_capture + + chk_mpc_endp_attempt ${retl} 1 + fi } syncookies_tests()

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] selftests: mptcp: join: test for prohibited MPC to port-based" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 5afca7e996c42aed1b4a42d4712817601ba42aff # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101826-outshine-powdered-a548@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 5afca7e996c42aed1b4a42d4712817601ba42aff Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Mon, 14 Oct 2024 16:06:01 +0200 Subject: [PATCH] selftests: mptcp: join: test for prohibited MPC to port-based endp Explicitly verify that MPC connection attempts towards a port-based signal endpoint fail with a reset. Note that this new test is a bit different from the other ones, not using 'run_tests'. It is then needed to add the capture capability, and the picking the right port which have been extracted into three new helpers. The info about the capture can also be printed from a single point, which simplifies the exit paths in do_transfer(). The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Cc: stable(a)vger.kernel.org Co-developed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241014-net-mptcp-mpc-port-endp-v2-2-7faea8e6b6ae… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e8d0a01b4144..c07e2bd3a315 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -23,6 +23,7 @@ tmpfile="" cout="" err="" capout="" +cappid="" ns1="" ns2="" iptables="iptables" @@ -887,6 +888,44 @@ check_cestab() fi } +cond_start_capture() +{ + local ns="$1" + + :> "$capout" + + if $capture; then + local capuser capfile + if [ -z $SUDO_USER ]; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns") + + echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" + ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 & + cappid=$! + + sleep 1 + fi +} + +cond_stop_capture() +{ + if $capture; then + sleep 1 + kill $cappid + cat "$capout" + fi +} + +get_port() +{ + echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))" +} + do_transfer() { local listener_ns="$1" @@ -894,33 +933,17 @@ do_transfer() local cl_proto="$3" local srv_proto="$4" local connect_addr="$5" + local port - local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) - local cappid local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} local speed=${speed:-"fast"} + port=$(get_port) :> "$cout" :> "$sout" - :> "$capout" - if $capture; then - local capuser - if [ -z $SUDO_USER ] ; then - capuser="" - else - capuser="-Z $SUDO_USER" - fi - - capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") - - echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - cappid=$! - - sleep 1 - fi + cond_start_capture ${listener_ns} NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat -n @@ -1007,10 +1030,7 @@ do_transfer() wait $spid local rets=$? - if $capture; then - sleep 1 - kill $cappid - fi + cond_stop_capture NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out @@ -1026,7 +1046,6 @@ do_transfer() ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" cat /tmp/${connector_ns}.out - cat "$capout" return 1 fi @@ -1043,13 +1062,7 @@ do_transfer() fi rets=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - cat "$capout" - return 0 - fi - - cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -2873,6 +2886,32 @@ verify_listener_events() fail_test } +chk_mpc_endp_attempt() +{ + local retl=$1 + local attempts=$2 + + print_check "Connect" + + if [ ${retl} = 124 ]; then + fail_test "timeout on connect" + elif [ ${retl} = 0 ]; then + fail_test "unexpected successful connect" + else + print_ok + + print_check "Attempts" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$attempts" ]; then + fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}" + else + print_ok + fi + fi +} + add_addr_ports_tests() { # signal address with port @@ -2963,6 +3002,22 @@ add_addr_ports_tests() chk_join_nr 2 2 2 chk_add_nr 2 2 2 fi + + if reset "port-based signal endpoint must not accept mpc"; then + local port retl count + port=$(get_port) + + cond_start_capture ${ns1} + pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port} + mptcp_lib_wait_local_port_listen ${ns1} ${port} + + timeout 1 ip netns exec ${ns2} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1 + retl=$? + cond_stop_capture + + chk_mpc_endp_attempt ${retl} 1 + fi } syncookies_tests()

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] selftests: mptcp: join: test for prohibited MPC to port-based" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 5afca7e996c42aed1b4a42d4712817601ba42aff # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101826-algorithm-figure-3cf3@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 5afca7e996c42aed1b4a42d4712817601ba42aff Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Mon, 14 Oct 2024 16:06:01 +0200 Subject: [PATCH] selftests: mptcp: join: test for prohibited MPC to port-based endp Explicitly verify that MPC connection attempts towards a port-based signal endpoint fail with a reset. Note that this new test is a bit different from the other ones, not using 'run_tests'. It is then needed to add the capture capability, and the picking the right port which have been extracted into three new helpers. The info about the capture can also be printed from a single point, which simplifies the exit paths in do_transfer(). The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Cc: stable(a)vger.kernel.org Co-developed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241014-net-mptcp-mpc-port-endp-v2-2-7faea8e6b6ae… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e8d0a01b4144..c07e2bd3a315 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -23,6 +23,7 @@ tmpfile="" cout="" err="" capout="" +cappid="" ns1="" ns2="" iptables="iptables" @@ -887,6 +888,44 @@ check_cestab() fi } +cond_start_capture() +{ + local ns="$1" + + :> "$capout" + + if $capture; then + local capuser capfile + if [ -z $SUDO_USER ]; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns") + + echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" + ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 & + cappid=$! + + sleep 1 + fi +} + +cond_stop_capture() +{ + if $capture; then + sleep 1 + kill $cappid + cat "$capout" + fi +} + +get_port() +{ + echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))" +} + do_transfer() { local listener_ns="$1" @@ -894,33 +933,17 @@ do_transfer() local cl_proto="$3" local srv_proto="$4" local connect_addr="$5" + local port - local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) - local cappid local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} local speed=${speed:-"fast"} + port=$(get_port) :> "$cout" :> "$sout" - :> "$capout" - if $capture; then - local capuser - if [ -z $SUDO_USER ] ; then - capuser="" - else - capuser="-Z $SUDO_USER" - fi - - capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") - - echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - cappid=$! - - sleep 1 - fi + cond_start_capture ${listener_ns} NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat -n @@ -1007,10 +1030,7 @@ do_transfer() wait $spid local rets=$? - if $capture; then - sleep 1 - kill $cappid - fi + cond_stop_capture NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out @@ -1026,7 +1046,6 @@ do_transfer() ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" cat /tmp/${connector_ns}.out - cat "$capout" return 1 fi @@ -1043,13 +1062,7 @@ do_transfer() fi rets=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - cat "$capout" - return 0 - fi - - cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -2873,6 +2886,32 @@ verify_listener_events() fail_test } +chk_mpc_endp_attempt() +{ + local retl=$1 + local attempts=$2 + + print_check "Connect" + + if [ ${retl} = 124 ]; then + fail_test "timeout on connect" + elif [ ${retl} = 0 ]; then + fail_test "unexpected successful connect" + else + print_ok + + print_check "Attempts" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$attempts" ]; then + fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}" + else + print_ok + fi + fi +} + add_addr_ports_tests() { # signal address with port @@ -2963,6 +3002,22 @@ add_addr_ports_tests() chk_join_nr 2 2 2 chk_add_nr 2 2 2 fi + + if reset "port-based signal endpoint must not accept mpc"; then + local port retl count + port=$(get_port) + + cond_start_capture ${ns1} + pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port} + mptcp_lib_wait_local_port_listen ${ns1} ${port} + + timeout 1 ip netns exec ${ns2} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1 + retl=$? + cond_stop_capture + + chk_mpc_endp_attempt ${retl} 1 + fi } syncookies_tests()

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] mptcp: prevent MPC handshake on port-based signal endpoints" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 3d041393ea8c815f773020fb4a995331a69c0139 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101816-ripping-veggie-2a69@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 3d041393ea8c815f773020fb4a995331a69c0139 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Mon, 14 Oct 2024 16:06:00 +0200 Subject: [PATCH] mptcp: prevent MPC handshake on port-based signal endpoints Syzkaller reported a lockdep splat: ============================================ WARNING: possible recursive locking detected 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Not tainted -------------------------------------------- syz-executor364/5113 is trying to acquire lock: ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 but task is already holding lock: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(k-slock-AF_INET); lock(k-slock-AF_INET); *** DEADLOCK *** May be due to missing lock nesting notation 7 locks held by syz-executor364/5113: #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x153/0x1b10 net/mptcp/protocol.c:1806 #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg_fastopen+0x11f/0x530 net/mptcp/protocol.c:1727 #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x5f/0x1b80 net/ipv4/ip_output.c:470 #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1390 net/ipv4/ip_output.c:228 #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: local_lock_acquire include/linux/local_lock_internal.h:29 [inline] #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x33b/0x15b0 net/core/dev.c:6104 #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x230/0x5f0 net/ipv4/ip_input.c:232 #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 stack backtrace: CPU: 0 UID: 0 PID: 5113 Comm: syz-executor364 Not tainted 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 check_deadlock kernel/locking/lockdep.c:3061 [inline] validate_chain+0x15d3/0x5900 kernel/locking/lockdep.c:3855 __lock_acquire+0x137a/0x2040 kernel/locking/lockdep.c:5142 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 mptcp_sk_clone_init+0x32/0x13c0 net/mptcp/protocol.c:3279 subflow_syn_recv_sock+0x931/0x1920 net/mptcp/subflow.c:874 tcp_check_req+0xfe4/0x1a20 net/ipv4/tcp_minisocks.c:853 tcp_v4_rcv+0x1c3e/0x37f0 net/ipv4/tcp_ipv4.c:2267 ip_protocol_deliver_rcu+0x22e/0x440 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x341/0x5f0 net/ipv4/ip_input.c:233 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x2bf/0x650 net/core/dev.c:5775 process_backlog+0x662/0x15b0 net/core/dev.c:6108 __napi_poll+0xcb/0x490 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:6963 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 do_softirq+0x11b/0x1e0 kernel/softirq.c:455 </IRQ> <TASK> __local_bh_enable_ip+0x1bb/0x200 kernel/softirq.c:382 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:908 [inline] __dev_queue_xmit+0x1763/0x3e90 net/core/dev.c:4450 dev_queue_xmit include/linux/netdevice.h:3105 [inline] neigh_hh_output include/net/neighbour.h:526 [inline] neigh_output include/net/neighbour.h:540 [inline] ip_finish_output2+0xd41/0x1390 net/ipv4/ip_output.c:235 ip_local_out net/ipv4/ip_output.c:129 [inline] __ip_queue_xmit+0x118c/0x1b80 net/ipv4/ip_output.c:535 __tcp_transmit_skb+0x2544/0x3b30 net/ipv4/tcp_output.c:1466 tcp_rcv_synsent_state_process net/ipv4/tcp_input.c:6542 [inline] tcp_rcv_state_process+0x2c32/0x4570 net/ipv4/tcp_input.c:6729 tcp_v4_do_rcv+0x77d/0xc70 net/ipv4/tcp_ipv4.c:1934 sk_backlog_rcv include/net/sock.h:1111 [inline] __release_sock+0x214/0x350 net/core/sock.c:3004 release_sock+0x61/0x1f0 net/core/sock.c:3558 mptcp_sendmsg_fastopen+0x1ad/0x530 net/mptcp/protocol.c:1733 mptcp_sendmsg+0x1884/0x1b10 net/mptcp/protocol.c:1812 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 ___sys_sendmsg net/socket.c:2651 [inline] __sys_sendmmsg+0x3b2/0x740 net/socket.c:2737 __do_sys_sendmmsg net/socket.c:2766 [inline] __se_sys_sendmmsg net/socket.c:2763 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2763 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f04fb13a6b9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 01 1a 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd651f42d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f04fb13a6b9 RDX: 0000000000000001 RSI: 0000000020000d00 RDI: 0000000000000004 RBP: 00007ffd651f4310 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000020000080 R11: 0000000000000246 R12: 00000000000f4240 R13: 00007f04fb187449 R14: 00007ffd651f42f4 R15: 00007ffd651f4300 </TASK> As noted by Cong Wang, the splat is false positive, but the code path leading to the report is an unexpected one: a client is attempting an MPC handshake towards the in-kernel listener created by the in-kernel PM for a port based signal endpoint. Such connection will be never accepted; many of them can make the listener queue full and preventing the creation of MPJ subflow via such listener - its intended role. Explicitly detect this scenario at initial-syn time and drop the incoming MPC request. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Cc: stable(a)vger.kernel.org Reported-by: syzbot+f4aacdfef2c6a6529c3e(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f4aacdfef2c6a6529c3e Cc: Cong Wang <cong.wang(a)bytedance.com> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241014-net-mptcp-mpc-port-endp-v2-1-7faea8e6b6ae… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index ad88bd3c58df..19eb9292bd60 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -17,6 +17,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), + SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 3206cdda8bb1..128282982843 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -12,6 +12,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */ + MPTCP_MIB_MPCAPABLEENDPATTEMPT, /* Prohibited MPC to port-based endp */ MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f6f0a38a0750..1a78998fe1f4 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1121,6 +1121,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); err = __inet_listen_sk(ssk, backlog); if (!err) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 74417aae08d0..568a72702b08 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -535,6 +535,7 @@ struct mptcp_subflow_context { __unused : 8; bool data_avail; bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? */ u32 remote_nonce; u64 thmac; u32 local_nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 25dde81bcb75..6170f2fff71e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -132,6 +132,13 @@ static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason) } } +static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb) +{ + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT); + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + return -EPERM; +} + /* Init mptcp request socket. * * Returns an error code if a JOIN has failed and a TCP reset @@ -165,6 +172,8 @@ static int subflow_check_req(struct request_sock *req, if (opt_mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); + if (unlikely(listener->pm_listener)) + return subflow_reset_req_endp(req, skb); if (opt_mp_join) return 0; } else if (opt_mp_join) { @@ -172,6 +181,8 @@ static int subflow_check_req(struct request_sock *req, if (mp_opt.backup) SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); + } else if (unlikely(listener->pm_listener)) { + return subflow_reset_req_endp(req, skb); } if (opt_mp_capable && listener->request_mptcp) {

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] mptcp: prevent MPC handshake on port-based signal endpoints" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 3d041393ea8c815f773020fb4a995331a69c0139 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101814-postal-swarm-0080@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 3d041393ea8c815f773020fb4a995331a69c0139 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Mon, 14 Oct 2024 16:06:00 +0200 Subject: [PATCH] mptcp: prevent MPC handshake on port-based signal endpoints Syzkaller reported a lockdep splat: ============================================ WARNING: possible recursive locking detected 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Not tainted -------------------------------------------- syz-executor364/5113 is trying to acquire lock: ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 but task is already holding lock: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(k-slock-AF_INET); lock(k-slock-AF_INET); *** DEADLOCK *** May be due to missing lock nesting notation 7 locks held by syz-executor364/5113: #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x153/0x1b10 net/mptcp/protocol.c:1806 #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg_fastopen+0x11f/0x530 net/mptcp/protocol.c:1727 #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x5f/0x1b80 net/ipv4/ip_output.c:470 #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1390 net/ipv4/ip_output.c:228 #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: local_lock_acquire include/linux/local_lock_internal.h:29 [inline] #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x33b/0x15b0 net/core/dev.c:6104 #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x230/0x5f0 net/ipv4/ip_input.c:232 #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 stack backtrace: CPU: 0 UID: 0 PID: 5113 Comm: syz-executor364 Not tainted 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 check_deadlock kernel/locking/lockdep.c:3061 [inline] validate_chain+0x15d3/0x5900 kernel/locking/lockdep.c:3855 __lock_acquire+0x137a/0x2040 kernel/locking/lockdep.c:5142 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 mptcp_sk_clone_init+0x32/0x13c0 net/mptcp/protocol.c:3279 subflow_syn_recv_sock+0x931/0x1920 net/mptcp/subflow.c:874 tcp_check_req+0xfe4/0x1a20 net/ipv4/tcp_minisocks.c:853 tcp_v4_rcv+0x1c3e/0x37f0 net/ipv4/tcp_ipv4.c:2267 ip_protocol_deliver_rcu+0x22e/0x440 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x341/0x5f0 net/ipv4/ip_input.c:233 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x2bf/0x650 net/core/dev.c:5775 process_backlog+0x662/0x15b0 net/core/dev.c:6108 __napi_poll+0xcb/0x490 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:6963 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 do_softirq+0x11b/0x1e0 kernel/softirq.c:455 </IRQ> <TASK> __local_bh_enable_ip+0x1bb/0x200 kernel/softirq.c:382 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:908 [inline] __dev_queue_xmit+0x1763/0x3e90 net/core/dev.c:4450 dev_queue_xmit include/linux/netdevice.h:3105 [inline] neigh_hh_output include/net/neighbour.h:526 [inline] neigh_output include/net/neighbour.h:540 [inline] ip_finish_output2+0xd41/0x1390 net/ipv4/ip_output.c:235 ip_local_out net/ipv4/ip_output.c:129 [inline] __ip_queue_xmit+0x118c/0x1b80 net/ipv4/ip_output.c:535 __tcp_transmit_skb+0x2544/0x3b30 net/ipv4/tcp_output.c:1466 tcp_rcv_synsent_state_process net/ipv4/tcp_input.c:6542 [inline] tcp_rcv_state_process+0x2c32/0x4570 net/ipv4/tcp_input.c:6729 tcp_v4_do_rcv+0x77d/0xc70 net/ipv4/tcp_ipv4.c:1934 sk_backlog_rcv include/net/sock.h:1111 [inline] __release_sock+0x214/0x350 net/core/sock.c:3004 release_sock+0x61/0x1f0 net/core/sock.c:3558 mptcp_sendmsg_fastopen+0x1ad/0x530 net/mptcp/protocol.c:1733 mptcp_sendmsg+0x1884/0x1b10 net/mptcp/protocol.c:1812 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 ___sys_sendmsg net/socket.c:2651 [inline] __sys_sendmmsg+0x3b2/0x740 net/socket.c:2737 __do_sys_sendmmsg net/socket.c:2766 [inline] __se_sys_sendmmsg net/socket.c:2763 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2763 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f04fb13a6b9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 01 1a 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd651f42d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f04fb13a6b9 RDX: 0000000000000001 RSI: 0000000020000d00 RDI: 0000000000000004 RBP: 00007ffd651f4310 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000020000080 R11: 0000000000000246 R12: 00000000000f4240 R13: 00007f04fb187449 R14: 00007ffd651f42f4 R15: 00007ffd651f4300 </TASK> As noted by Cong Wang, the splat is false positive, but the code path leading to the report is an unexpected one: a client is attempting an MPC handshake towards the in-kernel listener created by the in-kernel PM for a port based signal endpoint. Such connection will be never accepted; many of them can make the listener queue full and preventing the creation of MPJ subflow via such listener - its intended role. Explicitly detect this scenario at initial-syn time and drop the incoming MPC request. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Cc: stable(a)vger.kernel.org Reported-by: syzbot+f4aacdfef2c6a6529c3e(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f4aacdfef2c6a6529c3e Cc: Cong Wang <cong.wang(a)bytedance.com> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241014-net-mptcp-mpc-port-endp-v2-1-7faea8e6b6ae… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index ad88bd3c58df..19eb9292bd60 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -17,6 +17,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), + SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 3206cdda8bb1..128282982843 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -12,6 +12,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */ + MPTCP_MIB_MPCAPABLEENDPATTEMPT, /* Prohibited MPC to port-based endp */ MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f6f0a38a0750..1a78998fe1f4 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1121,6 +1121,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); err = __inet_listen_sk(ssk, backlog); if (!err) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 74417aae08d0..568a72702b08 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -535,6 +535,7 @@ struct mptcp_subflow_context { __unused : 8; bool data_avail; bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? */ u32 remote_nonce; u64 thmac; u32 local_nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 25dde81bcb75..6170f2fff71e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -132,6 +132,13 @@ static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason) } } +static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb) +{ + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT); + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + return -EPERM; +} + /* Init mptcp request socket. * * Returns an error code if a JOIN has failed and a TCP reset @@ -165,6 +172,8 @@ static int subflow_check_req(struct request_sock *req, if (opt_mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); + if (unlikely(listener->pm_listener)) + return subflow_reset_req_endp(req, skb); if (opt_mp_join) return 0; } else if (opt_mp_join) { @@ -172,6 +181,8 @@ static int subflow_check_req(struct request_sock *req, if (mp_opt.backup) SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); + } else if (unlikely(listener->pm_listener)) { + return subflow_reset_req_endp(req, skb); } if (opt_mp_capable && listener->request_mptcp) {

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] mptcp: prevent MPC handshake on port-based signal endpoints" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 3d041393ea8c815f773020fb4a995331a69c0139 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101811-faction-extruding-ca04@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 3d041393ea8c815f773020fb4a995331a69c0139 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Mon, 14 Oct 2024 16:06:00 +0200 Subject: [PATCH] mptcp: prevent MPC handshake on port-based signal endpoints Syzkaller reported a lockdep splat: ============================================ WARNING: possible recursive locking detected 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Not tainted -------------------------------------------- syz-executor364/5113 is trying to acquire lock: ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff8880449f1958 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 but task is already holding lock: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(k-slock-AF_INET); lock(k-slock-AF_INET); *** DEADLOCK *** May be due to missing lock nesting notation 7 locks held by syz-executor364/5113: #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #0: ffff8880449f0e18 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x153/0x1b10 net/mptcp/protocol.c:1806 #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1607 [inline] #1: ffff88803fe39ad8 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg_fastopen+0x11f/0x530 net/mptcp/protocol.c:1727 #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #2: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: __ip_queue_xmit+0x5f/0x1b80 net/ipv4/ip_output.c:470 #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #3: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_finish_output2+0x45f/0x1390 net/ipv4/ip_output.c:228 #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: local_lock_acquire include/linux/local_lock_internal.h:29 [inline] #4: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: process_backlog+0x33b/0x15b0 net/core/dev.c:6104 #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:326 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:838 [inline] #5: ffffffff8e938320 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x230/0x5f0 net/ipv4/ip_input.c:232 #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #6: ffff88803fe3cb58 (k-slock-AF_INET){+.-.}-{2:2}, at: sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 stack backtrace: CPU: 0 UID: 0 PID: 5113 Comm: syz-executor364 Not tainted 6.11.0-rc6-syzkaller-00019-g67784a74e258 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 check_deadlock kernel/locking/lockdep.c:3061 [inline] validate_chain+0x15d3/0x5900 kernel/locking/lockdep.c:3855 __lock_acquire+0x137a/0x2040 kernel/locking/lockdep.c:5142 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5759 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] sk_clone_lock+0x2cd/0xf40 net/core/sock.c:2328 mptcp_sk_clone_init+0x32/0x13c0 net/mptcp/protocol.c:3279 subflow_syn_recv_sock+0x931/0x1920 net/mptcp/subflow.c:874 tcp_check_req+0xfe4/0x1a20 net/ipv4/tcp_minisocks.c:853 tcp_v4_rcv+0x1c3e/0x37f0 net/ipv4/tcp_ipv4.c:2267 ip_protocol_deliver_rcu+0x22e/0x440 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x341/0x5f0 net/ipv4/ip_input.c:233 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 NF_HOOK+0x3a4/0x450 include/linux/netfilter.h:314 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x2bf/0x650 net/core/dev.c:5775 process_backlog+0x662/0x15b0 net/core/dev.c:6108 __napi_poll+0xcb/0x490 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:6963 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 do_softirq+0x11b/0x1e0 kernel/softirq.c:455 </IRQ> <TASK> __local_bh_enable_ip+0x1bb/0x200 kernel/softirq.c:382 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:908 [inline] __dev_queue_xmit+0x1763/0x3e90 net/core/dev.c:4450 dev_queue_xmit include/linux/netdevice.h:3105 [inline] neigh_hh_output include/net/neighbour.h:526 [inline] neigh_output include/net/neighbour.h:540 [inline] ip_finish_output2+0xd41/0x1390 net/ipv4/ip_output.c:235 ip_local_out net/ipv4/ip_output.c:129 [inline] __ip_queue_xmit+0x118c/0x1b80 net/ipv4/ip_output.c:535 __tcp_transmit_skb+0x2544/0x3b30 net/ipv4/tcp_output.c:1466 tcp_rcv_synsent_state_process net/ipv4/tcp_input.c:6542 [inline] tcp_rcv_state_process+0x2c32/0x4570 net/ipv4/tcp_input.c:6729 tcp_v4_do_rcv+0x77d/0xc70 net/ipv4/tcp_ipv4.c:1934 sk_backlog_rcv include/net/sock.h:1111 [inline] __release_sock+0x214/0x350 net/core/sock.c:3004 release_sock+0x61/0x1f0 net/core/sock.c:3558 mptcp_sendmsg_fastopen+0x1ad/0x530 net/mptcp/protocol.c:1733 mptcp_sendmsg+0x1884/0x1b10 net/mptcp/protocol.c:1812 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x1a6/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2597 ___sys_sendmsg net/socket.c:2651 [inline] __sys_sendmmsg+0x3b2/0x740 net/socket.c:2737 __do_sys_sendmmsg net/socket.c:2766 [inline] __se_sys_sendmmsg net/socket.c:2763 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2763 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f04fb13a6b9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 01 1a 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd651f42d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f04fb13a6b9 RDX: 0000000000000001 RSI: 0000000020000d00 RDI: 0000000000000004 RBP: 00007ffd651f4310 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000020000080 R11: 0000000000000246 R12: 00000000000f4240 R13: 00007f04fb187449 R14: 00007ffd651f42f4 R15: 00007ffd651f4300 </TASK> As noted by Cong Wang, the splat is false positive, but the code path leading to the report is an unexpected one: a client is attempting an MPC handshake towards the in-kernel listener created by the in-kernel PM for a port based signal endpoint. Such connection will be never accepted; many of them can make the listener queue full and preventing the creation of MPJ subflow via such listener - its intended role. Explicitly detect this scenario at initial-syn time and drop the incoming MPC request. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Cc: stable(a)vger.kernel.org Reported-by: syzbot+f4aacdfef2c6a6529c3e(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f4aacdfef2c6a6529c3e Cc: Cong Wang <cong.wang(a)bytedance.com> Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Link: https://patch.msgid.link/20241014-net-mptcp-mpc-port-endp-v2-1-7faea8e6b6ae… Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index ad88bd3c58df..19eb9292bd60 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -17,6 +17,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), + SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 3206cdda8bb1..128282982843 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -12,6 +12,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */ + MPTCP_MIB_MPCAPABLEENDPATTEMPT, /* Prohibited MPC to port-based endp */ MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f6f0a38a0750..1a78998fe1f4 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1121,6 +1121,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); err = __inet_listen_sk(ssk, backlog); if (!err) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 74417aae08d0..568a72702b08 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -535,6 +535,7 @@ struct mptcp_subflow_context { __unused : 8; bool data_avail; bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? */ u32 remote_nonce; u64 thmac; u32 local_nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 25dde81bcb75..6170f2fff71e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -132,6 +132,13 @@ static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason) } } +static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb) +{ + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT); + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + return -EPERM; +} + /* Init mptcp request socket. * * Returns an error code if a JOIN has failed and a TCP reset @@ -165,6 +172,8 @@ static int subflow_check_req(struct request_sock *req, if (opt_mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); + if (unlikely(listener->pm_listener)) + return subflow_reset_req_endp(req, skb); if (opt_mp_join) return 0; } else if (opt_mp_join) { @@ -172,6 +181,8 @@ static int subflow_check_req(struct request_sock *req, if (mp_opt.backup) SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); + } else if (unlikely(listener->pm_listener)) { + return subflow_reset_req_endp(req, skb); } if (opt_mp_capable && listener->request_mptcp) {

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] arm64: probes: Fix uprobes for big-endian kernels" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101825-oaf-glaucoma-1d13@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland(a)arm.com> Date: Tue, 8 Oct 2024 16:58:48 +0100 Subject: [PATCH] arm64: probes: Fix uprobes for big-endian kernels The arm64 uprobes code is broken for big-endian kernels as it doesn't convert the in-memory instruction encoding (which is always little-endian) into the kernel's native endianness before analyzing and simulating instructions. This may result in a few distinct problems: * The kernel may may erroneously reject probing an instruction which can safely be probed. * The kernel may erroneously erroneously permit stepping an instruction out-of-line when that instruction cannot be stepped out-of-line safely. * The kernel may erroneously simulate instruction incorrectly dur to interpretting the byte-swapped encoding. The endianness mismatch isn't caught by the compiler or sparse because: * The arch_uprobe::{insn,ixol} fields are encoded as arrays of u8, so the compiler and sparse have no idea these contain a little-endian 32-bit value. The core uprobes code populates these with a memcpy() which similarly does not handle endianness. * While the uprobe_opcode_t type is an alias for __le32, both arch_uprobe_analyze_insn() and arch_uprobe_skip_sstep() cast from u8[] to the similarly-named probe_opcode_t, which is an alias for u32. Hence there is no endianness conversion warning. Fix this by changing the arch_uprobe::{insn,ixol} fields to __le32 and adding the appropriate __le32_to_cpu() conversions prior to consuming the instruction encoding. The core uprobes copies these fields as opaque ranges of bytes, and so is unaffected by this change. At the same time, remove MAX_UINSN_BYTES and consistently use AARCH64_INSN_SIZE for clarity. Tested with the following: | #include <stdio.h> | #include <stdbool.h> | | #define noinline __attribute__((noinline)) | | static noinline void *adrp_self(void) | { | void *addr; | | asm volatile( | " adrp %x0, adrp_self\n" | " add %x0, %x0, :lo12:adrp_self\n" | : "=r" (addr)); | } | | | int main(int argc, char *argv) | { | void *ptr = adrp_self(); | bool equal = (ptr == adrp_self); | | printf("adrp_self => %p\n" | "adrp_self() => %p\n" | "%s\n", | adrp_self, ptr, equal ? "EQUAL" : "NOT EQUAL"); | | return 0; | } .... where the adrp_self() function was compiled to: | 00000000004007e0 <adrp_self>: | 4007e0: 90000000 adrp x0, 400000 <__ehdr_start> | 4007e4: 911f8000 add x0, x0, #0x7e0 | 4007e8: d65f03c0 ret Before this patch, the ADRP is not recognized, and is assumed to be steppable, resulting in corruption of the result: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0xffffffffff7e0 | NOT EQUAL After this patch, the ADRP is correctly recognized and simulated: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL Fixes: 9842ceae9fa8 ("arm64: Add uprobe support") Cc: stable(a)vger.kernel.org Signed-off-by: Mark Rutland <mark.rutland(a)arm.com> Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Will Deacon <will(a)kernel.org> Link: https://lore.kernel.org/r/20241008155851.801546-4-mark.rutland@arm.com Signed-off-by: Will Deacon <will(a)kernel.org> diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 2b09495499c6..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef __le32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index d49aef2657cd..a2f137a595fc 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] arm64: probes: Fix uprobes for big-endian kernels" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101822-deplored-dictator-689d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland(a)arm.com> Date: Tue, 8 Oct 2024 16:58:48 +0100 Subject: [PATCH] arm64: probes: Fix uprobes for big-endian kernels The arm64 uprobes code is broken for big-endian kernels as it doesn't convert the in-memory instruction encoding (which is always little-endian) into the kernel's native endianness before analyzing and simulating instructions. This may result in a few distinct problems: * The kernel may may erroneously reject probing an instruction which can safely be probed. * The kernel may erroneously erroneously permit stepping an instruction out-of-line when that instruction cannot be stepped out-of-line safely. * The kernel may erroneously simulate instruction incorrectly dur to interpretting the byte-swapped encoding. The endianness mismatch isn't caught by the compiler or sparse because: * The arch_uprobe::{insn,ixol} fields are encoded as arrays of u8, so the compiler and sparse have no idea these contain a little-endian 32-bit value. The core uprobes code populates these with a memcpy() which similarly does not handle endianness. * While the uprobe_opcode_t type is an alias for __le32, both arch_uprobe_analyze_insn() and arch_uprobe_skip_sstep() cast from u8[] to the similarly-named probe_opcode_t, which is an alias for u32. Hence there is no endianness conversion warning. Fix this by changing the arch_uprobe::{insn,ixol} fields to __le32 and adding the appropriate __le32_to_cpu() conversions prior to consuming the instruction encoding. The core uprobes copies these fields as opaque ranges of bytes, and so is unaffected by this change. At the same time, remove MAX_UINSN_BYTES and consistently use AARCH64_INSN_SIZE for clarity. Tested with the following: | #include <stdio.h> | #include <stdbool.h> | | #define noinline __attribute__((noinline)) | | static noinline void *adrp_self(void) | { | void *addr; | | asm volatile( | " adrp %x0, adrp_self\n" | " add %x0, %x0, :lo12:adrp_self\n" | : "=r" (addr)); | } | | | int main(int argc, char *argv) | { | void *ptr = adrp_self(); | bool equal = (ptr == adrp_self); | | printf("adrp_self => %p\n" | "adrp_self() => %p\n" | "%s\n", | adrp_self, ptr, equal ? "EQUAL" : "NOT EQUAL"); | | return 0; | } .... where the adrp_self() function was compiled to: | 00000000004007e0 <adrp_self>: | 4007e0: 90000000 adrp x0, 400000 <__ehdr_start> | 4007e4: 911f8000 add x0, x0, #0x7e0 | 4007e8: d65f03c0 ret Before this patch, the ADRP is not recognized, and is assumed to be steppable, resulting in corruption of the result: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0xffffffffff7e0 | NOT EQUAL After this patch, the ADRP is correctly recognized and simulated: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL Fixes: 9842ceae9fa8 ("arm64: Add uprobe support") Cc: stable(a)vger.kernel.org Signed-off-by: Mark Rutland <mark.rutland(a)arm.com> Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Will Deacon <will(a)kernel.org> Link: https://lore.kernel.org/r/20241008155851.801546-4-mark.rutland@arm.com Signed-off-by: Will Deacon <will(a)kernel.org> diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 2b09495499c6..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef __le32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index d49aef2657cd..a2f137a595fc 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] arm64: probes: Fix uprobes for big-endian kernels" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101820-delirious-wrongful-e7f1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland(a)arm.com> Date: Tue, 8 Oct 2024 16:58:48 +0100 Subject: [PATCH] arm64: probes: Fix uprobes for big-endian kernels The arm64 uprobes code is broken for big-endian kernels as it doesn't convert the in-memory instruction encoding (which is always little-endian) into the kernel's native endianness before analyzing and simulating instructions. This may result in a few distinct problems: * The kernel may may erroneously reject probing an instruction which can safely be probed. * The kernel may erroneously erroneously permit stepping an instruction out-of-line when that instruction cannot be stepped out-of-line safely. * The kernel may erroneously simulate instruction incorrectly dur to interpretting the byte-swapped encoding. The endianness mismatch isn't caught by the compiler or sparse because: * The arch_uprobe::{insn,ixol} fields are encoded as arrays of u8, so the compiler and sparse have no idea these contain a little-endian 32-bit value. The core uprobes code populates these with a memcpy() which similarly does not handle endianness. * While the uprobe_opcode_t type is an alias for __le32, both arch_uprobe_analyze_insn() and arch_uprobe_skip_sstep() cast from u8[] to the similarly-named probe_opcode_t, which is an alias for u32. Hence there is no endianness conversion warning. Fix this by changing the arch_uprobe::{insn,ixol} fields to __le32 and adding the appropriate __le32_to_cpu() conversions prior to consuming the instruction encoding. The core uprobes copies these fields as opaque ranges of bytes, and so is unaffected by this change. At the same time, remove MAX_UINSN_BYTES and consistently use AARCH64_INSN_SIZE for clarity. Tested with the following: | #include <stdio.h> | #include <stdbool.h> | | #define noinline __attribute__((noinline)) | | static noinline void *adrp_self(void) | { | void *addr; | | asm volatile( | " adrp %x0, adrp_self\n" | " add %x0, %x0, :lo12:adrp_self\n" | : "=r" (addr)); | } | | | int main(int argc, char *argv) | { | void *ptr = adrp_self(); | bool equal = (ptr == adrp_self); | | printf("adrp_self => %p\n" | "adrp_self() => %p\n" | "%s\n", | adrp_self, ptr, equal ? "EQUAL" : "NOT EQUAL"); | | return 0; | } .... where the adrp_self() function was compiled to: | 00000000004007e0 <adrp_self>: | 4007e0: 90000000 adrp x0, 400000 <__ehdr_start> | 4007e4: 911f8000 add x0, x0, #0x7e0 | 4007e8: d65f03c0 ret Before this patch, the ADRP is not recognized, and is assumed to be steppable, resulting in corruption of the result: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0xffffffffff7e0 | NOT EQUAL After this patch, the ADRP is correctly recognized and simulated: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL Fixes: 9842ceae9fa8 ("arm64: Add uprobe support") Cc: stable(a)vger.kernel.org Signed-off-by: Mark Rutland <mark.rutland(a)arm.com> Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Will Deacon <will(a)kernel.org> Link: https://lore.kernel.org/r/20241008155851.801546-4-mark.rutland@arm.com Signed-off-by: Will Deacon <will(a)kernel.org> diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 2b09495499c6..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef __le32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index d49aef2657cd..a2f137a595fc 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] arm64: probes: Fix uprobes for big-endian kernels" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101818-tying-implement-f714@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland(a)arm.com> Date: Tue, 8 Oct 2024 16:58:48 +0100 Subject: [PATCH] arm64: probes: Fix uprobes for big-endian kernels The arm64 uprobes code is broken for big-endian kernels as it doesn't convert the in-memory instruction encoding (which is always little-endian) into the kernel's native endianness before analyzing and simulating instructions. This may result in a few distinct problems: * The kernel may may erroneously reject probing an instruction which can safely be probed. * The kernel may erroneously erroneously permit stepping an instruction out-of-line when that instruction cannot be stepped out-of-line safely. * The kernel may erroneously simulate instruction incorrectly dur to interpretting the byte-swapped encoding. The endianness mismatch isn't caught by the compiler or sparse because: * The arch_uprobe::{insn,ixol} fields are encoded as arrays of u8, so the compiler and sparse have no idea these contain a little-endian 32-bit value. The core uprobes code populates these with a memcpy() which similarly does not handle endianness. * While the uprobe_opcode_t type is an alias for __le32, both arch_uprobe_analyze_insn() and arch_uprobe_skip_sstep() cast from u8[] to the similarly-named probe_opcode_t, which is an alias for u32. Hence there is no endianness conversion warning. Fix this by changing the arch_uprobe::{insn,ixol} fields to __le32 and adding the appropriate __le32_to_cpu() conversions prior to consuming the instruction encoding. The core uprobes copies these fields as opaque ranges of bytes, and so is unaffected by this change. At the same time, remove MAX_UINSN_BYTES and consistently use AARCH64_INSN_SIZE for clarity. Tested with the following: | #include <stdio.h> | #include <stdbool.h> | | #define noinline __attribute__((noinline)) | | static noinline void *adrp_self(void) | { | void *addr; | | asm volatile( | " adrp %x0, adrp_self\n" | " add %x0, %x0, :lo12:adrp_self\n" | : "=r" (addr)); | } | | | int main(int argc, char *argv) | { | void *ptr = adrp_self(); | bool equal = (ptr == adrp_self); | | printf("adrp_self => %p\n" | "adrp_self() => %p\n" | "%s\n", | adrp_self, ptr, equal ? "EQUAL" : "NOT EQUAL"); | | return 0; | } .... where the adrp_self() function was compiled to: | 00000000004007e0 <adrp_self>: | 4007e0: 90000000 adrp x0, 400000 <__ehdr_start> | 4007e4: 911f8000 add x0, x0, #0x7e0 | 4007e8: d65f03c0 ret Before this patch, the ADRP is not recognized, and is assumed to be steppable, resulting in corruption of the result: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0xffffffffff7e0 | NOT EQUAL After this patch, the ADRP is correctly recognized and simulated: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL Fixes: 9842ceae9fa8 ("arm64: Add uprobe support") Cc: stable(a)vger.kernel.org Signed-off-by: Mark Rutland <mark.rutland(a)arm.com> Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Will Deacon <will(a)kernel.org> Link: https://lore.kernel.org/r/20241008155851.801546-4-mark.rutland@arm.com Signed-off-by: Will Deacon <will(a)kernel.org> diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 2b09495499c6..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef __le32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index d49aef2657cd..a2f137a595fc 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler)

11 months, 1 week

1
0
0 0

FAILED: patch "[PATCH] arm64: probes: Fix uprobes for big-endian kernels" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101815-chapped-decibel-91ed@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland(a)arm.com> Date: Tue, 8 Oct 2024 16:58:48 +0100 Subject: [PATCH] arm64: probes: Fix uprobes for big-endian kernels The arm64 uprobes code is broken for big-endian kernels as it doesn't convert the in-memory instruction encoding (which is always little-endian) into the kernel's native endianness before analyzing and simulating instructions. This may result in a few distinct problems: * The kernel may may erroneously reject probing an instruction which can safely be probed. * The kernel may erroneously erroneously permit stepping an instruction out-of-line when that instruction cannot be stepped out-of-line safely. * The kernel may erroneously simulate instruction incorrectly dur to interpretting the byte-swapped encoding. The endianness mismatch isn't caught by the compiler or sparse because: * The arch_uprobe::{insn,ixol} fields are encoded as arrays of u8, so the compiler and sparse have no idea these contain a little-endian 32-bit value. The core uprobes code populates these with a memcpy() which similarly does not handle endianness. * While the uprobe_opcode_t type is an alias for __le32, both arch_uprobe_analyze_insn() and arch_uprobe_skip_sstep() cast from u8[] to the similarly-named probe_opcode_t, which is an alias for u32. Hence there is no endianness conversion warning. Fix this by changing the arch_uprobe::{insn,ixol} fields to __le32 and adding the appropriate __le32_to_cpu() conversions prior to consuming the instruction encoding. The core uprobes copies these fields as opaque ranges of bytes, and so is unaffected by this change. At the same time, remove MAX_UINSN_BYTES and consistently use AARCH64_INSN_SIZE for clarity. Tested with the following: | #include <stdio.h> | #include <stdbool.h> | | #define noinline __attribute__((noinline)) | | static noinline void *adrp_self(void) | { | void *addr; | | asm volatile( | " adrp %x0, adrp_self\n" | " add %x0, %x0, :lo12:adrp_self\n" | : "=r" (addr)); | } | | | int main(int argc, char *argv) | { | void *ptr = adrp_self(); | bool equal = (ptr == adrp_self); | | printf("adrp_self => %p\n" | "adrp_self() => %p\n" | "%s\n", | adrp_self, ptr, equal ? "EQUAL" : "NOT EQUAL"); | | return 0; | } .... where the adrp_self() function was compiled to: | 00000000004007e0 <adrp_self>: | 4007e0: 90000000 adrp x0, 400000 <__ehdr_start> | 4007e4: 911f8000 add x0, x0, #0x7e0 | 4007e8: d65f03c0 ret Before this patch, the ADRP is not recognized, and is assumed to be steppable, resulting in corruption of the result: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0xffffffffff7e0 | NOT EQUAL After this patch, the ADRP is correctly recognized and simulated: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL Fixes: 9842ceae9fa8 ("arm64: Add uprobe support") Cc: stable(a)vger.kernel.org Signed-off-by: Mark Rutland <mark.rutland(a)arm.com> Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Will Deacon <will(a)kernel.org> Link: https://lore.kernel.org/r/20241008155851.801546-4-mark.rutland@arm.com Signed-off-by: Will Deacon <will(a)kernel.org> diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 2b09495499c6..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include <asm/insn.h> #include <asm/probes.h> -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef __le32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index d49aef2657cd..a2f137a595fc 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler)

11 months, 1 week

1
0
0 0

[PATCH 6.1 00/19] Fix NULL pointer dereference for corrupted UDF filesystems

by Thadeu Lima de Souza Cascardo

UDF filesystems which have relocated blocks past the end of the device may lead to a dcache without an inode that would lead to a NULL pointer dereference, like this: [ 65.938826] repro: attempt to access beyond end of device [ 65.938826] loop0: rw=2049, sector=2052, nr_sectors = 2 limit=2048 [ 65.939476] Buffer I/O error on dev loop0, logical block 1026, lost async page write [ 65.940426] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 65.940894] #PF: supervisor read access in kernel mode [ 65.941280] #PF: error_code(0x0000) - not-present page [ 65.941552] PGD 8691067 P4D 8691067 PUD 84cb067 PMD 0 [ 65.941830] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 65.942069] CPU: 0 PID: 460 Comm: repro Not tainted 6.1.113-rc2-00792-g7e3aa874350e #618 [ 65.942490] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 65.942906] RIP: 0010:path_openat+0x3ae/0x5db [ 65.943136] Code: 89 c0 b8 eb ff ff ff 45 84 c0 0f 85 50 ff ff ff 83 3d da 24 3d 01 00 48 8b 4a 70 44 8b ad e4 00 00 00 8b 95 e0 00 00 00 75 0c <8b> 01 66 25 00 f0 66 3d 00 10 74 95 83 3d b0 24 3d 01 00 75 0c 8b [ 65.944078] RSP: 0018:ffffc900001c7d50 EFLAGS: 00010246 [ 65.944387] RAX: 00000000ffffffeb RBX: ffffc900001c7edc RCX: 0000000000000000 [ 65.945072] RDX: 0000000000000000 RSI: 0000000000000132 RDI: 0000000000000000 [ 65.945948] RBP: ffffc900001c7dc0 R08: 000000000622c100 R09: 0000000000000000 [ 65.946412] R10: ffffc900001c7b30 R11: 0000000000000002 R12: ffff888009533a00 [ 65.946833] R13: 00000000000041ed R14: 0000000000008241 R15: ffffffff82450ca0 [ 65.947257] FS: 00007c48054c4740(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000 [ 65.947702] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 65.947997] CR2: 0000000000000000 CR3: 0000000008c40000 CR4: 0000000000750ef0 [ 65.948361] PKRU: 55555554 [ 65.948503] Call Trace: [ 65.948631] <TASK> [ 65.948799] ? __die_body+0x1a/0x5d [ 65.949079] ? page_fault_oops+0x2ca/0x358 [ 65.949370] ? exc_page_fault+0x15f/0x18b [ 65.949654] ? asm_exc_page_fault+0x26/0x30 [ 65.949953] ? path_openat+0x3ae/0x5db [ 65.950228] do_filp_open+0x52/0xb3 [ 65.950480] ? lock_release+0x17a/0x25f [ 65.950759] ? _raw_spin_unlock+0x1e/0x32 [ 65.951044] do_sys_openat2+0x6d/0xe0 [ 65.951305] do_sys_open+0x39/0x57 [ 65.951479] do_syscall_64+0x71/0x88 [ 65.951660] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 65.951913] RIP: 0033:0x7c48055ecc7d [ 65.952100] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 81 0d 00 f7 d8 64 89 01 48 [ 65.953002] RSP: 002b:00007fff38c48918 EFLAGS: 00000202 ORIG_RAX: 0000000000000055 [ 65.953378] RAX: ffffffffffffffda RBX: 00007fff38c48a48 RCX: 00007c48055ecc7d [ 65.953733] RDX: 00007c48055ecc7d RSI: 0000000000000000 RDI: 0000000020000d00 [ 65.954128] RBP: 00007fff38c48930 R08: 0000000000000000 R09: 0000000000000000 [ 65.954492] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000 [ 65.955122] R13: 00007fff38c48a58 R14: 00005661cd1ccd10 R15: 00007c480572d000 [ 65.955666] </TASK> [ 65.955843] Modules linked in: [ 65.956054] CR2: 0000000000000000 [ 65.956285] ---[ end trace 0000000000000000 ]--- [ 65.956610] RIP: 0010:path_openat+0x3ae/0x5db [ 65.956886] Code: 89 c0 b8 eb ff ff ff 45 84 c0 0f 85 50 ff ff ff 83 3d da 24 3d 01 00 48 8b 4a 70 44 8b ad e4 00 00 00 8b 95 e0 00 00 00 75 0c <8b> 01 66 25 00 f0 66 3d 00 10 74 95 83 3d b0 24 3d 01 00 75 0c 8b [ 65.957973] RSP: 0018:ffffc900001c7d50 EFLAGS: 00010246 [ 65.958255] RAX: 00000000ffffffeb RBX: ffffc900001c7edc RCX: 0000000000000000 [ 65.958636] RDX: 0000000000000000 RSI: 0000000000000132 RDI: 0000000000000000 [ 65.959111] RBP: ffffc900001c7dc0 R08: 000000000622c100 R09: 0000000000000000 [ 65.959601] R10: ffffc900001c7b30 R11: 0000000000000002 R12: ffff888009533a00 [ 65.960095] R13: 00000000000041ed R14: 0000000000008241 R15: ffffffff82450ca0 [ 65.960539] FS: 00007c48054c4740(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000 [ 65.960971] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 65.961283] CR2: 0000000000000000 CR3: 0000000008c40000 CR4: 0000000000750ef0 [ 65.961664] PKRU: 55555554 [ 65.961820] Kernel panic - not syncing: Fatal exception Jan Kara (19): udf: New directory iteration code udf: Convert udf_expand_dir_adinicb() to new directory iteration udf: Move udf_expand_dir_adinicb() to its callsite udf: Implement searching for directory entry using new iteration code udf: Provide function to mark entry as deleted using new directory iteration code udf: Convert udf_rename() to new directory iteration code udf: Convert udf_readdir() to new directory iteration udf: Convert udf_lookup() to use new directory iteration code udf: Convert udf_get_parent() to new directory iteration code udf: Convert empty_dir() to new directory iteration code udf: Convert udf_rmdir() to new directory iteration code udf: Convert udf_unlink() to new directory iteration code udf: Implement adding of dir entries using new iteration code udf: Convert udf_add_nondir() to new directory iteration udf: Convert udf_mkdir() to new directory iteration code udf: Convert udf_link() to new directory iteration code udf: Remove old directory iteration code udf: Handle error when expanding directory udf: Don't return bh from udf_expand_dir_adinicb() fs/udf/dir.c | 148 ++----- fs/udf/directory.c | 564 ++++++++++++++++++------ fs/udf/inode.c | 90 ---- fs/udf/namei.c | 1049 +++++++++++++++----------------------------- fs/udf/udfdecl.h | 45 +- 5 files changed, 823 insertions(+), 1073 deletions(-) -- 2.34.1

11 months, 1 week

2
20
0 0

[PATCH v2 01/13] media: v4l2-ctrls-api: fix error handling for v4l2_g_ctrl()

by Mauro Carvalho Chehab

As detected by Coverity, the error check logic at get_ctrl() is broken: if ptr_to_user() fails to fill a control due to an error, no errors are returned and v4l2_g_ctrl() returns success on a failed operation, which may cause applications to fail. Add an error check at get_ctrl() and ensure that it will be returned to userspace without filling the control value if get_ctrl() fails. Fixes: 71c689dc2e73 ("media: v4l2-ctrls: split up into four source files") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/media/v4l2-core/v4l2-ctrls-api.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c b/drivers/media/v4l2-core/v4l2-ctrls-api.c index e5a364efd5e6..a0de7eeaf085 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-api.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c @@ -753,9 +753,10 @@ static int get_ctrl(struct v4l2_ctrl *ctrl, struct v4l2_ext_control *c) for (i = 0; i < master->ncontrols; i++) cur_to_new(master->cluster[i]); ret = call_op(master, g_volatile_ctrl); - new_to_user(c, ctrl); + if (!ret) + ret = new_to_user(c, ctrl); } else { - cur_to_user(c, ctrl); + ret = cur_to_user(c, ctrl); } v4l2_ctrl_unlock(master); return ret; @@ -770,7 +771,10 @@ int v4l2_g_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_control *control) if (!ctrl || !ctrl->is_int) return -EINVAL; ret = get_ctrl(ctrl, &c); - control->value = c.value; + + if (!ret) + control->value = c.value; + return ret; } EXPORT_SYMBOL(v4l2_g_ctrl); @@ -811,10 +815,12 @@ static int set_ctrl_lock(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, int ret; v4l2_ctrl_lock(ctrl); - user_to_new(c, ctrl); + ret = user_to_new(c, ctrl); + if (ret) + return ret; ret = set_ctrl(fh, ctrl, 0); if (!ret) - cur_to_user(c, ctrl); + ret = cur_to_user(c, ctrl); v4l2_ctrl_unlock(ctrl); return ret; } -- 2.47.0

11 months, 1 week

2
2
0 0

[PATCH 6.10.y 0/3] : Yu Zhao's memory fix backport

by chrisl＠kernel.org

A few commits from Yu Zhao have been merged into 6.12. They need to be backported to 6.10. - c2a967f6ab0ec ("mm/hugetlb_vmemmap: don't synchronize_rcu() without HVO") - 95599ef684d01 ("mm/codetag: fix pgalloc_tag_split()") - e0a955bf7f61c ("mm/codetag: add pgalloc_tag_copy()") --- Yu Zhao (3): mm/hugetlb_vmemmap: don't synchronize_rcu() without HVO mm/codetag: fix pgalloc_tag_split() mm/codetag: add pgalloc_tag_copy() include/linux/alloc_tag.h | 24 ++++++++---------- include/linux/mm.h | 59 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pgalloc_tag.h | 31 ------------------------ mm/huge_memory.c | 2 +- mm/hugetlb_vmemmap.c | 40 +++++++++++++++--------------- mm/migrate.c | 1 + mm/page_alloc.c | 4 +-- 7 files changed, 93 insertions(+), 68 deletions(-) --- base-commit: 47c2f92131c47a37ea0e3d8e1a4e4c82a9b473d4 change-id: 20241016-stable-yuzhao-6.10-7779910482e8 Best regards, -- Chris Li <chrisl(a)kernel.org>

11 months, 1 week

2
4
0 0

[PATCH v2 13/13] media: pulse8-cec: fix data timestamp at pulse8_setup()

by Mauro Carvalho Chehab

As pointed by Coverity, there is a hidden overflow condition there. As date is signed and u8 is unsigned, doing: date = (data[0] << 24) With a value bigger than 07f will make all upper bits of date 0xffffffff. This can be demonstrated with this small code: <code> typedef int64_t time64_t; typedef uint8_t u8; int main(void) { u8 data[] = { 0xde ,0xad , 0xbe, 0xef }; time64_t date; date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; printf("Invalid data = 0x%08lx\n", date); date = ((unsigned)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; printf("Expected data = 0x%08lx\n", date); return 0; } </code> Fix it by converting the upper bit calculation to unsigned. Fixes: cea28e7a55e7 ("media: pulse8-cec: reorganize function order") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/media/cec/usb/pulse8/pulse8-cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/cec/usb/pulse8/pulse8-cec.c b/drivers/media/cec/usb/pulse8/pulse8-cec.c index ba67587bd43e..171366fe3544 100644 --- a/drivers/media/cec/usb/pulse8/pulse8-cec.c +++ b/drivers/media/cec/usb/pulse8/pulse8-cec.c @@ -685,7 +685,7 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio, err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 4); if (err) return err; - date = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; + date = ((unsigned)data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]; dev_info(pulse8->dev, "Firmware build date %ptT\n", &date); dev_dbg(pulse8->dev, "Persistent config:\n"); -- 2.47.0

11 months, 1 week

1
0
0 0

[PATCH v2 11/13] media: stb0899_algo: initialize cfr before using it

by Mauro Carvalho Chehab

The loop at stb0899_search_carrier() starts with a random value for cfr, as reported by Coverity. Initialize it to zero, just like stb0899_dvbs_algo() to ensure that carrier search won't bail out. Fixes: 8bd135bab91f ("V4L/DVB (9375): Add STB0899 support") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/media/dvb-frontends/stb0899_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/stb0899_algo.c b/drivers/media/dvb-frontends/stb0899_algo.c index df89c33dac23..40537c4ccb0d 100644 --- a/drivers/media/dvb-frontends/stb0899_algo.c +++ b/drivers/media/dvb-frontends/stb0899_algo.c @@ -269,7 +269,7 @@ static enum stb0899_status stb0899_search_carrier(struct stb0899_state *state) short int derot_freq = 0, last_derot_freq = 0, derot_limit, next_loop = 3; int index = 0; - u8 cfr[2]; + u8 cfr[2] = {0}; u8 reg; internal->status = NOCARRIER; -- 2.47.0

11 months, 1 week

1
0
0 0

[PATCH v2 09/13] media: cx24116: prevent overflows on SNR calculus

by Mauro Carvalho Chehab

as reported by Coverity, if reading SNR registers fail, a negative number will be returned, causing an underflow when reading SNR registers. Prevent that. Fixes: 8953db793d5b ("V4L/DVB (9178): cx24116: Add module parameter to return SNR as ESNO.") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/media/dvb-frontends/cx24116.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c index 8b978a9f74a4..f5dd3a81725a 100644 --- a/drivers/media/dvb-frontends/cx24116.c +++ b/drivers/media/dvb-frontends/cx24116.c @@ -741,6 +741,7 @@ static int cx24116_read_snr_pct(struct dvb_frontend *fe, u16 *snr) { struct cx24116_state *state = fe->demodulator_priv; u8 snr_reading; + int ret; static const u32 snr_tab[] = { /* 10 x Table (rounded up) */ 0x00000, 0x0199A, 0x03333, 0x04ccD, 0x06667, 0x08000, 0x0999A, 0x0b333, 0x0cccD, 0x0e667, @@ -749,7 +750,11 @@ static int cx24116_read_snr_pct(struct dvb_frontend *fe, u16 *snr) dprintk("%s()\n", __func__); - snr_reading = cx24116_readreg(state, CX24116_REG_QUALITY0); + ret = cx24116_readreg(state, CX24116_REG_QUALITY0); + if (ret < 0) + return ret; + + snr_reading = ret; if (snr_reading >= 0xa0 /* 100% */) *snr = 0xffff; -- 2.47.0

11 months, 1 week

1
0
0 0

[PATCH v2 07/13] media: s5p-jpeg: prevent buffer overflows

by Mauro Carvalho Chehab

The current logic allows word to be less than 2. If this happens, there will be buffer overflows, as reported by smatch. Add extra checks to prevent it. While here, remove an unused word = 0 assignment. Fixes: 6c96dbbc2aa9 ("[media] s5p-jpeg: add support for 5433") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> Reviewed-by: Jacek Anaszewski <jacek.anaszewski(a)gmail.com> --- .../media/platform/samsung/s5p-jpeg/jpeg-core.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c b/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c index d2c4a0178b3c..1db4609b3557 100644 --- a/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/samsung/s5p-jpeg/jpeg-core.c @@ -775,11 +775,14 @@ static void exynos4_jpeg_parse_decode_h_tbl(struct s5p_jpeg_ctx *ctx) (unsigned long)vb2_plane_vaddr(&vb->vb2_buf, 0) + ctx->out_q.sos + 2; jpeg_buffer.curr = 0; - word = 0; - if (get_word_be(&jpeg_buffer, &word)) return; - jpeg_buffer.size = (long)word - 2; + + if (word < 2) + jpeg_buffer.size = 0; + else + jpeg_buffer.size = (long)word - 2; + jpeg_buffer.data += 2; jpeg_buffer.curr = 0; @@ -1058,6 +1061,7 @@ static int get_word_be(struct s5p_jpeg_buffer *buf, unsigned int *word) if (byte == -1) return -1; *word = (unsigned int)byte | temp; + return 0; } @@ -1145,7 +1149,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; sof = jpeg_buffer.curr; /* after 0xffc0 */ sof_len = length; @@ -1176,7 +1180,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; if (n_dqt >= S5P_JPEG_MAX_MARKER) return false; @@ -1189,7 +1193,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; - if (!length) + if (length <= 0) return false; if (n_dht >= S5P_JPEG_MAX_MARKER) return false; @@ -1214,6 +1218,7 @@ static bool s5p_jpeg_parse_hdr(struct s5p_jpeg_q_data *result, if (get_word_be(&jpeg_buffer, &word)) break; length = (long)word - 2; + /* No need to check underflows as skip() does it */ skip(&jpeg_buffer, length); break; } -- 2.47.0

11 months, 1 week

1
0
0 0

[PATCH v2 06/13] media: av7110: fix a spectre vulnerability

by Mauro Carvalho Chehab

As warned by smatch: drivers/staging/media/av7110/av7110_ca.c:270 dvb_ca_ioctl() warn: potential spectre issue 'av7110->ci_slot' [w] (local cap) There is a spectre-related vulnerability at the code. Fix it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/staging/media/av7110/av7110.h | 4 +++- drivers/staging/media/av7110/av7110_ca.c | 25 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/staging/media/av7110/av7110.h b/drivers/staging/media/av7110/av7110.h index ec461fd187af..b584754f4be0 100644 --- a/drivers/staging/media/av7110/av7110.h +++ b/drivers/staging/media/av7110/av7110.h @@ -88,6 +88,8 @@ struct infrared { u32 ir_config; }; +#define MAX_CI_SLOTS 2 + /* place to store all the necessary device information */ struct av7110 { /* devices */ @@ -163,7 +165,7 @@ struct av7110 { /* CA */ - struct ca_slot_info ci_slot[2]; + struct ca_slot_info ci_slot[MAX_CI_SLOTS]; enum av7110_video_mode vidmode; struct dmxdev dmxdev; diff --git a/drivers/staging/media/av7110/av7110_ca.c b/drivers/staging/media/av7110/av7110_ca.c index 6ce212c64e5d..fce4023c9dea 100644 --- a/drivers/staging/media/av7110/av7110_ca.c +++ b/drivers/staging/media/av7110/av7110_ca.c @@ -26,23 +26,28 @@ void CI_handle(struct av7110 *av7110, u8 *data, u16 len) { + unsigned slot_num; + dprintk(8, "av7110:%p\n", av7110); if (len < 3) return; switch (data[0]) { case CI_MSG_CI_INFO: - if (data[2] != 1 && data[2] != 2) + if (data[2] != 1 && data[2] != MAX_CI_SLOTS) break; + + slot_num = array_index_nospec(data[2] - 1, MAX_CI_SLOTS); + switch (data[1]) { case 0: - av7110->ci_slot[data[2] - 1].flags = 0; + av7110->ci_slot[slot_num].flags = 0; break; case 1: - av7110->ci_slot[data[2] - 1].flags |= CA_CI_MODULE_PRESENT; + av7110->ci_slot[slot_num].flags |= CA_CI_MODULE_PRESENT; break; case 2: - av7110->ci_slot[data[2] - 1].flags |= CA_CI_MODULE_READY; + av7110->ci_slot[slot_num].flags |= CA_CI_MODULE_READY; break; } break; @@ -262,15 +267,19 @@ static int dvb_ca_ioctl(struct file *file, unsigned int cmd, void *parg) case CA_GET_SLOT_INFO: { struct ca_slot_info *info = (struct ca_slot_info *)parg; + unsigned int slot_num; if (info->num < 0 || info->num > 1) { mutex_unlock(&av7110->ioctl_mutex); return -EINVAL; } - av7110->ci_slot[info->num].num = info->num; - av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? - CA_CI_LINK : CA_CI; - memcpy(info, &av7110->ci_slot[info->num], sizeof(struct ca_slot_info)); + slot_num = array_index_nospec(info->num, MAX_CI_SLOTS); + + av7110->ci_slot[slot_num].num = info->num; + av7110->ci_slot[slot_num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? + CA_CI_LINK : CA_CI; + memcpy(info, &av7110->ci_slot[slot_num], + sizeof(struct ca_slot_info)); break; } -- 2.47.0

11 months, 1 week

1
0
0 0

[PATCH v2 05/13] media: mgb4: protect driver against spectre

by Mauro Carvalho Chehab

Frequency range is set from sysfs via frequency_range_store(), being vulnerable to spectre, as reported by smatch: drivers/media/pci/mgb4/mgb4_cmt.c:231 mgb4_cmt_set_vin_freq_range() warn: potential spectre issue 'cmt_vals_in' [r] drivers/media/pci/mgb4/mgb4_cmt.c:238 mgb4_cmt_set_vin_freq_range() warn: possible spectre second half. 'reg_set' Fix it. Fixes: 0ab13674a9bd ("media: pci: mgb4: Added Digiteq Automotive MGB4 driver") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> Reviewed-by: Martin Tůma <martin.tuma(a)digiteqautomotive.com> --- drivers/media/pci/mgb4/mgb4_cmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/pci/mgb4/mgb4_cmt.c b/drivers/media/pci/mgb4/mgb4_cmt.c index 70dc78ef193c..a25b68403bc6 100644 --- a/drivers/media/pci/mgb4/mgb4_cmt.c +++ b/drivers/media/pci/mgb4/mgb4_cmt.c @@ -227,6 +227,8 @@ void mgb4_cmt_set_vin_freq_range(struct mgb4_vin_dev *vindev, u32 config; size_t i; + freq_range = array_index_nospec(freq_range, ARRAY_SIZE(cmt_vals_in)); + addr = cmt_addrs_in[vindev->config->id]; reg_set = cmt_vals_in[freq_range]; -- 2.47.0

11 months, 1 week

1
0
0 0

[PATCH v2 02/13] media: v4l2-tpg: prevent the risk of a division by zero

by Mauro Carvalho Chehab

As reported by Coverity, the logic at tpg_precalculate_line() blindly rescales the buffer even when scaled_witdh is equal to zero. If this ever happens, this will cause a division by zero. Instead, add a WARN_ON_ONCE() to trigger such cases and return without doing any precalculation. Fixes: 63881df94d3e ("[media] vivid: add the Test Pattern Generator") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/media/common/v4l2-tpg/v4l2-tpg-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c index c86343a4d0bf..940bfbf275ce 100644 --- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c +++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c @@ -1795,6 +1795,9 @@ static void tpg_precalculate_line(struct tpg_data *tpg) unsigned p; unsigned x; + if (WARN_ON_ONCE(!tpg->src_width || !tpg->scaled_width)) + return; + switch (tpg->pattern) { case TPG_PAT_GREEN: contrast = TPG_COLOR_100_RED; -- 2.47.0

11 months, 1 week

1
0
0 0

[PATCH 10/13] media: adv7604 prevent underflow condition when reporting colorspace

by Mauro Carvalho Chehab

Currently, adv76xx_log_status() reads some date using io_read() which may return negative values. The current logi doesn't check such errors, causing colorspace to be reported on a wrong way at adv76xx_log_status(). If I/O error happens there, print a different message, instead of reporting bogus messages to userspace. Fixes: 54450f591c99 ("[media] adv7604: driver for the Analog Devices ADV7604 video decoder") Cc: stable(a)vger.kernel.org Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org> --- drivers/media/i2c/adv7604.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index 48230d5109f0..272945a878b3 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -2519,10 +2519,10 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) const struct adv76xx_chip_info *info = state->info; struct v4l2_dv_timings timings; struct stdi_readback stdi; - u8 reg_io_0x02 = io_read(sd, 0x02); + int ret; + u8 reg_io_0x02; u8 edid_enabled; u8 cable_det; - static const char * const csc_coeff_sel_rb[16] = { "bypassed", "YPbPr601 -> RGB", "reserved", "YPbPr709 -> RGB", "reserved", "RGB -> YPbPr601", "reserved", "RGB -> YPbPr709", @@ -2621,13 +2621,21 @@ static int adv76xx_log_status(struct v4l2_subdev *sd) v4l2_info(sd, "-----Color space-----\n"); v4l2_info(sd, "RGB quantization range ctrl: %s\n", rgb_quantization_range_txt[state->rgb_quantization_range]); - v4l2_info(sd, "Input color space: %s\n", - input_color_space_txt[reg_io_0x02 >> 4]); - v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", - (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", - (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? - "(16-235)" : "(0-255)", - (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); + + ret = io_read(sd, 0x02); + if (ret < 0) { + v4l2_info(sd, "Can't read Input/Output color space\n"); + } else { + reg_io_0x02 = ret; + + v4l2_info(sd, "Input color space: %s\n", + input_color_space_txt[reg_io_0x02 >> 4]); + v4l2_info(sd, "Output color space: %s %s, alt-gamma %s\n", + (reg_io_0x02 & 0x02) ? "RGB" : "YCbCr", + (((reg_io_0x02 >> 2) & 0x01) ^ (reg_io_0x02 & 0x01)) ? + "(16-235)" : "(0-255)", + (reg_io_0x02 & 0x08) ? "enabled" : "disabled"); + } v4l2_info(sd, "Color space conversion: %s\n", csc_coeff_sel_rb[cp_read(sd, info->cp_csc) >> 4]); -- 2.47.0

11 months, 1 week

2
4
0 0

[PATCH] LoongArch: Make KASAN usable for variable cpu_vabits

by Huacai Chen

Currently, KASAN on LoongArch assume the CPU VA bits is 48, which is true for Loongson-3 series, but not for Loongson-2 series (only 40 or lower), this patch fix that issue and make KASAN usable for variable cpu_vabits. 1. Define XRANGE_SHADOW_SHIFT which means valid address length from VA_BITS to min(cpu_vabits, VA_BITS). 2. In kasan_mem_to_shadow() let DMW addresses which exceed XRANGE_SIZE to return kasan_early_shadow_page. Cc: stable(a)vger.kernel.org Signed-off-by: Kanglong Wang <wangkanglong(a)loongson.cn> Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn> --- arch/loongarch/include/asm/kasan.h | 2 +- arch/loongarch/mm/kasan_init.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index cd6084f4e153..c6bce5fbff57 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -16,7 +16,7 @@ #define XRANGE_SHIFT (48) /* Valid address length */ -#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define XRANGE_SHADOW_SHIFT min(cpu_vabits, VA_BITS) /* Used for taking out the valid address */ #define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) /* One segment whole address space size */ diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index 427d6b1aec09..e5ecc8c12034 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -48,6 +48,10 @@ void *kasan_mem_to_shadow(const void *addr) return (void *)(kasan_early_shadow_page); maddr &= XRANGE_SHADOW_MASK; + + if (maddr >= XRANGE_SIZE) + return (void *)(kasan_early_shadow_page); + switch (xrange) { case XKPRANGE_CC_SEG: offset = XKPRANGE_CC_SHADOW_OFFSET; -- 2.43.5

11 months, 1 week

1
0
0 0

[PATCH] LoongArch: Enable IRQ if do_ale() triggered in irq-enabled context

by Huacai Chen

Unaligned access exception can be triggered in irq-enabled context such as user mode, in this case do_ale() may call get_user() which may cause sleep. Then we will get: BUG: sleeping function called from invalid context at arch/loongarch/kernel/access-helper.h:7 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 129, name: modprobe preempt_count: 0, expected: 0 RCU nest depth: 0, expected: 0 CPU: 0 UID: 0 PID: 129 Comm: modprobe Tainted: G W 6.12.0-rc1+ #1723 Tainted: [W]=WARN Stack : 9000000105e0bd48 0000000000000000 9000000003803944 9000000105e08000 9000000105e0bc70 9000000105e0bc78 0000000000000000 0000000000000000 9000000105e0bc78 0000000000000001 9000000185e0ba07 9000000105e0b890 ffffffffffffffff 9000000105e0bc78 73924b81763be05b 9000000100194500 000000000000020c 000000000000000a 0000000000000000 0000000000000003 00000000000023f0 00000000000e1401 00000000072f8000 0000007ffbb0e260 0000000000000000 0000000000000000 9000000005437650 90000000055d5000 0000000000000000 0000000000000003 0000007ffbb0e1f0 0000000000000000 0000005567b00490 0000000000000000 9000000003803964 0000007ffbb0dfec 00000000000000b0 0000000000000007 0000000000000003 0000000000071c1d ... Call Trace: [<9000000003803964>] show_stack+0x64/0x1a0 [<9000000004c57464>] dump_stack_lvl+0x74/0xb0 [<9000000003861ab4>] __might_resched+0x154/0x1a0 [<900000000380c96c>] emulate_load_store_insn+0x6c/0xf60 [<9000000004c58118>] do_ale+0x78/0x180 [<9000000003801bc8>] handle_ale+0x128/0x1e0 So enable IRQ if unaligned access exception is triggered in irq-enabled context to fix it. Cc: stable(a)vger.kernel.org Reported-by: Binbin Zhou <zhoubinbin(a)loongson.cn> Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn> --- arch/loongarch/kernel/traps.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index f9f4eb00c92e..c57b4134f3e8 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -555,6 +555,9 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) #else unsigned int *pc; + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_enable(); + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); /* @@ -579,6 +582,8 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); out: + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_disable(); #endif irqentry_exit(regs, state); } -- 2.43.5

11 months, 1 week

1
0
0 0

[PATCH] LoongArch: Get correct cores_per_package for SMT systems

by Huacai Chen

In loongson_sysconf, The "core" of cores_per_node and cores_per_package stands for a logical core, which means in a SMT system it stands for a thread indeed. This information is gotten from SMBIOS Type4 Structure, so in order to get a correct cores_per_package for both SMT and non-SMT systems in parse_cpu_table() we should use SMBIOS_THREAD_PACKAGE_OFFSET instead of SMBIOS_CORE_PACKAGE_OFFSET. Cc: stable(a)vger.kernel.org Reported-by: Chao Li <lichao(a)loongson.cn> Tested-by: Chao Li <lichao(a)loongson.cn> Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn> --- arch/loongarch/kernel/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 00e307203ddb..cbd3c09a93c1 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -55,6 +55,7 @@ #define SMBIOS_FREQHIGH_OFFSET 0x17 #define SMBIOS_FREQLOW_MASK 0xFF #define SMBIOS_CORE_PACKAGE_OFFSET 0x23 +#define SMBIOS_THREAD_PACKAGE_OFFSET 0x25 #define LOONGSON_EFI_ENABLE (1 << 3) unsigned long fw_arg0, fw_arg1, fw_arg2; @@ -125,7 +126,7 @@ static void __init parse_cpu_table(const struct dmi_header *dm) cpu_clock_freq = freq_temp * 1000000; loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]); - loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_CORE_PACKAGE_OFFSET); + loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_THREAD_PACKAGE_OFFSET); pr_info("CpuClock = %llu\n", cpu_clock_freq); } -- 2.43.5 diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 6d5846dd075c..7657e016233f 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -26,6 +26,10 @@ struct loongson_board_info { #define NR_WORDS DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG) +/* + * The "core" of cores_per_node and cores_per_package stands for a + * logical core, which means in a SMT system it stands for a thread. + */ struct loongson_system_configuration { int nr_cpus; int nr_nodes;

11 months, 1 week

1
0
0 0

回复: 回复: [PATCH v1] riscv: dts: starfive: disable unused csi/camss nodes

by Changhuang Liang

Hi, Conor > On Thu, Oct 17, 2024 at 05:49:56AM +0000, Changhuang Liang wrote: > > Hi, Conor, > > > > > Hi, Conor > > > > > > Thanks for your patch. > > > > > > > From: Conor Dooley <conor.dooley(a)microchip.com> > > > > > > > > Aurelien reported probe failures due to the csi node being enabled > > > > without having a camera attached to it. A camera was in the > > > > initial submissions, but was removed from the dts, as it had not > > > > actually been present on the board, but was from an addon board > > > > used by the developer > > > of the relevant drivers. > > > > The non-camera pipeline nodes were not disabled when this happened > > > > and the probe failures are problematic for Debian. Disable them. > > > > > > > > CC: stable(a)vger.kernel.org > > > > Fixes: 28ecaaa5af192 ("riscv: dts: starfive: jh7110: Add camera > > > > subsystem > > > > nodes") > > > > > > Here you write it in 13 characters, should be "Fixes: 28ecaaa5af19 ..." > > > > > > > After fixing this: > > Reviewed-by: Changhuang Liang <changhuang.liang(a)starfivetech.com> > > Ye, I know it was 13 not 12. I don't think that's a problem though. Okay, that's fine. Best Regards, Changhuang

11 months, 1 week

1
0
0 0

+ resourcekexec-walk_system_ram_res_rev-must-retain-resource-flags.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: resource,kexec: walk_system_ram_res_rev must retain resource flags has been added to the -mm mm-hotfixes-unstable branch. Its filename is resourcekexec-walk_system_ram_res_rev-must-retain-resource-flags.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Gregory Price <gourry(a)gourry.net> Subject: resource,kexec: walk_system_ram_res_rev must retain resource flags Date: Thu, 17 Oct 2024 15:03:47 -0400 walk_system_ram_res_rev() erroneously discards resource flags when passing the information to the callback. This causes systems with IORESOURCE_SYSRAM_DRIVER_MANAGED memory to have these resources selected during kexec to store kexec buffers if that memory happens to be at placed above normal system ram. This leads to undefined behavior after reboot. If the kexec buffer is never touched, nothing happens. If the kexec buffer is touched, it could lead to a crash (like below) or undefined behavior. Tested on a system with CXL memory expanders with driver managed memory, TPM enabled, and CONFIG_IMA_KEXEC=y. Adding printk's showed the flags were being discarded and as a result the check for IORESOURCE_SYSRAM_DRIVER_MANAGED passes. find_next_iomem_res: name(System RAM (kmem)) start(10000000000) end(1034fffffff) flags(83000200) locate_mem_hole_top_down: start(10000000000) end(1034fffffff) flags(0) [.] BUG: unable to handle page fault for address: ffff89834ffff000 [.] #PF: supervisor read access in kernel mode [.] #PF: error_code(0x0000) - not-present page [.] PGD c04c8bf067 P4D c04c8bf067 PUD c04c8be067 PMD 0 [.] Oops: 0000 [#1] SMP [.] RIP: 0010:ima_restore_measurement_list+0x95/0x4b0 [.] RSP: 0018:ffffc900000d3a80 EFLAGS: 00010286 [.] RAX: 0000000000001000 RBX: 0000000000000000 RCX: ffff89834ffff000 [.] RDX: 0000000000000018 RSI: ffff89834ffff000 RDI: ffff89834ffff018 [.] RBP: ffffc900000d3ba0 R08: 0000000000000020 R09: ffff888132b8a900 [.] R10: 4000000000000000 R11: 000000003a616d69 R12: 0000000000000000 [.] R13: ffffffff8404ac28 R14: 0000000000000000 R15: ffff89834ffff000 [.] FS: 0000000000000000(0000) GS:ffff893d44640000(0000) knlGS:0000000000000000 [.] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [.] ata5: SATA link down (SStatus 0 SControl 300) [.] CR2: ffff89834ffff000 CR3: 000001034d00f001 CR4: 0000000000770ef0 [.] PKRU: 55555554 [.] Call Trace: [.] <TASK> [.] ? __die+0x78/0xc0 [.] ? page_fault_oops+0x2a8/0x3a0 [.] ? exc_page_fault+0x84/0x130 [.] ? asm_exc_page_fault+0x22/0x30 [.] ? ima_restore_measurement_list+0x95/0x4b0 [.] ? template_desc_init_fields+0x317/0x410 [.] ? crypto_alloc_tfm_node+0x9c/0xc0 [.] ? init_ima_lsm+0x30/0x30 [.] ima_load_kexec_buffer+0x72/0xa0 [.] ima_init+0x44/0xa0 [.] __initstub__kmod_ima__373_1201_init_ima7+0x1e/0xb0 [.] ? init_ima_lsm+0x30/0x30 [.] do_one_initcall+0xad/0x200 [.] ? idr_alloc_cyclic+0xaa/0x110 [.] ? new_slab+0x12c/0x420 [.] ? new_slab+0x12c/0x420 [.] ? number+0x12a/0x430 [.] ? sysvec_apic_timer_interrupt+0xa/0x80 [.] ? asm_sysvec_apic_timer_interrupt+0x16/0x20 [.] ? parse_args+0xd4/0x380 [.] ? parse_args+0x14b/0x380 [.] kernel_init_freeable+0x1c1/0x2b0 [.] ? rest_init+0xb0/0xb0 [.] kernel_init+0x16/0x1a0 [.] ret_from_fork+0x2f/0x40 [.] ? rest_init+0xb0/0xb0 [.] ret_from_fork_asm+0x11/0x20 [.] </TASK> Link: https://lore.kernel.org/all/20231114091658.228030-1-bhe@redhat.com/ Link: https://lkml.kernel.org/r/20241017190347.5578-1-gourry@gourry.net Fixes: 7acf164b259d ("resource: add walk_system_ram_res_rev()") Signed-off-by: Gregory Price <gourry(a)gourry.net> Cc: AKASHI Takahiro <takahiro.akashi(a)linaro.org> Cc: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com> Cc: Baoquan he <bhe(a)redhat.com> Cc: Bjorn Helgaas <bhelgaas(a)google.com> Cc: "Huang, Ying" <ying.huang(a)intel.com> Cc: Ilpo J��rvinen <ilpo.jarvinen(a)linux.intel.com> Cc: Mika Westerberg <mika.westerberg(a)linux.intel.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- kernel/resource.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) --- a/kernel/resource.c~resourcekexec-walk_system_ram_res_rev-must-retain-resource-flags +++ a/kernel/resource.c @@ -459,9 +459,7 @@ int walk_system_ram_res_rev(u64 start, u rams_size += 16; } - rams[i].start = res.start; - rams[i++].end = res.end; - + rams[i++] = res; start = res.end + 1; } _ Patches currently in -mm which might be from gourry(a)gourry.net are resourcekexec-walk_system_ram_res_rev-must-retain-resource-flags.patch

11 months, 1 week

1
0
0 0

+ nilfs2-fix-kernel-bug-due-to-missing-clearing-of-checked-flag.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: nilfs2: fix kernel bug due to missing clearing of checked flag has been added to the -mm mm-hotfixes-unstable branch. Its filename is nilfs2-fix-kernel-bug-due-to-missing-clearing-of-checked-flag.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Subject: nilfs2: fix kernel bug due to missing clearing of checked flag Date: Fri, 18 Oct 2024 04:33:10 +0900 Syzbot reported that in directory operations after nilfs2 detects filesystem corruption and degrades to read-only, __block_write_begin_int(), which is called to prepare block writes, may fail the BUG_ON check for accesses exceeding the folio/page size, triggering a kernel bug. This was found to be because the "checked" flag of a page/folio was not cleared when it was discarded by nilfs2's own routine, which causes the sanity check of directory entries to be skipped when the directory page/folio is reloaded. So, fix that. This was necessary when the use of nilfs2's own page discard routine was applied to more than just metadata files. Link: https://lkml.kernel.org/r/20241017193359.5051-1-konishi.ryusuke@gmail.com Fixes: 8c26c4e2694a ("nilfs2: fix issue with flush kernel thread after remount in RO mode because of driver's internal error or metadata corruption") Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com> Reported-by: syzbot+d6ca2daf692c7a82f959(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d6ca2daf692c7a82f959 Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/nilfs2/page.c | 1 + 1 file changed, 1 insertion(+) --- a/fs/nilfs2/page.c~nilfs2-fix-kernel-bug-due-to-missing-clearing-of-checked-flag +++ a/fs/nilfs2/page.c @@ -401,6 +401,7 @@ void nilfs_clear_folio_dirty(struct foli folio_clear_uptodate(folio); folio_clear_mappedtodisk(folio); + folio_clear_checked(folio); head = folio_buffers(folio); if (head) { _ Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are nilfs2-fix-kernel-bug-due-to-missing-clearing-of-buffer-delay-flag.patch nilfs2-fix-kernel-bug-due-to-missing-clearing-of-checked-flag.patch

11 months, 1 week

1
0
0 0

[PATCH 5.15 00/20] Fix NULL pointer dereference for corrupted UDF filesystems

by Thadeu Lima de Souza Cascardo

UDF filesystems which have relocated blocks past the end of the device may lead to a dcache without an inode that would lead to a NULL pointer dereference, like this: [ 20.554242] attempt to access beyond end of device [ 20.554242] loop0: rw=2049, want=2054, limit=2048 [ 20.557322] Buffer I/O error on dev loop0, logical block 1026, lost async page write [ 20.562948] ================================================================== [ 20.565002] BUG: KASAN: null-ptr-deref in path_openat+0x6ae/0x9f9 [ 20.566460] Read of size 2 at addr 0000000000000000 by task repro/415 [ 20.567768] [ 20.568112] CPU: 0 PID: 415 Comm: repro Not tainted 5.15.168-rc1-00692-g63cec7aeaef7 #5 [ 20.569739] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 20.571549] Call Trace: [ 20.571965] <TASK> [ 20.572338] dump_stack_lvl+0x45/0x5d [ 20.572991] ? path_openat+0x6ae/0x9f9 [ 20.573742] kasan_report+0x1b7/0x1d8 [ 20.574559] ? path_openat+0x6ae/0x9f9 [ 20.575241] path_openat+0x6ae/0x9f9 [ 20.575915] ? may_open+0x135/0x135 [ 20.576839] ? lockdep_hardirqs_on_prepare+0x1f1/0x1f1 [ 20.577953] ? kvm_sched_clock_read+0x5/0x11 [ 20.579140] ? sched_clock_cpu+0x1a/0x106 [ 20.580687] do_filp_open+0xab/0x12e [ 20.582278] ? path_openat+0x9f9/0x9f9 [ 20.583503] ? kvm_sched_clock_read+0x5/0x11 [ 20.584925] ? lock_downgrade+0x324/0x324 [ 20.586144] ? lock_acquired+0x2d1/0x333 [ 20.587385] ? __check_heap_object+0x5d/0xe0 [ 20.588436] ? do_raw_spin_unlock+0xca/0xd6 [ 20.589853] ? _raw_spin_unlock+0x1a/0x2e [ 20.590697] ? alloc_fd+0x218/0x22e [ 20.591460] do_sys_openat2+0xbd/0x15c [ 20.592241] ? file_open_root+0xee/0xee [ 20.593034] ? lock_downgrade+0x324/0x324 [ 20.593839] do_sys_open+0x7b/0xac [ 20.594532] ? filp_open+0x43/0x43 [ 20.595138] ? lockdep_hardirqs_on_prepare+0x1ce/0x1f1 [ 20.596062] ? __x64_sys_creat+0x1b/0x33 [ 20.596796] do_syscall_64+0x6d/0x84 [ 20.597485] entry_SYSCALL_64_after_hwframe+0x6c/0xd6 [ 20.598359] RIP: 0033:0x79f47fd46c7d [ 20.599067] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 81 0d 00 f7 d8 64 89 01 48 [ 20.603403] RSP: 002b:00007fffca44e7f8 EFLAGS: 00000202 ORIG_RAX: 0000000000000055 [ 20.605712] RAX: ffffffffffffffda RBX: 00007fffca44e928 RCX: 000079f47fd46c7d [ 20.607476] RDX: 000079f47fd46c7d RSI: 0000000000000000 RDI: 0000000020000d00 [ 20.609956] RBP: 00007fffca44e810 R08: 0000000000000000 R09: 0000000000000000 [ 20.612074] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000 [ 20.613364] R13: 00007fffca44e938 R14: 00005c3ad8078d10 R15: 000079f47fe87000 [ 20.615618] </TASK> [ 20.616752] ================================================================== Jan Kara (19): udf: New directory iteration code udf: Convert udf_expand_dir_adinicb() to new directory iteration udf: Move udf_expand_dir_adinicb() to its callsite udf: Implement searching for directory entry using new iteration code udf: Provide function to mark entry as deleted using new directory iteration code udf: Convert udf_rename() to new directory iteration code udf: Convert udf_readdir() to new directory iteration udf: Convert udf_lookup() to use new directory iteration code udf: Convert udf_get_parent() to new directory iteration code udf: Convert empty_dir() to new directory iteration code udf: Convert udf_rmdir() to new directory iteration code udf: Convert udf_unlink() to new directory iteration code udf: Implement adding of dir entries using new iteration code udf: Convert udf_add_nondir() to new directory iteration udf: Convert udf_mkdir() to new directory iteration code udf: Convert udf_link() to new directory iteration code udf: Remove old directory iteration code udf: Handle error when expanding directory udf: Don't return bh from udf_expand_dir_adinicb() fs/udf/dir.c | 148 ++----- fs/udf/directory.c | 564 ++++++++++++++++++------ fs/udf/inode.c | 90 ---- fs/udf/namei.c | 1052 +++++++++++++++----------------------------- fs/udf/udfdecl.h | 45 +- 5 files changed, 825 insertions(+), 1074 deletions(-) -- 2.34.1

11 months, 1 week

1
20
0 0

[PATCH v2] comedi: Flush partial mappings in error case

by Jann Horn

If some remap_pfn_range() calls succeeded before one failed, we still have buffer pages mapped into the userspace page tables when we drop the buffer reference with comedi_buf_map_put(bm). The userspace mappings are only cleaned up later in the mmap error path. Fix it by explicitly flushing all mappings in our VMA on the error path. See commit 79a61cc3fc04 ("mm: avoid leaving partial pfn mappings around in error case"). Cc: stable(a)vger.kernel.org Fixes: ed9eccbe8970 ("Staging: add comedi core") Signed-off-by: Jann Horn <jannh(a)google.com> --- Note: compile-tested only; I don't actually have comedi hardware, and I don't know anything about comedi. --- Changes in v2: - only do the zapping in the pfnmap path (Ian Abbott) - use zap_vma_ptes() instead of zap_page_range_single() (Ian Abbott) - Link to v1: https://lore.kernel.org/r/20241014-comedi-tlb-v1-1-4b699144b438@google.com --- drivers/comedi/comedi_fops.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c index 1b481731df96..68e5301e6281 100644 --- a/drivers/comedi/comedi_fops.c +++ b/drivers/comedi/comedi_fops.c @@ -2407,6 +2407,16 @@ static int comedi_mmap(struct file *file, struct vm_area_struct *vma) start += PAGE_SIZE; } + + /* + * Leaving behind a partial mapping of a buffer we're about to + * drop is unsafe, see remap_pfn_range_notrack(). + * We need to zap the range here ourselves instead of relying + * on the automatic zapping in remap_pfn_range() because we call + * remap_pfn_range() in a loop. + */ + if (retval) + zap_vma_ptes(vma, vma->vm_start, size); } if (retval == 0) { --- base-commit: 6485cf5ea253d40d507cd71253c9568c5470cd27 change-id: 20241014-comedi-tlb-400246505961 -- Jann Horn <jannh(a)google.com>

11 months, 1 week

3
8
0 0

FAILED: patch "[PATCH] mptcp: pm: fix UaF read in mptcp_pm_nl_rm_addr_or_subflow" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 7decd1f5904a489d3ccdcf131972f94645681689 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024101748-uneasy-framing-a214@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 7decd1f5904a489d3ccdcf131972f94645681689 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org> Date: Tue, 15 Oct 2024 10:38:47 +0200 Subject: [PATCH] mptcp: pm: fix UaF read in mptcp_pm_nl_rm_addr_or_subflow Syzkaller reported this splat: ================================================================== BUG: KASAN: slab-use-after-free in mptcp_pm_nl_rm_addr_or_subflow+0xb44/0xcc0 net/mptcp/pm_netlink.c:881 Read of size 4 at addr ffff8880569ac858 by task syz.1.2799/14662 CPU: 0 UID: 0 PID: 14662 Comm: syz.1.2799 Not tainted 6.12.0-rc2-syzkaller-00307-g36c254515dc6 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: <TASK> __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc3/0x620 mm/kasan/report.c:488 kasan_report+0xd9/0x110 mm/kasan/report.c:601 mptcp_pm_nl_rm_addr_or_subflow+0xb44/0xcc0 net/mptcp/pm_netlink.c:881 mptcp_pm_nl_rm_subflow_received net/mptcp/pm_netlink.c:914 [inline] mptcp_nl_remove_id_zero_address+0x305/0x4a0 net/mptcp/pm_netlink.c:1572 mptcp_pm_nl_del_addr_doit+0x5c9/0x770 net/mptcp/pm_netlink.c:1603 genl_family_rcv_msg_doit+0x202/0x2f0 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x565/0x800 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x165/0x410 net/netlink/af_netlink.c:2551 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1331 [inline] netlink_unicast+0x53c/0x7f0 net/netlink/af_netlink.c:1357 netlink_sendmsg+0x8b8/0xd70 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg net/socket.c:744 [inline] ____sys_sendmsg+0x9ae/0xb40 net/socket.c:2607 ___sys_sendmsg+0x135/0x1e0 net/socket.c:2661 __sys_sendmsg+0x117/0x1f0 net/socket.c:2690 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 entry_SYSENTER_compat_after_hwframe+0x84/0x8e RIP: 0023:0xf7fe4579 Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d b4 26 00 00 00 00 8d b4 26 00 00 00 00 RSP: 002b:00000000f574556c EFLAGS: 00000296 ORIG_RAX: 0000000000000172 RAX: ffffffffffffffda RBX: 000000000000000b RCX: 0000000020000140 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000296 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 </TASK> Allocated by task 5387: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:394 kmalloc_noprof include/linux/slab.h:878 [inline] kzalloc_noprof include/linux/slab.h:1014 [inline] subflow_create_ctx+0x87/0x2a0 net/mptcp/subflow.c:1803 subflow_ulp_init+0xc3/0x4d0 net/mptcp/subflow.c:1956 __tcp_set_ulp net/ipv4/tcp_ulp.c:146 [inline] tcp_set_ulp+0x326/0x7f0 net/ipv4/tcp_ulp.c:167 mptcp_subflow_create_socket+0x4ae/0x10a0 net/mptcp/subflow.c:1764 __mptcp_subflow_connect+0x3cc/0x1490 net/mptcp/subflow.c:1592 mptcp_pm_create_subflow_or_signal_addr+0xbda/0x23a0 net/mptcp/pm_netlink.c:642 mptcp_pm_nl_fully_established net/mptcp/pm_netlink.c:650 [inline] mptcp_pm_nl_work+0x3a1/0x4f0 net/mptcp/pm_netlink.c:943 mptcp_worker+0x15a/0x1240 net/mptcp/protocol.c:2777 process_one_work+0x958/0x1b30 kernel/workqueue.c:3229 process_scheduled_works kernel/workqueue.c:3310 [inline] worker_thread+0x6c8/0xf00 kernel/workqueue.c:3391 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Freed by task 113: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:579 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x51/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:230 [inline] slab_free_hook mm/slub.c:2342 [inline] slab_free mm/slub.c:4579 [inline] kfree+0x14f/0x4b0 mm/slub.c:4727 kvfree+0x47/0x50 mm/util.c:701 kvfree_rcu_list+0xf5/0x2c0 kernel/rcu/tree.c:3423 kvfree_rcu_drain_ready kernel/rcu/tree.c:3563 [inline] kfree_rcu_monitor+0x503/0x8b0 kernel/rcu/tree.c:3632 kfree_rcu_shrink_scan+0x245/0x3a0 kernel/rcu/tree.c:3966 do_shrink_slab+0x44f/0x11c0 mm/shrinker.c:435 shrink_slab+0x32b/0x12a0 mm/shrinker.c:662 shrink_one+0x47e/0x7b0 mm/vmscan.c:4818 shrink_many mm/vmscan.c:4879 [inline] lru_gen_shrink_node mm/vmscan.c:4957 [inline] shrink_node+0x2452/0x39d0 mm/vmscan.c:5937 kswapd_shrink_node mm/vmscan.c:6765 [inline] balance_pgdat+0xc19/0x18f0 mm/vmscan.c:6957 kswapd+0x5ea/0xbf0 mm/vmscan.c:7226 kthread+0x2c1/0x3a0 kernel/kthread.c:389 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Last potentially related work creation: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xba/0xd0 mm/kasan/generic.c:541 kvfree_call_rcu+0x74/0xbe0 kernel/rcu/tree.c:3810 subflow_ulp_release+0x2ae/0x350 net/mptcp/subflow.c:2009 tcp_cleanup_ulp+0x7c/0x130 net/ipv4/tcp_ulp.c:124 tcp_v4_destroy_sock+0x1c5/0x6a0 net/ipv4/tcp_ipv4.c:2541 inet_csk_destroy_sock+0x1a3/0x440 net/ipv4/inet_connection_sock.c:1293 tcp_done+0x252/0x350 net/ipv4/tcp.c:4870 tcp_rcv_state_process+0x379b/0x4f30 net/ipv4/tcp_input.c:6933 tcp_v4_do_rcv+0x1ad/0xa90 net/ipv4/tcp_ipv4.c:1938 sk_backlog_rcv include/net/sock.h:1115 [inline] __release_sock+0x31b/0x400 net/core/sock.c:3072 __tcp_close+0x4f3/0xff0 net/ipv4/tcp.c:3142 __mptcp_close_ssk+0x331/0x14d0 net/mptcp/protocol.c:2489 mptcp_close_ssk net/mptcp/protocol.c:2543 [inline] mptcp_close_ssk+0x150/0x220 net/mptcp/protocol.c:2526 mptcp_pm_nl_rm_addr_or_subflow+0x2be/0xcc0 net/mptcp/pm_netlink.c:878 mptcp_pm_nl_rm_subflow_received net/mptcp/pm_netlink.c:914 [inline] mptcp_nl_remove_id_zero_address+0x305/0x4a0 net/mptcp/pm_netlink.c:1572 mptcp_pm_nl_del_addr_doit+0x5c9/0x770 net/mptcp/pm_netlink.c:1603 genl_family_rcv_msg_doit+0x202/0x2f0 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x565/0x800 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x165/0x410 net/netlink/af_netlink.c:2551 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1331 [inline] netlink_unicast+0x53c/0x7f0 net/netlink/af_netlink.c:1357 netlink_sendmsg+0x8b8/0xd70 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg net/socket.c:744 [inline] ____sys_sendmsg+0x9ae/0xb40 net/socket.c:2607 ___sys_sendmsg+0x135/0x1e0 net/socket.c:2661 __sys_sendmsg+0x117/0x1f0 net/socket.c:2690 do_syscall_32_irqs_on arch/x86/entry/common.c:165 [inline] __do_fast_syscall_32+0x73/0x120 arch/x86/entry/common.c:386 do_fast_syscall_32+0x32/0x80 arch/x86/entry/common.c:411 entry_SYSENTER_compat_after_hwframe+0x84/0x8e The buggy address belongs to the object at ffff8880569ac800 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 88 bytes inside of freed 512-byte region [ffff8880569ac800, ffff8880569aca00) The buggy address belongs to the physical page: page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x569ac head: order:2 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x4fff00000000040(head|node=1|zone=1|lastcpupid=0x7ff) page_type: f5(slab) raw: 04fff00000000040 ffff88801ac42c80 dead000000000100 dead000000000122 raw: 0000000000000000 0000000080100010 00000001f5000000 0000000000000000 head: 04fff00000000040 ffff88801ac42c80 dead000000000100 dead000000000122 head: 0000000000000000 0000000080100010 00000001f5000000 0000000000000000 head: 04fff00000000002 ffffea00015a6b01 ffffffffffffffff 0000000000000000 head: 0000000000000004 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 10238, tgid 10238 (kworker/u32:6), ts 597403252405, free_ts 597177952947 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x2d1/0x350 mm/page_alloc.c:1537 prep_new_page mm/page_alloc.c:1545 [inline] get_page_from_freelist+0x101e/0x3070 mm/page_alloc.c:3457 __alloc_pages_noprof+0x223/0x25a0 mm/page_alloc.c:4733 alloc_pages_mpol_noprof+0x2c9/0x610 mm/mempolicy.c:2265 alloc_slab_page mm/slub.c:2412 [inline] allocate_slab mm/slub.c:2578 [inline] new_slab+0x2ba/0x3f0 mm/slub.c:2631 ___slab_alloc+0xd1d/0x16f0 mm/slub.c:3818 __slab_alloc.constprop.0+0x56/0xb0 mm/slub.c:3908 __slab_alloc_node mm/slub.c:3961 [inline] slab_alloc_node mm/slub.c:4122 [inline] __kmalloc_cache_noprof+0x2c5/0x310 mm/slub.c:4290 kmalloc_noprof include/linux/slab.h:878 [inline] kzalloc_noprof include/linux/slab.h:1014 [inline] mld_add_delrec net/ipv6/mcast.c:743 [inline] igmp6_leave_group net/ipv6/mcast.c:2625 [inline] igmp6_group_dropped+0x4ab/0xe40 net/ipv6/mcast.c:723 __ipv6_dev_mc_dec+0x281/0x360 net/ipv6/mcast.c:979 addrconf_leave_solict net/ipv6/addrconf.c:2253 [inline] __ipv6_ifa_notify+0x3f6/0xc30 net/ipv6/addrconf.c:6283 addrconf_ifdown.isra.0+0xef9/0x1a20 net/ipv6/addrconf.c:3982 addrconf_notify+0x220/0x19c0 net/ipv6/addrconf.c:3781 notifier_call_chain+0xb9/0x410 kernel/notifier.c:93 call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:1996 call_netdevice_notifiers_extack net/core/dev.c:2034 [inline] call_netdevice_notifiers net/core/dev.c:2048 [inline] dev_close_many+0x333/0x6a0 net/core/dev.c:1589 page last free pid 13136 tgid 13136 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1108 [inline] free_unref_page+0x5f4/0xdc0 mm/page_alloc.c:2638 stack_depot_save_flags+0x2da/0x900 lib/stackdepot.c:666 kasan_save_stack+0x42/0x60 mm/kasan/common.c:48 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 unpoison_slab_object mm/kasan/common.c:319 [inline] __kasan_slab_alloc+0x89/0x90 mm/kasan/common.c:345 kasan_slab_alloc include/linux/kasan.h:247 [inline] slab_post_alloc_hook mm/slub.c:4085 [inline] slab_alloc_node mm/slub.c:4134 [inline] kmem_cache_alloc_noprof+0x121/0x2f0 mm/slub.c:4141 skb_clone+0x190/0x3f0 net/core/skbuff.c:2084 do_one_broadcast net/netlink/af_netlink.c:1462 [inline] netlink_broadcast_filtered+0xb11/0xef0 net/netlink/af_netlink.c:1540 netlink_broadcast+0x39/0x50 net/netlink/af_netlink.c:1564 uevent_net_broadcast_untagged lib/kobject_uevent.c:331 [inline] kobject_uevent_net_broadcast lib/kobject_uevent.c:410 [inline] kobject_uevent_env+0xacd/0x1670 lib/kobject_uevent.c:608 device_del+0x623/0x9f0 drivers/base/core.c:3882 snd_card_disconnect.part.0+0x58a/0x7c0 sound/core/init.c:546 snd_card_disconnect+0x1f/0x30 sound/core/init.c:495 snd_usx2y_disconnect+0xe9/0x1f0 sound/usb/usx2y/usbusx2y.c:417 usb_unbind_interface+0x1e8/0x970 drivers/usb/core/driver.c:461 device_remove drivers/base/dd.c:569 [inline] device_remove+0x122/0x170 drivers/base/dd.c:561 That's because 'subflow' is used just after 'mptcp_close_ssk(subflow)', which will initiate the release of its memory. Even if it is very likely the release and the re-utilisation will be done later on, it is of course better to avoid any issues and read the content of 'subflow' before closing it. Fixes: 1c1f72137598 ("mptcp: pm: only decrement add_addr_accepted for MPJ req") Cc: stable(a)vger.kernel.org Reported-by: syzbot+3c8b7a8e7df6a2a226ca(a)syzkaller.appspotmail.com Closes: https://lore.kernel.org/670d7337.050a0220.4cbc0.004f.GAE@google.com Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org> Acked-by: Paolo Abeni <pabeni(a)redhat.com> Link: https://patch.msgid.link/20241015-net-mptcp-uaf-pm-rm-v1-1-c4ee5d987a64@ker… Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 1a78998fe1f4..db586a5b3866 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -873,12 +873,12 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); + removed |= subflow->request_join; /* the following takes care of updating the subflows counter */ mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); - removed |= subflow->request_join; if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); }

11 months, 1 week

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror