- Linux-stable-mirror - lists.linaro.org

[PATCH v2] usb:xhci:Fix slot_id resource race conflict

by Weitao Wang

In such a scenario, device-A with slot_id equal to 1 is disconnecting while device-B is enumerating, device-B will fail to enumerate in the following sequence. 1.[device-A] send disable slot command 2.[device-B] send enable slot command 3.[device-A] disable slot command completed and wakeup waiting thread 4.[device-B] enable slot command completed with slot_id equal to 1 and wakeup waiting thread 5.[device-B] driver check this slot_id was used by someone(device-A) in xhci_alloc_virt_device, this device fails to enumerate as this conflict 6.[device-A] xhci->devs[slot_id] set to NULL in xhci_free_virt_device To fix driver's slot_id resources conflict, let the xhci_free_virt_device function be called in the interrupt handler when the disable slot command succeeds. Cc: stable(a)vger.kernel.org Fixes: 7faac1953ed1 ("xhci: avoid race between disable slot command and host runtime suspend") Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com> --- v1->v2 - Adjust the lock position in the function xhci_free_dev. drivers/usb/host/xhci-hub.c | 5 +++-- drivers/usb/host/xhci-ring.c | 7 +++++-- drivers/usb/host/xhci.c | 35 +++++++++++++++++++++++++---------- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 92bb84f8132a..fd8a64aa5779 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -705,10 +705,11 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci, continue; retval = xhci_disable_slot(xhci, i); - xhci_free_virt_device(xhci, i); - if (retval) + if (retval) { xhci_err(xhci, "Failed to disable slot %d, %d. 
Enter test mode anyway\n", i, retval); + xhci_free_virt_device(xhci, i); + } } spin_lock_irqsave(&xhci->lock, *flags); /* Put all ports to the Disable state by clear PP */ diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 94c9c9271658..93dc28399c3c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1589,7 +1589,8 @@ static void xhci_handle_cmd_enable_slot(int slot_id, struct xhci_command *comman command->slot_id = 0; } -static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) +static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id, + u32 cmd_comp_code) { struct xhci_virt_device *virt_dev; struct xhci_slot_ctx *slot_ctx; @@ -1604,6 +1605,8 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); + if (cmd_comp_code == COMP_SUCCESS) + xhci_free_virt_device(xhci, slot_id); } static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id) @@ -1853,7 +1856,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, xhci_handle_cmd_enable_slot(slot_id, cmd, cmd_comp_code); break; case TRB_DISABLE_SLOT: - xhci_handle_cmd_disable_slot(xhci, slot_id); + xhci_handle_cmd_disable_slot(xhci, slot_id, cmd_comp_code); break; case TRB_CONFIG_EP: if (!cmd->completion) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8a819e853288..6c6f6ebb8953 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3931,13 +3931,14 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd, * the USB device has been reset. 
*/ ret = xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); if (!ret) { ret = xhci_alloc_dev(hcd, udev); if (ret == 1) ret = 0; else ret = -EINVAL; + } else { + xhci_free_virt_device(xhci, udev->slot_id); } return ret; } @@ -4085,11 +4086,12 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) for (i = 0; i < 31; i++) virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING; virt_dev->udev = NULL; - xhci_disable_slot(xhci, udev->slot_id); - - spin_lock_irqsave(&xhci->lock, flags); - xhci_free_virt_device(xhci, udev->slot_id); - spin_unlock_irqrestore(&xhci->lock, flags); + ret = xhci_disable_slot(xhci, udev->slot_id); + if (ret) { + spin_lock_irqsave(&xhci->lock, flags); + xhci_free_virt_device(xhci, udev->slot_id); + spin_unlock_irqrestore(&xhci->lock, flags); + } } @@ -4128,9 +4130,20 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) wait_for_completion(command->completion); - if (command->status != COMP_SUCCESS) + if (command->status != COMP_SUCCESS) { xhci_warn(xhci, "Unsuccessful disable slot %u command, status %d\n", slot_id, command->status); + switch (command->status) { + case COMP_COMMAND_ABORTED: + case COMP_COMMAND_RING_STOPPED: + xhci_warn(xhci, "Timeout while waiting for disable slot command\n"); + ret = -ETIME; + break; + default: + ret = -EINVAL; + break; + } + } xhci_free_command(xhci, command); @@ -4243,8 +4256,9 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) return 1; disable_slot: - xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); + ret = xhci_disable_slot(xhci, udev->slot_id); + if (ret) + xhci_free_virt_device(xhci, udev->slot_id); return 0; } @@ -4381,10 +4395,11 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); - xhci_free_virt_device(xhci, udev->slot_id); if (!ret) { if (xhci_alloc_dev(hcd, udev) == 1) 
xhci_setup_addressable_virt_dev(xhci, udev); + } else { + xhci_free_virt_device(xhci, udev->slot_id); } kfree(command->completion); kfree(command); -- 2.32.0

1 month

3
4
0 0

FAILED: patch "[PATCH] i2c: stm32f7: unmap DMA mapped buffer" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 6aae87fe7f180cd93a74466cdb6cf2aa9bb28798 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072104-bacteria-resend-dcff@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6aae87fe7f180cd93a74466cdb6cf2aa9bb28798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Le=20Goffic?= <clement.legoffic(a)foss.st.com> Date: Fri, 4 Jul 2025 10:39:15 +0200 Subject: [PATCH] i2c: stm32f7: unmap DMA mapped buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before each I2C transfer using DMA, the I2C buffer is DMA'pped to make sure the memory buffer is DMA'able. This is handled in the function `stm32_i2c_prep_dma_xfer()`. If the transfer fails for any reason the I2C buffer must be unmapped. Use the dma_callback to factorize the code and fix this issue. Note that the `stm32f7_i2c_dma_callback()` is now called in case of DMA transfer success and error and that the `complete()` on the dma_complete completion structure is done unconditionally in case of transfer success or error as well as the `dmaengine_terminate_async()`. This is allowed as a `complete()` in case of transfer error has no effect as well as a `dmaengine_terminate_async()` on a transfer success. Also fix the unneeded cast and remove no longer needed variables. 
Fixes: 7ecc8cfde553 ("i2c: i2c-stm32f7: Add DMA support") Signed-off-by: Clément Le Goffic <clement.legoffic(a)foss.st.com> Cc: <stable(a)vger.kernel.org> # v4.18+ Acked-by: Alain Volmat <alain.volmat(a)foss.st.com> Signed-off-by: Andi Shyti <andi.shyti(a)kernel.org> Link: https://lore.kernel.org/r/20250704-i2c-upstream-v4-2-84a095a2c728@foss.st.c… diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 817d081460c2..73a7b8894c0d 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -739,10 +739,11 @@ static void stm32f7_i2c_disable_dma_req(struct stm32f7_i2c_dev *i2c_dev) static void stm32f7_i2c_dma_callback(void *arg) { - struct stm32f7_i2c_dev *i2c_dev = (struct stm32f7_i2c_dev *)arg; + struct stm32f7_i2c_dev *i2c_dev = arg; struct stm32_i2c_dma *dma = i2c_dev->dma; stm32f7_i2c_disable_dma_req(i2c_dev); + dmaengine_terminate_async(dma->chan_using); dma_unmap_single(i2c_dev->dev, dma->dma_buf, dma->dma_len, dma->dma_data_dir); complete(&dma->dma_complete); @@ -1510,7 +1511,6 @@ static irqreturn_t stm32f7_i2c_handle_isr_errs(struct stm32f7_i2c_dev *i2c_dev, u16 addr = f7_msg->addr; void __iomem *base = i2c_dev->base; struct device *dev = i2c_dev->dev; - struct stm32_i2c_dma *dma = i2c_dev->dma; /* Bus error */ if (status & STM32F7_I2C_ISR_BERR) { @@ -1551,10 +1551,8 @@ static irqreturn_t stm32f7_i2c_handle_isr_errs(struct stm32f7_i2c_dev *i2c_dev, } /* Disable dma */ - if (i2c_dev->use_dma) { - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); - } + if (i2c_dev->use_dma) + stm32f7_i2c_dma_callback(i2c_dev); i2c_dev->master_mode = false; complete(&i2c_dev->complete); @@ -1600,7 +1598,6 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) { struct stm32f7_i2c_dev *i2c_dev = data; struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; - struct stm32_i2c_dma *dma = i2c_dev->dma; void __iomem *base = i2c_dev->base; u32 status, mask; int ret; @@ -1619,10 
+1616,8 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n", __func__, f7_msg->addr); writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR); - if (i2c_dev->use_dma) { - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); - } + if (i2c_dev->use_dma) + stm32f7_i2c_dma_callback(i2c_dev); f7_msg->result = -ENXIO; } @@ -1640,8 +1635,7 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data) ret = wait_for_completion_timeout(&i2c_dev->dma->dma_complete, HZ); if (!ret) { dev_dbg(i2c_dev->dev, "<%s>: Timed out\n", __func__); - stm32f7_i2c_disable_dma_req(i2c_dev); - dmaengine_terminate_async(dma->chan_using); + stm32f7_i2c_dma_callback(i2c_dev); f7_msg->result = -ETIMEDOUT; } }

1 month

3
13
0 0

[PATCH 5.4.y 0/8] Backport CVE-2022-4269 fix to stable kernel v5.4.y

by skulkarni＠mvista.com

From: Shubham Kulkarni <skulkarni(a)mvista.com> Hi Greg/All, This patch series backports the fix for CVE-2022-4269 along with its 7 dependency commits to 5.4 stable kernel. These patches are already part of the next stable kernel v5.10.y and I have referred to those commits to generate this series for v5.4. [CVE-2022-4269 - kernel: net: CPU soft lockup in TC mirred egress-to-ingress action] Patch 1: Dependency Patch #1 - mainline commit c8ecebd04cbb (v5.5-rc1) Patch 2: Dependency Patch #2 - mainline commit 5e1ad95b630e (v5.5-rc1) Patch 3: Dependency Patch #3 - mainline commit 26b537a88ca5 (v5.5-rc1) Patch 4: Dependency Patch #4 - mainline commit ef816f3c49c1 (v5.5-rc1) Patch 5: Dependency Patch #5 - mainline commit 075c8aa79d54 (v5.8-rc1) Patch 6: Dependency Patch #6 - v5.10.y commit bba7ebe10baf (v5.10.181) Patch 7: Dependency Patch #7 - v5.10.y commit f5bf8e3ca13e (v5.10.181) Patch 8: CVE-2022-4269 fix - v5.10.y commit 532451037863 (v5.10.181) --- Davide Caratti (2): net/sched: act_mirred: better wording on protection against excessive stack growth act_mirred: use the backlog for nested calls to mirred ingress Jiri Pirko (1): selftests: forwarding: tc_actions.sh: add matchall mirror test Vlad Buslov (4): net: sched: extract common action counters update code into function net: sched: extract bstats update code into function net: sched: extract qstats update code into functions net: sched: don't expose action qstats to skb_tc_reinsert() wenxu (1): net/sched: act_mirred: refactor the handle of xmit include/net/act_api.h | 25 +++++++ include/net/sch_generic.h | 13 ---- net/sched/act_api.c | 14 ++++ net/sched/act_csum.c | 4 +- net/sched/act_ct.c | 10 +-- net/sched/act_gact.c | 14 +--- net/sched/act_mirred.c | 55 ++++++++------ net/sched/act_police.c | 5 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 9 +-- .../selftests/net/forwarding/tc_actions.sh | 72 ++++++++++++++++--- 11 files changed, 150 insertions(+), 73 deletions(-) -- 2.25.1

1 month

4
19
0 0

[PATCH 6.6] arm64: kaslr: fix nokaslr cmdline parsing

by Chen Ridong

From: Chen Ridong <chenridong(a)huawei.com> Currently, when the command line contains "nokaslrxxx", it was incorrectly treated as a request to disable KASLR virtual memory. However, the behavior is different from physical address handling. This issue exists before the commit af73b9a2dd39 ("arm64: kaslr: Use feature override instead of parsing the cmdline again"). This patch fixes the parsing logic for the 'nokaslr' command line argument. Only the exact strings, 'nokaslr', will disable KASLR. Other inputs such as 'xxnokaslr', 'xxnokaslrxx', or 'xxnokaslr=xx' will not disable KASLR. Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Signed-off-by: Chen Ridong <chenridong(a)huawei.com> --- arch/arm64/kernel/pi/kaslr_early.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c index 17bff6e399e4..731d0a3f1a89 100644 --- a/arch/arm64/kernel/pi/kaslr_early.c +++ b/arch/arm64/kernel/pi/kaslr_early.c @@ -35,9 +35,14 @@ static char *__strstr(const char *s1, const char *s2) static bool cmdline_contains_nokaslr(const u8 *cmdline) { const u8 *str; + size_t len = strlen("nokaslr"); + const char *after = cmdline + len; str = __strstr(cmdline, "nokaslr"); - return str == cmdline || (str > cmdline && *(str - 1) == ' '); + if ((str == cmdline || (str > cmdline && *(str - 1) == ' ')) && + (*after == ' ' || *after == '\0')) + return true; + return false; } static bool is_kaslr_disabled_cmdline(void *fdt) -- 2.34.1

1 month

3
2
0 0

stable: queue/* branches no longer updated

by Matthieu Baerts

Hi Sasha, Thank you for maintaining the stable versions with Greg! If I remember well, you run some scripts on your side to maintain the queue/* branches in the linux-stable-rc Git tree [1], is that correct? These branches have not been updated for a bit more than 3 weeks. Is it normal? Personally, I find them useful. But if it is just me, I can work without them. [1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/… Cheers, Matt -- Sponsored by the NGI0 Core fund.

1 month

2
2
0 0

[PATCH v3] mm: slub: avoid deref of free pointer in sanity checks if object is invalid

by Li Qiong

For debugging, object_err() prints the free pointer of the object. However, if check_valid_pointer() returns false for an object, dereferencing `object + s->offset` can lead to a crash. Therefore, print the object's address in such cases. Fixes: bb192ed9aa71 ("mm/slub: Convert most struct page to struct slab by spatch") Cc: <stable(a)vger.kernel.org> Signed-off-by: Li Qiong <liqiong(a)nfschina.com> --- v2: - rephrase the commit message, add comment for object_err(). v3: - check object pointer in object_err(). --- mm/slub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 31e11ef256f9..d3abae5a2193 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1104,7 +1104,11 @@ static void object_err(struct kmem_cache *s, struct slab *slab, return; slab_bug(s, reason); - print_trailer(s, slab, object); + if (!check_valid_pointer(s, slab, object)) { + print_slab_info(slab); + pr_err("invalid object 0x%p\n", object); + } else + print_trailer(s, slab, object); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); WARN_ON(1); @@ -1587,7 +1591,7 @@ static inline int alloc_consistency_checks(struct kmem_cache *s, return 0; if (!check_valid_pointer(s, slab, object)) { - object_err(s, slab, object, "Freelist Pointer check fails"); + slab_err(s, slab, "Freelist Pointer(0x%p) check fails", object); return 0; } -- 2.30.2

1 month

3
4
0 0

"stack state/frame" and "jump dest instruction" errors (was Re: Linux 6.16)

by Alan J. Wylie

#regzbot introduced: 6.15.8..6.16 Linus Torvalds <torvalds(a)linux-foundation.org> writes: > It's Sunday afternoon, and the release cycle has come to an end. Last > week was nice and calm, and there were no big show-stopper surprises > to keep us from the regular schedule, so I've tagged and pushed out > 6.16 as planned. Even after a "make mrproper" and "git clean -fxd" I'm seeing lots of warnings and errors. can't find jump dest instruction stack state mismatch return with modified stack frame objtool: can't decode instruction can't find starting instruction gcc (Gentoo Hardened 14.3.0 p8) 14.3.0 I selected "Y" to the new config option "X86_NATIVE_CPU" CPU is AMD FX-8350 .config attached The build is fine on a cross-compile of a minimal kernel for an Intel Atom server. Possibly related to https://lore.kernel.org/lkml/5263a182e608408bf42dc1ed12bc43dee9598ac9.17509… LD [M] arch/x86/events/amd/amd-uncore.o arch/x86/events/amd/amd-uncore.o: warning: objtool: amd_uncore_df_ctx_scan+0x54: can't find jump dest instruction at .text+0x1b6 make[5]: *** [scripts/Makefile.build:502: arch/x86/events/amd/amd-uncore.o] Error 255 make[5]: *** Deleting file 'arch/x86/events/amd/amd-uncore.o' make[4]: *** [scripts/Makefile.build:555: arch/x86/events/amd] Error 2 make[3]: *** [scripts/Makefile.build:555: arch/x86/events] Error 2 make[3]: *** Waiting for unfinished jobs.... CC fs/pidfs.o crypto/cmac.o: warning: objtool: crypto_cmac_digest_setkey+0xc2: stack state mismatch: cfa1=4+32 cfa2=4+24 CC [M] sound/core/device.o crypto/md4.o: warning: objtool: md4_final+0xd1: return with modified stack frame LD [M] block/bfq.o block/bfq.o: warning: objtool: bfq_timeout_sync_store+0x42: can't find jump dest instruction at .text+0x10f9 make[3]: *** [scripts/Makefile.build:502: block/bfq.o] Error 255 make[3]: *** Deleting file 'block/bfq.o' make[2]: *** [scripts/Makefile.build:555: block] Error 2 make[2]: *** Waiting for unfinished jobs.... 
CC arch/x86/kernel/acpi/madt_wakeup.o crypto/wp512.o: warning: objtool: wp512_init+0x58: return with modified stack frame crypto/wp512.o: warning: objtool: wp512_process_buffer+0x149: stack state mismatch: cfa1=4+288 cfa2=4-352 CC [M] sound/core/vmaster.o crypto/blake2b_generic.o: warning: objtool: crypto_blake2b_setkey+0x68: return with modified stack frame crypto/blake2b_generic.o: warning: objtool: crypto_blake2b_finup.constprop.0.isra.0+0x132: return with modified stack frame CC drivers/pci/pcie/err.o crypto/ccm.o: warning: objtool: crypto_ccm_init_crypt+0x1b6: stack state mismatch: cfa1=4+56 cfa2=4+48 crypto/ccm.o: warning: objtool: crypto_ccm_auth+0x45e: return with modified stack frame CC drivers/pci/pcie/pme.o crypto/cryptd.o: warning: objtool: cryptd_enqueue_request+0x89: can't find jump dest instruction at .text+0x94c make[3]: *** [scripts/Makefile.build:287: crypto/cryptd.o] Error 255 make[3]: *** Deleting file 'crypto/cryptd.o' make[2]: *** [scripts/Makefile.build:555: crypto] Error 2 LD [M] sound/core/snd.o sound/core/snd.o: warning: objtool: snd_ctl_open+0x10c: can't find jump dest instruction at .text+0x47c6 make[4]: *** [scripts/Makefile.build:502: sound/core/snd.o] Error 255 make[4]: *** Deleting file 'sound/core/snd.o' make[4]: *** Waiting for unfinished jobs.... 
LD [M] sound/core/oss/snd-mixer-oss.o sound/core/oss/snd-mixer-oss.o: warning: objtool: mixer_slot_clear+0x4c: return with modified stack frame CC lib/crypto/mpi/mpi-mod.o sound/core/oss/snd-pcm-oss.o: warning: objtool: mulaw_decode+0xf9: can't find jump dest instruction at .text+0x722b make[5]: *** [scripts/Makefile.build:502: sound/core/oss/snd-pcm-oss.o] Error 255 make[5]: *** Deleting file 'sound/core/oss/snd-pcm-oss.o' make[4]: *** [scripts/Makefile.build:555: sound/core/oss] Error 2 CC arch/x86/kernel/smpboot.o drivers/char/ipmi/ipmi_msghandler.o: warning: objtool: ipmi_set_gets_events+0x155: can't find jump dest instruction at .text+0x21c8 make[4]: *** [scripts/Makefile.build:287: drivers/char/ipmi/ipmi_msghandler.o] Error 255 make[4]: *** Deleting file 'drivers/char/ipmi/ipmi_msghandler.o' make[3]: *** [scripts/Makefile.build:555: drivers/char/ipmi] Error 2 make[3]: *** Waiting for unfinished jobs.... CC kernel/entry/kvm.o lib/crypto/gf128mul.o: warning: objtool: gf128mul_init_64k_bbe+0x94: stack state mismatch: cfa1=5+16 cfa2=4+8 CC [M] sound/core/seq/oss/seq_oss_event.o make[2]: *** [scripts/Makefile.build:555: drivers] Error 2 LD [M] sound/core/seq/snd-seq.o sound/core/seq/snd-seq.o: warning: objtool: snd_seq_open+0xd7: can't find jump dest instruction at .text+0x2224 make[5]: *** [scripts/Makefile.build:502: sound/core/seq/snd-seq.o] Error 255 make[5]: *** Deleting file 'sound/core/seq/snd-seq.o' make[5]: *** Waiting for unfinished jobs.... 
LD [M] lib/crypto/libdes.o lib/crypto/libdes.o: warning: objtool: des3_ede_encrypt+0x86: can't find jump dest instruction at .text+0xcb8 make[4]: *** [scripts/Makefile.build:502: lib/crypto/libdes.o] Error 255 make[4]: *** Deleting file 'lib/crypto/libdes.o' make[3]: *** [scripts/Makefile.build:555: lib/crypto] Error 2 make[2]: *** [scripts/Makefile.build:555: lib] Error 2 LD [M] sound/core/seq/oss/snd-seq-oss.o sound/core/seq/oss/snd-seq-oss.o: warning: objtool: alloc_seq_queue+0xc7: return with modified stack frame sound/core/seq/oss/snd-seq-oss.o: warning: objtool: delete_seq_queue.isra.0+0xb5: return with modified stack frame sound/core/seq/oss/snd-seq-oss.o: warning: objtool: snd_seq_oss_synth_info_user+0xb7: return with modified stack frame sound/core/seq/oss/snd-seq-oss.o: warning: objtool: snd_seq_oss_midi_info_user+0xa5: return with modified stack frame sound/core/seq/oss/snd-seq-oss.o: warning: objtool: snd_seq_oss_process_event+0x73c: stack state mismatch: cfa1=4+72 cfa2=4+64 sound/core/seq/oss/snd-seq-oss.o: warning: objtool: snd_seq_oss_write+0x77: stack state mismatch: cfa1=4+120 cfa2=4+112 sound/core/seq/oss/snd-seq-oss.o: warning: objtool: snd_seq_oss_synth_setup+0x14c: return with modified stack frame sound/core/seq/oss/snd-seq-oss.o: warning: objtool: snd_seq_oss_synth_make_info+0x17d: stack state mismatch: cfa1=4+160 cfa2=4+152 make[4]: *** [scripts/Makefile.build:555: sound/core/seq] Error 2 make[3]: *** [scripts/Makefile.build:555: sound/core] Error 2 make[3]: *** Waiting for unfinished jobs.... 
LD [M] sound/pci/hda/snd-hda-codec.o sound/pci/hda/snd-hda-codec.o: error: objtool: can't decode instruction at .text:0xbf1c make[5]: *** [scripts/Makefile.build:502: sound/pci/hda/snd-hda-codec.o] Error 255 make[5]: *** Deleting file 'sound/pci/hda/snd-hda-codec.o' make[4]: *** [scripts/Makefile.build:555: sound/pci/hda] Error 2 make[3]: *** [scripts/Makefile.build:555: sound/pci] Error 2 make[2]: *** [scripts/Makefile.build:555: sound] Error 2 CC fs/ext4/mballoc.o fs/binfmt_misc.o: warning: objtool: load_misc_binary+0xf4: can't find jump dest instruction at .text+0xf07 make[3]: *** [scripts/Makefile.build:287: fs/binfmt_misc.o] Error 255 make[3]: *** Deleting file 'fs/binfmt_misc.o' make[3]: *** Waiting for unfinished jobs.... LD [M] fs/cramfs/cramfs.o fs/cramfs/cramfs.o: warning: objtool: cramfs_blkdev_fill_super+0x2b2: return with modified stack frame AR arch/x86/kernel/built-in.a make[2]: *** [scripts/Makefile.build:555: arch/x86] Error 2 LD [M] fs/configfs/configfs.o fs/configfs/configfs.o: error: objtool: configfs_create_link(): can't find starting instruction make[4]: *** [scripts/Makefile.build:502: fs/configfs/configfs.o] Error 255 make[4]: *** Deleting file 'fs/configfs/configfs.o' make[3]: *** [scripts/Makefile.build:555: fs/configfs] Error 2 AR fs/ext4/built-in.a make[2]: *** [scripts/Makefile.build:555: fs] Error 2 AR kernel/built-in.a make[1]: *** [/work/src.git/linux/Makefile:2003: .] Error 2 make: *** [Makefile:248: __sub-make] Error 2 -- Alan J. Wylie https://www.wylie.me.uk/ mailto:<alan(a)wylie.me.uk> Dance like no-one's watching. / Encrypt like everyone is. Security is inversely proportional to convenience

1 month

4
4
0 0

[PATCH 6.12.y] mm: khugepaged: fix call hpage_collapse_scan_file() for anonymous vma

by Jakub Acs

From: Liu Shixin <liushixin2(a)huawei.com> commit f1897f2f08b28ae59476d8b73374b08f856973af upstream. syzkaller reported such a BUG_ON(): ------------[ cut here ]------------ kernel BUG at mm/khugepaged.c:1835! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... CPU: 6 UID: 0 PID: 8009 Comm: syz.15.106 Kdump: loaded Tainted: G W 6.13.0-rc6 #22 Tainted: [W]=WARN Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : collapse_file+0xa44/0x1400 lr : collapse_file+0x88/0x1400 sp : ffff80008afe3a60 ... Call trace: collapse_file+0xa44/0x1400 (P) hpage_collapse_scan_file+0x278/0x400 madvise_collapse+0x1bc/0x678 madvise_vma_behavior+0x32c/0x448 madvise_walk_vmas.constprop.0+0xbc/0x140 do_madvise.part.0+0xdc/0x2c8 __arm64_sys_madvise+0x68/0x88 invoke_syscall+0x50/0x120 el0_svc_common.constprop.0+0xc8/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0x128 el0t_64_sync_handler+0xc8/0xd0 el0t_64_sync+0x190/0x198 This indicates that the pgoff is unaligned. After analysis, I confirm the vma is mapped to /dev/zero. Such a vma certainly has vm_file, but it is set to anonymous by mmap_zero(). So even if it's mmapped by 2m-unaligned, it can pass the check in thp_vma_allowable_order() as it is an anonymous-mmap, but then be collapsed as a file-mmap. It seems the problem has existed for a long time, but actually, since we have khugepaged_max_ptes_none check before, we will skip collapse it as it is /dev/zero and so has no present page. But commit d8ea7cc8547c limit the check for only khugepaged, so the BUG_ON() can be triggered by madvise_collapse(). Add vma_is_anonymous() check to make such vma be processed by hpage_collapse_scan_pmd(). 
Link: https://lkml.kernel.org/r/20250111034511.2223353-1-liushixin2@huawei.com Fixes: d8ea7cc8547c ("mm/khugepaged: add flag to predicate khugepaged-only behavior") Signed-off-by: Liu Shixin <liushixin2(a)huawei.com> Reviewed-by: Yang Shi <yang(a)os.amperecomputing.com> Acked-by: David Hildenbrand <david(a)redhat.com> Cc: Chengming Zhou <chengming.zhou(a)linux.dev> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Mattew Wilcox <willy(a)infradead.org> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: Nanyong Sun <sunnanyong(a)huawei.com> Cc: Qi Zheng <zhengqi.arch(a)bytedance.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> [acsjakub: backport, clean apply] Cc: Jakub Acs <acsjakub(a)amazon.de> Cc: linux-mm(a)kvack.org --- Ran into the crash with syzkaller, backporting this patch works - the reproducer no longer crashes. Please let me know if there was a reason not to backport. mm/khugepaged.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b538c3d48386..abd5764e4864 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2404,7 +2404,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, VM_BUG_ON(khugepaged_scan.address < hstart || khugepaged_scan.address + HPAGE_PMD_SIZE > hend); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, khugepaged_scan.address); @@ -2750,7 +2750,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); nodes_clear(cc->alloc_nmask); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); -- 2.47.3 Amazon Web Services 
Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597

1 month

3
3
0 0

[PATCH v2 6.1.y] mm: khugepaged: fix call hpage_collapse_scan_file() for anonymous vma

by Jakub Acs

From: Liu Shixin <liushixin2(a)huawei.com> commit f1897f2f08b28ae59476d8b73374b08f856973af upstream. syzkaller reported such a BUG_ON(): ------------[ cut here ]------------ kernel BUG at mm/khugepaged.c:1835! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... CPU: 6 UID: 0 PID: 8009 Comm: syz.15.106 Kdump: loaded Tainted: G W 6.13.0-rc6 #22 Tainted: [W]=WARN Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : collapse_file+0xa44/0x1400 lr : collapse_file+0x88/0x1400 sp : ffff80008afe3a60 ... Call trace: collapse_file+0xa44/0x1400 (P) hpage_collapse_scan_file+0x278/0x400 madvise_collapse+0x1bc/0x678 madvise_vma_behavior+0x32c/0x448 madvise_walk_vmas.constprop.0+0xbc/0x140 do_madvise.part.0+0xdc/0x2c8 __arm64_sys_madvise+0x68/0x88 invoke_syscall+0x50/0x120 el0_svc_common.constprop.0+0xc8/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0x128 el0t_64_sync_handler+0xc8/0xd0 el0t_64_sync+0x190/0x198 This indicates that the pgoff is unaligned. After analysis, I confirm the vma is mapped to /dev/zero. Such a vma certainly has vm_file, but it is set to anonymous by mmap_zero(). So even if it's mmapped by 2m-unaligned, it can pass the check in thp_vma_allowable_order() as it is an anonymous-mmap, but then be collapsed as a file-mmap. It seems the problem has existed for a long time, but actually, since we have khugepaged_max_ptes_none check before, we will skip collapse it as it is /dev/zero and so has no present page. But commit d8ea7cc8547c limit the check for only khugepaged, so the BUG_ON() can be triggered by madvise_collapse(). Add vma_is_anonymous() check to make such vma be processed by hpage_collapse_scan_pmd(). 
Link: https://lkml.kernel.org/r/20250111034511.2223353-1-liushixin2@huawei.com Fixes: d8ea7cc8547c ("mm/khugepaged: add flag to predicate khugepaged-only behavior") Signed-off-by: Liu Shixin <liushixin2(a)huawei.com> Reviewed-by: Yang Shi <yang(a)os.amperecomputing.com> Acked-by: David Hildenbrand <david(a)redhat.com> Cc: Chengming Zhou <chengming.zhou(a)linux.dev> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Mattew Wilcox <willy(a)infradead.org> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: Nanyong Sun <sunnanyong(a)huawei.com> Cc: Qi Zheng <zhengqi.arch(a)bytedance.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> [acsjakub: backport, clean apply] Signed-off-by: Jakub Acs <acsjakub(a)amazon.de> Cc: linux-mm(a)kvack.org --- v1 -> v2: fix missing sign-off Ran into the crash with syzkaller, backporting this patch works - the reproducer no longer crashes. Please let me know if there was a reason not to backport. mm/khugepaged.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b538c3d48386..abd5764e4864 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2404,7 +2404,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, VM_BUG_ON(khugepaged_scan.address < hstart || khugepaged_scan.address + HPAGE_PMD_SIZE > hend); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, khugepaged_scan.address); @@ -2750,7 +2750,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); nodes_clear(cc->alloc_nmask); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = 
linear_page_index(vma, addr); -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597

1 month

1
0
0 0

[PATCH] media: s5p-mfc: Always pass NULL to s5p_mfc_cmd_host2risc_v6()

by Nathan Chancellor

A new warning in clang [1] points out a few places in s5p_mfc_cmd_v6.c where an uninitialized variable is passed as a const pointer: drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c:45:7: error: variable 'h2r_args' is uninitialized when passed as a const pointer argument here [-Werror,-Wuninitialized-const-pointer] 45 | &h2r_args); | ^~~~~~~~ drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c:133:7: error: variable 'h2r_args' is uninitialized when passed as a const pointer argument here [-Werror,-Wuninitialized-const-pointer] 133 | &h2r_args); | ^~~~~~~~ drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c:148:7: error: variable 'h2r_args' is uninitialized when passed as a const pointer argument here [-Werror,-Wuninitialized-const-pointer] 148 | &h2r_args); | ^~~~~~~~ The args parameter in s5p_mfc_cmd_host2risc_v6() is never actually used, so just pass NULL to it in the places where h2r_args is currently passed, clearing up the warning and not changing the functionality of the code. Cc: stable(a)vger.kernel.org Fixes: f96f3cfa0bb8 ("[media] s5p-mfc: Update MFC v4l2 driver to support MFC6.x") Link: https://github.com/llvm/llvm-project/commit/00dacf8c22f065cb52efb14cd091d44… [1] Closes: https://github.com/ClangBuiltLinux/linux/issues/2103 Signed-off-by: Nathan Chancellor <nathan(a)kernel.org> --- From what I can tell, it seems like ->cmd_host2risc() is only ever called from v6 code, which always passes NULL? It seems like it should be possible to just drop .cmd_host2risc on the v5 side, then update .cmd_host2risc to only take two parameters? If so, I can send a follow up as a clean up, so that this can go back relatively conflict free. 
--- .../platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c index 47bc3014b5d8..735471c50dbb 100644 --- a/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c +++ b/drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c @@ -31,7 +31,6 @@ static int s5p_mfc_cmd_host2risc_v6(struct s5p_mfc_dev *dev, int cmd, static int s5p_mfc_sys_init_cmd_v6(struct s5p_mfc_dev *dev) { - struct s5p_mfc_cmd_args h2r_args; const struct s5p_mfc_buf_size_v6 *buf_size = dev->variant->buf_size->priv; int ret; @@ -41,33 +40,23 @@ static int s5p_mfc_sys_init_cmd_v6(struct s5p_mfc_dev *dev) mfc_write(dev, dev->ctx_buf.dma, S5P_FIMV_CONTEXT_MEM_ADDR_V6); mfc_write(dev, buf_size->dev_ctx, S5P_FIMV_CONTEXT_MEM_SIZE_V6); - return s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_SYS_INIT_V6, - &h2r_args); + return s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_SYS_INIT_V6, NULL); } static int s5p_mfc_sleep_cmd_v6(struct s5p_mfc_dev *dev) { - struct s5p_mfc_cmd_args h2r_args; - - memset(&h2r_args, 0, sizeof(struct s5p_mfc_cmd_args)); - return s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_SLEEP_V6, - &h2r_args); + return s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_SLEEP_V6, NULL); } static int s5p_mfc_wakeup_cmd_v6(struct s5p_mfc_dev *dev) { - struct s5p_mfc_cmd_args h2r_args; - - memset(&h2r_args, 0, sizeof(struct s5p_mfc_cmd_args)); - return s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_WAKEUP_V6, - &h2r_args); + return s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_WAKEUP_V6, NULL); } /* Open a new instance and get its number */ static int s5p_mfc_open_inst_cmd_v6(struct s5p_mfc_ctx *ctx) { struct s5p_mfc_dev *dev = ctx->dev; - struct s5p_mfc_cmd_args h2r_args; int codec_type; mfc_debug(2, "Requested codec mode: %d\n", ctx->codec_mode); @@ -130,14 +119,13 @@ static int 
s5p_mfc_open_inst_cmd_v6(struct s5p_mfc_ctx *ctx) mfc_write(dev, 0, S5P_FIMV_D_CRC_CTRL_V6); /* no crc */ return s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_OPEN_INSTANCE_V6, - &h2r_args); + NULL); } /* Close instance */ static int s5p_mfc_close_inst_cmd_v6(struct s5p_mfc_ctx *ctx) { struct s5p_mfc_dev *dev = ctx->dev; - struct s5p_mfc_cmd_args h2r_args; int ret = 0; dev->curr_ctx = ctx->num; @@ -145,7 +133,7 @@ static int s5p_mfc_close_inst_cmd_v6(struct s5p_mfc_ctx *ctx) mfc_write(dev, ctx->inst_no, S5P_FIMV_INSTANCE_ID_V6); ret = s5p_mfc_cmd_host2risc_v6(dev, S5P_FIMV_H2R_CMD_CLOSE_INSTANCE_V6, - &h2r_args); + NULL); } else { ret = -EINVAL; } --- base-commit: 347e9f5043c89695b01e66b3ed111755afcf1911 change-id: 20250715-media-s5p-mfc-fix-uninit-const-pointer-cbf944ae4b4b Best regards, -- Nathan Chancellor <nathan(a)kernel.org>

1 month

3
3
0 0

[PATCH 1/2] ALSA: hda/realtek - Fix mute LED for HP Victus 16-s0xxx

by edip＠medip.dev

From: Edip Hazuri <edip(a)medip.dev> The mute LED on this laptop uses ALC245 but requires a quirk to work. This patch enables the existing quirk for the device. Tested on Victus 16-S0063NT Laptop. The LED behaviour works as intended. Cc: <stable(a)vger.kernel.org> Signed-off-by: Edip Hazuri <edip(a)medip.dev> --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 05019fa732..77322ff8a6 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6528,6 +6528,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8bbe, "HP Victus 16-r0xxx (MB 8BBE)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bc8, "HP Victus 15-fa1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bcd, "HP Omen 16-xd0xxx", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), + SND_PCI_QUIRK(0x103c, 0x8bd4, "HP Victus 16-s0xxx (MB 8BD4)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bdd, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bde, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bdf, "HP Envy 15", ALC287_FIXUP_CS35L41_I2C_2), -- 2.50.1

1 month

2
2
0 0

[PATCH 5.4.y 0/3] Backport series: "permit write-sealed memfd read-only shared mappings"

by Isaac J. Manjarres

Hello, Until kernel version 6.7, a write-sealed memfd could not be mapped as shared and read-only. This was clearly a bug, and was not in line with the description of F_SEAL_WRITE in the man page for fcntl()[1]. Lorenzo's series [2] fixed that issue and was merged in kernel version 6.7, but was not backported to older kernels. So, this issue is still present on kernels 5.4, 5.10, 5.15, 6.1, and 6.6. This series backports Lorenzo's series to the 5.4 kernel. [1] https://man7.org/linux/man-pages/man2/fcntl.2.html [2] https://lore.kernel.org/all/913628168ce6cce77df7d13a63970bae06a526e0.169711… Lorenzo Stoakes (3): mm: drop the assumption that VM_SHARED always implies writable mm: update memfd seal write check to include F_SEAL_WRITE mm: perform the mapping_map_writable() check after call_mmap() fs/hugetlbfs/inode.c | 2 +- include/linux/fs.h | 4 ++-- include/linux/mm.h | 26 +++++++++++++++++++------- kernel/fork.c | 2 +- mm/filemap.c | 2 +- mm/madvise.c | 2 +- mm/mmap.c | 26 ++++++++++++++++---------- mm/shmem.c | 2 +- 8 files changed, 42 insertions(+), 24 deletions(-) -- 2.50.1.552.g942d659e1b-goog

1 month

4
8
0 0

[PATCH 6.1] HID: mcp2221: Set driver data before I2C adapter add

by Sumanth Gavini

The process of adding an I2C adapter can invoke I2C accesses on that new adapter (see i2c_detect()). Ensure we have set the adapter's driver data to avoid null pointer dereferences in the xfer functions during the adapter add. This has been noted in the past and the same fix proposed but not completed. See: https://lore.kernel.org/lkml/ef597e73-ed71-168e-52af-0d19b03734ac@vigem.de/ Signed-off-by: Hamish Martin <hamish.martin(a)alliedtelesis.co.nz> Signed-off-by: Jiri Kosina <jkosina(a)suse.cz> Signed-off-by: Sumanth Gavini <sumanth.gavini(a)yahoo.com> --- drivers/hid/hid-mcp2221.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index de52e9f7bb8c..9973545c1c4b 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -873,12 +873,12 @@ static int mcp2221_probe(struct hid_device *hdev, "MCP2221 usb-i2c bridge on hidraw%d", ((struct hidraw *)hdev->hidraw)->minor); + i2c_set_adapdata(&mcp->adapter, mcp); ret = i2c_add_adapter(&mcp->adapter); if (ret) { hid_err(hdev, "can't add usb-i2c adapter: %d\n", ret); goto err_i2c; } - i2c_set_adapdata(&mcp->adapter, mcp); /* Setup GPIO chip */ mcp->gc = devm_kzalloc(&hdev->dev, sizeof(*mcp->gc), GFP_KERNEL); -- 2.43.0

1 month

3
5
0 0

[PATCH] kbuild: userprogs: use correct linker when mixing clang and GNU ld

by Thomas Weißschuh

The userprogs infrastructure does not expect clang being used with GNU ld and in that case uses /usr/bin/ld for linking, not the configured $(LD). This fallback is problematic as it will break when cross-compiling. Mixing clang and GNU ld is used for example when building for SPARC64, as ld.lld is not sufficient; see Documentation/kbuild/llvm.rst. Relax the check around --ld-path so it gets used for all linkers. Fixes: dfc1b168a8c4 ("kbuild: userprogs: use correct lld when linking through clang") Cc: stable(a)vger.kernel.org Signed-off-by: Thomas Weißschuh <thomas.weissschuh(a)linutronix.de> --- Nathan, you originally proposed the check for $(CONFIG_LD_IS_LLD) [0], could you take a look at this? [0] https://lore.kernel.org/all/20250213175437.GA2756218@ax162/ --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c09766beb7eff4780574682b8ea44475fc0a5188..e300c6546c845c300edb5f0033719963c7da8f9b 100644 --- a/Makefile +++ b/Makefile @@ -1134,7 +1134,7 @@ KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # userspace programs are linked via the compiler, use the correct linker -ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_LD_IS_LLD),yy) +ifneq ($(CONFIG_CC_IS_CLANG),) KBUILD_USERLDFLAGS += --ld-path=$(LD) endif --- base-commit: 6832a9317eee280117cd695fa885b2b7a7a38daf change-id: 20250723-userprogs-clang-gnu-ld-7a1c16fc852d Best regards, -- Thomas Weißschuh <thomas.weissschuh(a)linutronix.de>

1 month

3
4
0 0

FAILED: patch "[PATCH] Revert "xfrm: destroy xfrm_state synchronously on net exit path"" failed to apply to 6.12-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.12-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y git checkout FETCH_HEAD git cherry-pick -x 2a198bbec6913ae1c90ec963750003c6213668c7 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025072924-postbox-exorcism-f636@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 2a198bbec6913ae1c90ec963750003c6213668c7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd(a)queasysnail.net> Date: Fri, 4 Jul 2025 16:54:34 +0200 Subject: [PATCH] Revert "xfrm: destroy xfrm_state synchronously on net exit path" This reverts commit f75a2804da391571563c4b6b29e7797787332673. With all states (whether user or kern) removed from the hashtables during deletion, there's no need for synchronous destruction of states. xfrm6_tunnel states still need to have been destroyed (which will be the case when its last user is deleted (not destroyed)) so that xfrm6_tunnel_free_spi removes it from the per-netns hashtable before the netns is destroyed. This has the benefit of skipping one synchronize_rcu per state (in __xfrm_state_destroy(sync=true)) when we exit a netns. 
Signed-off-by: Sabrina Dubroca <sd(a)queasysnail.net> Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com> diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 91d52a380e37..f3014e4f54fc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -915,7 +915,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) xfrm_pol_put(pols[i]); } -void __xfrm_state_destroy(struct xfrm_state *, bool); +void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) { @@ -925,13 +925,7 @@ static inline void __xfrm_state_put(struct xfrm_state *x) static inline void xfrm_state_put(struct xfrm_state *x) { if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x, false); -} - -static inline void xfrm_state_put_sync(struct xfrm_state *x) -{ - if (refcount_dec_and_test(&x->refcnt)) - __xfrm_state_destroy(x, true); + __xfrm_state_destroy(x); } static inline void xfrm_state_hold(struct xfrm_state *x) @@ -1769,7 +1763,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num); int xfrm_state_delete(struct xfrm_state *x); -int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, bool task_valid); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 7fd8bc08e6eb..5120a763da0d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -334,7 +334,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; - xfrm_state_flush(net, 0, false, true); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); xfrm_flush_gc(); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) diff --git a/net/key/af_key.c 
b/net/key/af_key.c index efc2a91f4c48..b5d761700776 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1766,7 +1766,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m if (proto == 0) return -EINVAL; - err = xfrm_state_flush(net, proto, true, false); + err = xfrm_state_flush(net, proto, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f7110a658897..327a1a6f892c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -592,7 +592,7 @@ void xfrm_state_free(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_free); -static void ___xfrm_state_destroy(struct xfrm_state *x) +static void xfrm_state_gc_destroy(struct xfrm_state *x) { if (x->mode_cbs && x->mode_cbs->destroy_state) x->mode_cbs->destroy_state(x); @@ -631,7 +631,7 @@ static void xfrm_state_gc_task(struct work_struct *work) synchronize_rcu(); hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) - ___xfrm_state_destroy(x); + xfrm_state_gc_destroy(x); } static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) @@ -795,19 +795,14 @@ void xfrm_dev_state_free(struct xfrm_state *x) } #endif -void __xfrm_state_destroy(struct xfrm_state *x, bool sync) +void __xfrm_state_destroy(struct xfrm_state *x) { WARN_ON(x->km.state != XFRM_STATE_DEAD); - if (sync) { - synchronize_rcu(); - ___xfrm_state_destroy(x); - } else { - spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &xfrm_state_gc_list); - spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); - } + spin_lock_bh(&xfrm_state_gc_lock); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); + spin_unlock_bh(&xfrm_state_gc_lock); + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -922,7 +917,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool } #endif -int xfrm_state_flush(struct net *net, u8 proto, bool 
task_valid, bool sync) +int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) { int i, err = 0, cnt = 0; @@ -3283,7 +3278,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - xfrm_state_flush(net, 0, false, true); + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1db18f470f42..684239018bec 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2635,7 +2635,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_usersa_flush *p = nlmsg_data(nlh); int err; - err = xfrm_state_flush(net, p->proto, true, false); + err = xfrm_state_flush(net, p->proto, true); if (err) { if (err == -ESRCH) /* empty table */ return 0;

1 month

3
4
0 0

[PATCH 6.12 0/4] drm/xe: Fix xe_force_wake_get return handling

by Tomita Moeko

This patchset fixes the xe driver probe failure with -ETIMEDOUT in Linux 6.12.35 and later versions. The failure is caused by commit d42b44736ea2 ("drm/xe/gt: Update handling of xe_force_wake_get return"), which incorrectly handles the return value of xe_force_wake_get as "refcounted domain mask" (as introduced in 6.13), rather than status code (as used in 6.12). In 6.12 stable kernel, xe_force_wake_get still returns a status code. The update incorrectly treats the return value as a mask, causing the return value of 0 to be misinterpreted as an error. As a result, the driver probe fails with -ETIMEDOUT in xe_pci_probe -> xe_device_probe -> xe_gt_init_hwconfig -> xe_force_wake_get. [ 1254.323172] xe 0000:00:02.0: [drm] Found ALDERLAKE_P (device ID 46a6) display version 13.00 stepping D0 [ 1254.323175] xe 0000:00:02.0: [drm:xe_pci_probe [xe]] ALDERLAKE_P 46a6:000c dgfx:0 gfx:Xe_LP (12.00) media:Xe_M (12.00) display:yes dma_m_s:39 tc:1 gscfi:0 cscfi:0 [ 1254.323275] xe 0000:00:02.0: [drm:xe_pci_probe [xe]] Stepping = (G:C0, M:C0, B:**) [ 1254.323328] xe 0000:00:02.0: [drm:xe_pci_probe [xe]] SR-IOV support: no (mode: none) [ 1254.323379] xe 0000:00:02.0: [drm:intel_pch_type [xe]] Found Alder Lake PCH [ 1254.323475] xe 0000:00:02.0: probe with driver xe failed with error -110 Similar return-handling issues caused by the API mismatch are also found in: Commit 95a75ed2b005 ("drm/xe/tests/mocs: Update xe_force_wake_get() return handling") Commit 9ffd6ec2de08 ("drm/xe/devcoredump: Update handling of xe_force_wake_get return") This patchset fixes them by reverting them all. Additionally, commit deb05f8431f3 ("drm/xe/forcewake: Add a helper xe_force_wake_ref_has_domain()") is also reverted as it is not needed in 6.12.
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5373 Cc: Badal Nilawar <badal.nilawar(a)intel.com> Cc: Matthew Brost <matthew.brost(a)intel.com> Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com> Cc: Lucas De Marchi <lucas.demarchi(a)intel.com> Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray(a)intel.com> Cc: Nirmoy Das <nirmoy.das(a)intel.com> Tomita Moeko (4): Revert "drm/xe/gt: Update handling of xe_force_wake_get return" Revert "drm/xe/tests/mocs: Update xe_force_wake_get() return handling" Revert "drm/xe/devcoredump: Update handling of xe_force_wake_get return" Revert "drm/xe/forcewake: Add a helper xe_force_wake_ref_has_domain()" drivers/gpu/drm/xe/tests/xe_mocs.c | 21 +++--- drivers/gpu/drm/xe/xe_devcoredump.c | 14 ++-- drivers/gpu/drm/xe/xe_force_wake.h | 16 ----- drivers/gpu/drm/xe/xe_gt.c | 105 +++++++++++++--------------- 4 files changed, 63 insertions(+), 93 deletions(-) -- 2.47.2

1 month

4
10
0 0

[PATCH 6.1.y 0/3] mptcp: fix recent failed backports (20250721)

by Matthieu Baerts (NGI0)

Greg recently reported 3 patches that could not be applied without conflicts in v6.1: - f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic") - def5b7b2643e ("mptcp: plug races between subflow fail and subflow creation") - da9b2fc7b73d ("mptcp: reset fallback status gracefully at disconnect() time") Conflicts have been resolved, and documented in each patch. Paolo Abeni (3): mptcp: make fallback action and fallback decision atomic mptcp: plug races between subflow fail and subflow creation mptcp: reset fallback status gracefully at disconnect() time net/mptcp/options.c | 3 ++- net/mptcp/pm.c | 8 ++++++- net/mptcp/protocol.c | 55 ++++++++++++++++++++++++++++++++++++-------- net/mptcp/protocol.h | 27 +++++++++++++++++----- net/mptcp/subflow.c | 30 +++++++++++++++--------- 5 files changed, 95 insertions(+), 28 deletions(-) -- 2.50.0

1 month

2
6
0 0

+ mm-memory-tiering-fix-pgpromote_candidate-counting.patch added to mm-new branch

by Andrew Morton

The patch titled Subject: mm: memory-tiering: fix PGPROMOTE_CANDIDATE counting has been added to the -mm mm-new branch. Its filename is mm-memory-tiering-fix-pgpromote_candidate-counting.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-new branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Note, mm-new is a provisional staging ground for work-in-progress patches, and acceptance into mm-new is a notification for others take notice and to finish up reviews. Please do not hesitate to respond to review feedback and post updated versions to replace or incrementally fixup patches in mm-new. Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Ruan Shiyang <ruansy.fnst(a)fujitsu.com> Subject: mm: memory-tiering: fix PGPROMOTE_CANDIDATE counting Date: Tue, 29 Jul 2025 11:51:01 +0800 Goto-san reported confusing pgpromote statistics where the pgpromote_success count significantly exceeded pgpromote_candidate. On a system with three nodes (nodes 0-1: DRAM 4GB, node 2: NVDIMM 4GB): # Enable demotion only echo 1 > /sys/kernel/mm/numa/demotion_enabled numactl -m 0-1 memhog -r200 3500M >/dev/null & pid=$! 
sleep 2 numactl memhog -r100 2500M >/dev/null & sleep 10 kill -9 $pid # terminate the 1st memhog # Enable promotion echo 2 > /proc/sys/kernel/numa_balancing After a few seconds, we observed `pgpromote_candidate < pgpromote_success` $ grep -e pgpromote /proc/vmstat pgpromote_success 2579 pgpromote_candidate 0 In this scenario, after terminating the first memhog, the conditions for pgdat_free_space_enough() are quickly met, and triggers promotion. However, these migrated pages are only counted in PGPROMOTE_SUCCESS, not in PGPROMOTE_CANDIDATE. To solve these confusing statistics, introduce PGPROMOTE_CANDIDATE_NRL to count the missed promotion pages. And also, not counting these pages into PGPROMOTE_CANDIDATE is to avoid changing the existing algorithm or performance of the promotion rate limit. Link: https://lkml.kernel.org/r/20250729035101.1601407-1-ruansy.fnst@fujitsu.com Signed-off-by: Li Zhijian <lizhijian(a)fujitsu.com> Signed-off-by: Ruan Shiyang <ruansy.fnst(a)fujitsu.com> Reported-by: Yasunori Gotou (Fujitsu) <y-goto(a)fujitsu.com> Suggested-by: Huang Ying <ying.huang(a)linux.alibaba.com> Cc: Ingo Molnar <mingo(a)redhat.com> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Juri Lelli <juri.lelli(a)redhat.com> Cc: Vincent Guittot <vincent.guittot(a)linaro.org> Cc: Dietmar Eggemann <dietmar.eggemann(a)arm.com> Cc: Steven Rostedt <rostedt(a)goodmis.org> Cc: Ben Segall <bsegall(a)google.com> Cc: Mel Gorman <mgorman(a)suse.de> Cc: Valentin Schneider <vschneid(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/mmzone.h | 16 +++++++++++++++- kernel/sched/fair.c | 5 +++-- mm/vmstat.c | 1 + 3 files changed, 19 insertions(+), 3 deletions(-) --- a/include/linux/mmzone.h~mm-memory-tiering-fix-pgpromote_candidate-counting +++ a/include/linux/mmzone.h @@ -234,7 +234,21 @@ enum node_stat_item { #endif #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ - PGPROMOTE_CANDIDATE, /*
candidate pages to promote */ + /** + * Candidate pages for promotion based on hint fault latency. This + * counter is used to control the promotion rate and adjust the hot + * threshold. + */ + PGPROMOTE_CANDIDATE, + /** + * Not rate-limited (NRL) candidate pages for those can be promoted + * without considering hot threshold because of enough free pages in + * fast-tier node. These promotions bypass the regular hotness checks + * and do NOT influence the promotion rate-limiter or + * threshold-adjustment logic. + * This is for statistics/monitoring purposes. + */ + PGPROMOTE_CANDIDATE_NRL, #endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, --- a/kernel/sched/fair.c~mm-memory-tiering-fix-pgpromote_candidate-counting +++ a/kernel/sched/fair.c @@ -1940,11 +1940,13 @@ bool should_numa_migrate_memory(struct t struct pglist_data *pgdat; unsigned long rate_limit; unsigned int latency, th, def_th; + long nr = folio_nr_pages(folio); pgdat = NODE_DATA(dst_nid); if (pgdat_free_space_enough(pgdat)) { /* workload changed, reset hot threshold */ pgdat->nbp_threshold = 0; + mod_node_page_state(pgdat, PGPROMOTE_CANDIDATE_NRL, nr); return true; } @@ -1958,8 +1960,7 @@ bool should_numa_migrate_memory(struct t if (latency >= th) return false; - return !numa_promotion_rate_limit(pgdat, rate_limit, - folio_nr_pages(folio)); + return !numa_promotion_rate_limit(pgdat, rate_limit, nr); } this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid); --- a/mm/vmstat.c~mm-memory-tiering-fix-pgpromote_candidate-counting +++ a/mm/vmstat.c @@ -1280,6 +1280,7 @@ const char * const vmstat_text[] = { #ifdef CONFIG_NUMA_BALANCING [I(PGPROMOTE_SUCCESS)] = "pgpromote_success", [I(PGPROMOTE_CANDIDATE)] = "pgpromote_candidate", + [I(PGPROMOTE_CANDIDATE_NRL)] = "pgpromote_candidate_nrl", #endif [I(PGDEMOTE_KSWAPD)] = "pgdemote_kswapd", [I(PGDEMOTE_DIRECT)] = "pgdemote_direct", _ Patches currently in -mm which might be from ruansy.fnst(a)fujitsu.com are 
mm-memory-tiering-fix-pgpromote_candidate-counting.patch

1 month

1
0
0 0

+ mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped-v3.patch added to mm-unstable branch

by Andrew Morton

The patch titled Subject: mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped-v3 has been added to the -mm mm-unstable branch. Its filename is mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped-v3.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Suren Baghdasaryan <surenb(a)google.com> Subject: mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped-v3 Date: Tue, 29 Jul 2025 07:57:09 -0700 - Addressed Lorenzo's nits, per Lorenzo Stoakes - Added a warning comment for vma_start_read() - Added Reviewed-by and Acked-by, per Vlastimil Babka and Lorenzo Stoakes Link: https://lkml.kernel.org/r/20250729145709.2731370-1-surenb@google.com Fixes: 3104138517fc ("mm: make vma cache SLAB_TYPESAFE_BY_RCU") Reported-by: Jann Horn <jannh(a)google.com> Closes: https://lore.kernel.org/all/CAG48ez0-deFbVH=E3jbkWx=X3uVbd8nWeo6kbJPQ0KoUD+… Signed-off-by: Suren Baghdasaryan <surenb(a)google.com> Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz> Acked-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Cc: Liam Howlett <liam.howlett(a)oracle.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/mmap_lock.h | 7 +++++++ mm/mmap_lock.c | 2 +- 2 files changed, 8 
insertions(+), 1 deletion(-) --- a/include/linux/mmap_lock.h~mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped-v3 +++ a/include/linux/mmap_lock.h @@ -155,6 +155,10 @@ static inline void vma_refcount_put(stru * reused and attached to a different mm before we lock it. * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, struct vm_area_struct *vma) @@ -194,9 +198,12 @@ static inline struct vm_area_struct *vma if (unlikely(vma->vm_mm != mm)) { /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ struct mm_struct *other_mm = vma->vm_mm; + /* * __mmdrop() is a heavy operation and we don't need RCU * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. */ rcu_read_unlock(); mmgrab(other_mm); --- a/mm/mmap_lock.c~mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped-v3 +++ a/mm/mmap_lock.c @@ -235,7 +235,7 @@ retry: goto fallback; } - /* Verify the vma is not behind of the last search position. */ + /* Verify the vma is not behind the last search position. */ if (unlikely(from_addr >= vma->vm_end)) goto fallback_unlock; _ Patches currently in -mm which might be from surenb(a)google.com are mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped.patch mm-fix-a-uaf-when-vma-mm-is-freed-after-vma-vm_refcnt-got-dropped-v3.patch

1 month

1
0
0 0

[PATCH v5.10] btrfs: fix deadlock when cloning inline extents and using qgroups

by Shivani Agarwal

From: Filipe Manana <fdmanana(a)suse.com> commit f9baa501b4fd6962257853d46ddffbc21f27e344 upstream. There are a few exceptional cases where cloning an inline extent needs to copy the inline extent data into a page of the destination inode. When this happens, we end up starting a transaction while having a dirty page for the destination inode and while having the range locked in the destination's inode iotree too. Because when reserving metadata space for a transaction we may need to flush existing delalloc in case there is not enough free space, we have a mechanism in place to prevent a deadlock, which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when cloning inline extent and low on free metadata space"). However when using qgroups, a transaction also reserves metadata qgroup space, which can also result in flushing delalloc in case there is not enough available space at the moment. When this happens we deadlock, since flushing delalloc requires locking the file range in the inode's iotree and the range was already locked at the very beginning of the clone operation, before attempting to start the transaction. When this issue happens, stack traces like the following are reported: [72747.556262] task:kworker/u81:9 state:D stack: 0 pid: 225 ppid: 2 flags:0x00004000 [72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142) [72747.556271] Call Trace: [72747.556273] __schedule+0x296/0x760 [72747.556277] schedule+0x3c/0xa0 [72747.556279] io_schedule+0x12/0x40 [72747.556284] __lock_page+0x13c/0x280 [72747.556287] ? generic_file_readonly_mmap+0x70/0x70 [72747.556325] extent_write_cache_pages+0x22a/0x440 [btrfs] [72747.556331] ? __set_page_dirty_nobuffers+0xe7/0x160 [72747.556358] ? set_extent_buffer_dirty+0x5e/0x80 [btrfs] [72747.556362] ? update_group_capacity+0x25/0x210 [72747.556366] ? 
cpumask_next_and+0x1a/0x20 [72747.556391] extent_writepages+0x44/0xa0 [btrfs] [72747.556394] do_writepages+0x41/0xd0 [72747.556398] __writeback_single_inode+0x39/0x2a0 [72747.556403] writeback_sb_inodes+0x1ea/0x440 [72747.556407] __writeback_inodes_wb+0x5f/0xc0 [72747.556410] wb_writeback+0x235/0x2b0 [72747.556414] ? get_nr_inodes+0x35/0x50 [72747.556417] wb_workfn+0x354/0x490 [72747.556420] ? newidle_balance+0x2c5/0x3e0 [72747.556424] process_one_work+0x1aa/0x340 [72747.556426] worker_thread+0x30/0x390 [72747.556429] ? create_worker+0x1a0/0x1a0 [72747.556432] kthread+0x116/0x130 [72747.556435] ? kthread_park+0x80/0x80 [72747.556438] ret_from_fork+0x1f/0x30 [72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs] [72747.566961] Call Trace: [72747.566964] __schedule+0x296/0x760 [72747.566968] ? finish_wait+0x80/0x80 [72747.566970] schedule+0x3c/0xa0 [72747.566995] wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs] [72747.566999] ? finish_wait+0x80/0x80 [72747.567024] lock_extent_bits+0x37/0x90 [btrfs] [72747.567047] btrfs_invalidatepage+0x299/0x2c0 [btrfs] [72747.567051] ? find_get_pages_range_tag+0x2cd/0x380 [72747.567076] __extent_writepage+0x203/0x320 [btrfs] [72747.567102] extent_write_cache_pages+0x2bb/0x440 [btrfs] [72747.567106] ? update_load_avg+0x7e/0x5f0 [72747.567109] ? enqueue_entity+0xf4/0x6f0 [72747.567134] extent_writepages+0x44/0xa0 [btrfs] [72747.567137] ? enqueue_task_fair+0x93/0x6f0 [72747.567140] do_writepages+0x41/0xd0 [72747.567144] __filemap_fdatawrite_range+0xc7/0x100 [72747.567167] btrfs_run_delalloc_work+0x17/0x40 [btrfs] [72747.567195] btrfs_work_helper+0xc2/0x300 [btrfs] [72747.567200] process_one_work+0x1aa/0x340 [72747.567202] worker_thread+0x30/0x390 [72747.567205] ? create_worker+0x1a0/0x1a0 [72747.567208] kthread+0x116/0x130 [72747.567211] ? 
kthread_park+0x80/0x80 [72747.567214] ret_from_fork+0x1f/0x30 [72747.569686] task:fsstress state:D stack: 0 pid:841421 ppid:841417 flags:0x00000000 [72747.569689] Call Trace: [72747.569691] __schedule+0x296/0x760 [72747.569694] schedule+0x3c/0xa0 [72747.569721] try_flush_qgroup+0x95/0x140 [btrfs] [72747.569725] ? finish_wait+0x80/0x80 [72747.569753] btrfs_qgroup_reserve_data+0x34/0x50 [btrfs] [72747.569781] btrfs_check_data_free_space+0x5f/0xa0 [btrfs] [72747.569804] btrfs_buffered_write+0x1f7/0x7f0 [btrfs] [72747.569810] ? path_lookupat.isra.48+0x97/0x140 [72747.569833] btrfs_file_write_iter+0x81/0x410 [btrfs] [72747.569836] ? __kmalloc+0x16a/0x2c0 [72747.569839] do_iter_readv_writev+0x160/0x1c0 [72747.569843] do_iter_write+0x80/0x1b0 [72747.569847] vfs_writev+0x84/0x140 [72747.569869] ? btrfs_file_llseek+0x38/0x270 [btrfs] [72747.569873] do_writev+0x65/0x100 [72747.569876] do_syscall_64+0x33/0x40 [72747.569879] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [72747.569899] task:fsstress state:D stack: 0 pid:841424 ppid:841417 flags:0x00004000 [72747.569903] Call Trace: [72747.569906] __schedule+0x296/0x760 [72747.569909] schedule+0x3c/0xa0 [72747.569936] try_flush_qgroup+0x95/0x140 [btrfs] [72747.569940] ? finish_wait+0x80/0x80 [72747.569967] __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs] [72747.569989] start_transaction+0x279/0x580 [btrfs] [72747.570014] clone_copy_inline_extent+0x332/0x490 [btrfs] [72747.570041] btrfs_clone+0x5b7/0x7a0 [btrfs] [72747.570068] ? 
lock_extent_bits+0x64/0x90 [btrfs] [72747.570095] btrfs_clone_files+0xfc/0x150 [btrfs] [72747.570122] btrfs_remap_file_range+0x3d8/0x4a0 [btrfs] [72747.570126] do_clone_file_range+0xed/0x200 [72747.570131] vfs_clone_file_range+0x37/0x110 [72747.570134] ioctl_file_clone+0x7d/0xb0 [72747.570137] do_vfs_ioctl+0x138/0x630 [72747.570140] __x64_sys_ioctl+0x62/0xc0 [72747.570143] do_syscall_64+0x33/0x40 [72747.570146] entry_SYSCALL_64_after_hwframe+0x44/0xa9 So fix this by skipping the flush of delalloc for an inode that is flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under such a special case of cloning an inline extent, when flushing delalloc during qgroup metadata reservation. The special cases for cloning inline extents were added in kernel 5.7 by commit 05a5a7621ce66c ("Btrfs: implement full reflink support for inline extents"), while having qgroup metadata space reservation flushing delalloc when low on space was added in kernel 5.9 by commit c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable kernel backports. 
Reported-by: Wang Yugui <wangyugui(a)e16-tech.com> Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.c… Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT") CC: stable(a)vger.kernel.org # 5.9+ Reviewed-by: Qu Wenruo <wqu(a)suse.com> Signed-off-by: Filipe Manana <fdmanana(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> [Shivani: Modified to apply on 5.10.y, Passed false to btrfs_start_delalloc_flush() in fs/btrfs/transaction.c file to maintain the default behaviour] Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com> --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 4 ++-- fs/btrfs/ioctl.c | 2 +- fs/btrfs/qgroup.c | 2 +- fs/btrfs/send.c | 4 ++-- fs/btrfs/transaction.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7ad3091db571..d9d6a57acafe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3013,7 +3013,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct inode *inode, u64 new_size, u32 min_type); -int btrfs_start_delalloc_snapshot(struct btrfs_root *root); +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8d7ca8a21525..99aad39fad13 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9566,7 +9566,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, return ret; } -int btrfs_start_delalloc_snapshot(struct btrfs_root *root) +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) { struct writeback_control wbc = { .nr_to_write = LONG_MAX, @@ -9579,7 +9579,7 @@ int 
btrfs_start_delalloc_snapshot(struct btrfs_root *root) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &wbc, true, false); + return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 24c4d059cfab..9d5dfcec22de 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1030,7 +1030,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, */ btrfs_drew_read_lock(&root->snapshot_lock); - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) goto out; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 95a39d535a82..bc1feb97698c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3704,7 +3704,7 @@ static int try_flush_qgroup(struct btrfs_root *root) return 0; } - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, true); if (ret < 0) goto out; btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 3e7bb24eb227..d86b4d13cae4 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7207,7 +7207,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) int i; if (root) { - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); @@ -7215,7 +7215,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) for (i = 0; i < sctx->clone_roots_cnt; i++) { root = sctx->clone_roots[i].root; - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 21a5a963c70e..424b1dd3fe27 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c 
@@ -2045,7 +2045,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans) list_for_each_entry(pending, head, list) { int ret; - ret = btrfs_start_delalloc_snapshot(pending->root); + ret = btrfs_start_delalloc_snapshot(pending->root, false); if (ret) return ret; } -- 2.40.4

1 month

2
1
0 0

[PATCH 6.12.y v2] mm/vmalloc: fix data race in show_numa_info()

by Jeongjun Park

commit 5c5f0468d172ddec2e333d738d2a1f85402cf0bc upstream. The following data-race was found in show_numa_info(): ================================================================== BUG: KCSAN: data-race in vmalloc_info_show / vmalloc_info_show read to 0xffff88800971fe30 of 4 bytes by task 8289 on cpu 0: show_numa_info mm/vmalloc.c:4936 [inline] vmalloc_info_show+0x5a8/0x7e0 mm/vmalloc.c:5016 seq_read_iter+0x373/0xb40 fs/seq_file.c:230 proc_reg_read_iter+0x11e/0x170 fs/proc/inode.c:299 .... write to 0xffff88800971fe30 of 4 bytes by task 8287 on cpu 1: show_numa_info mm/vmalloc.c:4934 [inline] vmalloc_info_show+0x38f/0x7e0 mm/vmalloc.c:5016 seq_read_iter+0x373/0xb40 fs/seq_file.c:230 proc_reg_read_iter+0x11e/0x170 fs/proc/inode.c:299 .... value changed: 0x0000008f -> 0x00000000 ================================================================== According to this report, there is a read/write data-race because the counters buffer referenced by m->private is shared by all readers and is therefore accessible to multiple CPUs concurrently. To fix this, instead of allocating the buffer once in proc_vmalloc_init() and passing its address via m->private, vmalloc_info_show() should allocate its own buffer on each call. Link: https://lkml.kernel.org/r/20250508165620.15321-1-aha310510@gmail.com Fixes: 8e1d743f2c26 ("mm: vmalloc: support multiple nodes in vmallocinfo") Signed-off-by: Jeongjun Park <aha310510(a)gmail.com> Suggested-by: Eric Dumazet <edumazet(a)google.com> Suggested-by: Andrew Morton <akpm(a)linux-foundation.org> Reviewed-by: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/vmalloc.c | 63 +++++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index cc04e501b1c5..7888600b6a79 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3095,7 +3095,7 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm) /* * Before removing VM_UNINITIALIZED, * we should make sure that vm has proper values. 
- * Pair with smp_rmb() in show_numa_info(). + * Pair with smp_rmb() in vread_iter() and vmalloc_info_show(). */ smp_wmb(); vm->flags &= ~VM_UNINITIALIZED; @@ -4938,28 +4938,29 @@ bool vmalloc_dump_obj(void *object) #endif #ifdef CONFIG_PROC_FS -static void show_numa_info(struct seq_file *m, struct vm_struct *v) -{ - if (IS_ENABLED(CONFIG_NUMA)) { - unsigned int nr, *counters = m->private; - unsigned int step = 1U << vm_area_page_order(v); - if (!counters) - return; +/* + * Print number of pages allocated on each memory node. + * + * This function can only be called if CONFIG_NUMA is enabled + * and VM_UNINITIALIZED bit in v->flags is disabled. + */ +static void show_numa_info(struct seq_file *m, struct vm_struct *v, + unsigned int *counters) +{ + unsigned int nr; + unsigned int step = 1U << vm_area_page_order(v); - if (v->flags & VM_UNINITIALIZED) - return; - /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ - smp_rmb(); + if (!counters) + return; - memset(counters, 0, nr_node_ids * sizeof(unsigned int)); + memset(counters, 0, nr_node_ids * sizeof(unsigned int)); - for (nr = 0; nr < v->nr_pages; nr += step) - counters[page_to_nid(v->pages[nr])] += step; - for_each_node_state(nr, N_HIGH_MEMORY) - if (counters[nr]) - seq_printf(m, " N%u=%u", nr, counters[nr]); - } + for (nr = 0; nr < v->nr_pages; nr += step) + counters[page_to_nid(v->pages[nr])] += step; + for_each_node_state(nr, N_HIGH_MEMORY) + if (counters[nr]) + seq_printf(m, " N%u=%u", nr, counters[nr]); } static void show_purge_info(struct seq_file *m) @@ -4987,6 +4988,10 @@ static int vmalloc_info_show(struct seq_file *m, void *p) struct vmap_area *va; struct vm_struct *v; int i; + unsigned int *counters; + + if (IS_ENABLED(CONFIG_NUMA)) + counters = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); for (i = 0; i < nr_vmap_nodes; i++) { vn = &vmap_nodes[i]; @@ -5003,6 +5008,11 @@ static int vmalloc_info_show(struct seq_file *m, void *p) } v = va->vm; + if (v->flags & VM_UNINITIALIZED) + 
continue; + + /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ + smp_rmb(); seq_printf(m, "0x%pK-0x%pK %7ld", v->addr, v->addr + v->size, v->size); @@ -5037,7 +5047,9 @@ static int vmalloc_info_show(struct seq_file *m, void *p) if (is_vmalloc_addr(v->pages)) seq_puts(m, " vpages"); - show_numa_info(m, v); + if (IS_ENABLED(CONFIG_NUMA)) + show_numa_info(m, v, counters); + seq_putc(m, '\n'); } spin_unlock(&vn->busy.lock); @@ -5047,19 +5059,14 @@ static int vmalloc_info_show(struct seq_file *m, void *p) * As a final step, dump "unpurged" areas. */ show_purge_info(m); + if (IS_ENABLED(CONFIG_NUMA)) + kfree(counters); return 0; } static int __init proc_vmalloc_init(void) { - void *priv_data = NULL; - - if (IS_ENABLED(CONFIG_NUMA)) - priv_data = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); - - proc_create_single_data("vmallocinfo", - 0400, NULL, vmalloc_info_show, priv_data); - + proc_create_single("vmallocinfo", 0400, NULL, vmalloc_info_show); return 0; } module_init(proc_vmalloc_init); --

1 month

3
4
0 0

[PATCH 6.12.y] KVM: x86: Free vCPUs before freeing VM state

by Kevin Cheng

From: Sean Christopherson <seanjc(a)google.com> [ Upstream commit 17bcd714426386fda741a4bccd96a2870179344b ] Free vCPUs before freeing any VM state, as both SVM and VMX may access VM state when "freeing" a vCPU that is currently "in" L2, i.e. that needs to be kicked out of nested guest mode. Commit 6fcee03df6a1 ("KVM: x86: avoid loading a vCPU after .vm_destroy was called") partially fixed the issue, but for unknown reasons only moved the MMU unloading before VM destruction. Complete the change, and free all vCPU state prior to destroying VM state, as nVMX accesses even more state than nSVM. In addition to the AVIC, KVM can hit a use-after-free on MSR filters: kvm_msr_allowed+0x4c/0xd0 __kvm_set_msr+0x12d/0x1e0 kvm_set_msr+0x19/0x40 load_vmcs12_host_state+0x2d8/0x6e0 [kvm_intel] nested_vmx_vmexit+0x715/0xbd0 [kvm_intel] nested_vmx_free_vcpu+0x33/0x50 [kvm_intel] vmx_free_vcpu+0x54/0xc0 [kvm_intel] kvm_arch_vcpu_destroy+0x28/0xf0 kvm_vcpu_destroy+0x12/0x50 kvm_arch_destroy_vm+0x12c/0x1c0 kvm_put_kvm+0x263/0x3c0 kvm_vm_release+0x21/0x30 and an upcoming fix to process injectable interrupts on nested VM-Exit will access the PIC: BUG: kernel NULL pointer dereference, address: 0000000000000090 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 23 UID: 1000 PID: 2658 Comm: kvm-nx-lpage-re RIP: 0010:kvm_cpu_has_extint+0x2f/0x60 [kvm] Call Trace: <TASK> kvm_cpu_has_injectable_intr+0xe/0x60 [kvm] nested_vmx_vmexit+0x2d7/0xdf0 [kvm_intel] nested_vmx_free_vcpu+0x40/0x50 [kvm_intel] vmx_vcpu_free+0x2d/0x80 [kvm_intel] kvm_arch_vcpu_destroy+0x2d/0x130 [kvm] kvm_destroy_vcpus+0x8a/0x100 [kvm] kvm_arch_destroy_vm+0xa7/0x1d0 [kvm] kvm_destroy_vm+0x172/0x300 [kvm] kvm_vcpu_release+0x31/0x50 [kvm] Inarguably, both nSVM and nVMX need to be fixed, but punt on those cleanups for the moment. Conceptually, vCPUs should be freed before VM state. 
Assets like the I/O APIC and PIC _must_ be allocated before vCPUs are created, so it stands to reason that they must be freed _after_ vCPUs are destroyed. Reported-by: Aaron Lewis <aaronlewis(a)google.com> Closes: https://lore.kernel.org/all/20240703175618.2304869-2-aaronlewis@google.com Cc: Jim Mattson <jmattson(a)google.com> Cc: Yan Zhao <yan.y.zhao(a)intel.com> Cc: Rick P Edgecombe <rick.p.edgecombe(a)intel.com> Cc: Kai Huang <kai.huang(a)intel.com> Cc: Isaku Yamahata <isaku.yamahata(a)intel.com> Signed-off-by: Sean Christopherson <seanjc(a)google.com> Signed-off-by: Kevin Cheng <chengkev(a)google.com> --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f378d479fea3f..7f91b11e6f0ec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12888,11 +12888,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } kvm_unload_vcpu_mmus(kvm); + kvm_destroy_vcpus(kvm); kvm_x86_call(vm_destroy)(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); - kvm_destroy_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); -- 2.50.1.470.g6ba607880d-goog

1 month

3
5
0 0

[PATCH 6.6.y] mm: khugepaged: fix call hpage_collapse_scan_file() for anonymous vma

by Jakub Acs

From: Liu Shixin <liushixin2(a)huawei.com> commit f1897f2f08b28ae59476d8b73374b08f856973af upstream. syzkaller reported the following BUG_ON(): ------------[ cut here ]------------ kernel BUG at mm/khugepaged.c:1835! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... CPU: 6 UID: 0 PID: 8009 Comm: syz.15.106 Kdump: loaded Tainted: G W 6.13.0-rc6 #22 Tainted: [W]=WARN Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : collapse_file+0xa44/0x1400 lr : collapse_file+0x88/0x1400 sp : ffff80008afe3a60 ... Call trace: collapse_file+0xa44/0x1400 (P) hpage_collapse_scan_file+0x278/0x400 madvise_collapse+0x1bc/0x678 madvise_vma_behavior+0x32c/0x448 madvise_walk_vmas.constprop.0+0xbc/0x140 do_madvise.part.0+0xdc/0x2c8 __arm64_sys_madvise+0x68/0x88 invoke_syscall+0x50/0x120 el0_svc_common.constprop.0+0xc8/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0x128 el0t_64_sync_handler+0xc8/0xd0 el0t_64_sync+0x190/0x198 This indicates that the pgoff is unaligned. After analysis, I confirmed that the vma is a mapping of /dev/zero. Such a vma certainly has vm_file, but it is set to anonymous by mmap_zero(). So even if it is mmapped 2M-unaligned, it can pass the check in thp_vma_allowable_order() as an anonymous mapping, but then be collapsed as a file mapping. It seems the problem has existed for a long time, but in practice, since the khugepaged_max_ptes_none check came first, we would skip collapsing it, as it is /dev/zero and so has no present pages. But commit d8ea7cc8547c limited that check to khugepaged only, so the BUG_ON() can now be triggered by madvise_collapse(). Add a vma_is_anonymous() check so that such a vma is processed by hpage_collapse_scan_pmd(). 
Link: https://lkml.kernel.org/r/20250111034511.2223353-1-liushixin2@huawei.com Fixes: d8ea7cc8547c ("mm/khugepaged: add flag to predicate khugepaged-only behavior") Signed-off-by: Liu Shixin <liushixin2(a)huawei.com> Reviewed-by: Yang Shi <yang(a)os.amperecomputing.com> Acked-by: David Hildenbrand <david(a)redhat.com> Cc: Chengming Zhou <chengming.zhou(a)linux.dev> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Mattew Wilcox <willy(a)infradead.org> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: Nanyong Sun <sunnanyong(a)huawei.com> Cc: Qi Zheng <zhengqi.arch(a)bytedance.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> [acsjakub: backport, clean apply] Cc: Jakub Acs <acsjakub(a)amazon.de> Cc: linux-mm(a)kvack.org --- Ran into the crash with syzkaller, backporting this patch works - the reproducer no longer crashes. Please let me know if there was a reason not to backport. mm/khugepaged.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b538c3d48386..abd5764e4864 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2404,7 +2404,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, VM_BUG_ON(khugepaged_scan.address < hstart || khugepaged_scan.address + HPAGE_PMD_SIZE > hend); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, khugepaged_scan.address); @@ -2750,7 +2750,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); nodes_clear(cc->alloc_nmask); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); -- 2.47.3 Amazon Web Services 
Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597

1 month

2
1
0 0

[PATCH 6.1.y] mm: khugepaged: fix call hpage_collapse_scan_file() for anonymous vma

by Jakub Acs

From: Liu Shixin <liushixin2(a)huawei.com> commit f1897f2f08b28ae59476d8b73374b08f856973af upstream. syzkaller reported the following BUG_ON(): ------------[ cut here ]------------ kernel BUG at mm/khugepaged.c:1835! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... CPU: 6 UID: 0 PID: 8009 Comm: syz.15.106 Kdump: loaded Tainted: G W 6.13.0-rc6 #22 Tainted: [W]=WARN Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : collapse_file+0xa44/0x1400 lr : collapse_file+0x88/0x1400 sp : ffff80008afe3a60 ... Call trace: collapse_file+0xa44/0x1400 (P) hpage_collapse_scan_file+0x278/0x400 madvise_collapse+0x1bc/0x678 madvise_vma_behavior+0x32c/0x448 madvise_walk_vmas.constprop.0+0xbc/0x140 do_madvise.part.0+0xdc/0x2c8 __arm64_sys_madvise+0x68/0x88 invoke_syscall+0x50/0x120 el0_svc_common.constprop.0+0xc8/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0x128 el0t_64_sync_handler+0xc8/0xd0 el0t_64_sync+0x190/0x198 This indicates that the pgoff is unaligned. After analysis, I confirmed that the vma is a mapping of /dev/zero. Such a vma certainly has vm_file, but it is set to anonymous by mmap_zero(). So even if it is mmapped 2M-unaligned, it can pass the check in thp_vma_allowable_order() as an anonymous mapping, but then be collapsed as a file mapping. It seems the problem has existed for a long time, but in practice, since the khugepaged_max_ptes_none check came first, we would skip collapsing it, as it is /dev/zero and so has no present pages. But commit d8ea7cc8547c limited that check to khugepaged only, so the BUG_ON() can now be triggered by madvise_collapse(). Add a vma_is_anonymous() check so that such a vma is processed by hpage_collapse_scan_pmd(). 
Link: https://lkml.kernel.org/r/20250111034511.2223353-1-liushixin2@huawei.com Fixes: d8ea7cc8547c ("mm/khugepaged: add flag to predicate khugepaged-only behavior") Signed-off-by: Liu Shixin <liushixin2(a)huawei.com> Reviewed-by: Yang Shi <yang(a)os.amperecomputing.com> Acked-by: David Hildenbrand <david(a)redhat.com> Cc: Chengming Zhou <chengming.zhou(a)linux.dev> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Mattew Wilcox <willy(a)infradead.org> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: Nanyong Sun <sunnanyong(a)huawei.com> Cc: Qi Zheng <zhengqi.arch(a)bytedance.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> [acsjakub: backport, clean apply] Cc: Jakub Acs <acsjakub(a)amazon.de> Cc: linux-mm(a)kvack.org --- Ran into the crash with syzkaller, backporting this patch works - the reproducer no longer crashes. Please let me know if there was a reason not to backport. mm/khugepaged.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b538c3d48386..abd5764e4864 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2404,7 +2404,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result, VM_BUG_ON(khugepaged_scan.address < hstart || khugepaged_scan.address + HPAGE_PMD_SIZE > hend); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, khugepaged_scan.address); @@ -2750,7 +2750,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); nodes_clear(cc->alloc_nmask); - if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { + if (IS_ENABLED(CONFIG_SHMEM) && !vma_is_anonymous(vma)) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); -- 2.47.3 Amazon Web Services 
Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597

1 month

2
1
0 0

[PATCH v5.10] block: don't call rq_qos_ops->done_bio if the bio isn't tracked

by Shivani Agarwal

From: Ming Lei <ming.lei(a)redhat.com> [ Upstream commit a647a524a46736786c95cdb553a070322ca096e3 ] The rq_qos framework is only applied to request-based drivers, so: 1) rq_qos_done_bio() needn't be called for bio-based drivers 2) rq_qos_done_bio() needn't be called for bios which aren't tracked, such as bios ended from error handling code. Especially in bio_endio(): 1) the request queue is referenced via bio->bi_bdev->bd_disk->queue, which may be gone since the request queue refcount may not be held in the above two cases 2) q->rq_qos may be freed in blk_cleanup_queue() when calling into __rq_qos_done_bio() Fix the potential kernel panic by not calling rq_qos_ops->done_bio if the bio isn't tracked. This is safe because both ioc_rqos_done_bio() and blkcg_iolatency_done_bio() are no-ops if the bio isn't tracked. Reported-by: Yu Kuai <yukuai3(a)huawei.com> Cc: tj(a)kernel.org Signed-off-by: Ming Lei <ming.lei(a)redhat.com> Reviewed-by: Christoph Hellwig <hch(a)lst.de> Acked-by: Tejun Heo <tj(a)kernel.org> Link: https://lore.kernel.org/r/20210924110704.1541818-1-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe(a)kernel.dk> Signed-off-by: Sasha Levin <sashal(a)kernel.org> [Shivani: Modified to apply on 5.10.y] Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com> --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 88a09c31095f..7851f54edc76 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1430,7 +1430,7 @@ void bio_endio(struct bio *bio) if (!bio_integrity_endio(bio)) return; - if (bio->bi_disk) + if (bio->bi_disk && bio_flagged(bio, BIO_TRACKED)) rq_qos_done_bio(bio->bi_disk->queue, bio); /* -- 2.40.4

1 month

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror