The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 27834971f616c5e154423c578fa95e0444444ce1
Gitweb: https://git.kernel.org/tip/27834971f616c5e154423c578fa95e0444444ce1
Author: Li RongQing <lirongqing(a)baidu.com>
AuthorDate: Wed, 19 Jun 2024 19:18:01 +08:00
Committer: Ingo Molnar <mingo(a)kernel.org>
CommitterDate: Sun, 29 Dec 2024 10:18:44 +01:00
virt: tdx-guest: Just leak decrypted memory on unrecoverable errors
In CoCo VMs it is possible for the untrusted host to cause
set_memory_decrypted() to fail such that an error is returned
and the resulting memory is shared. Callers need to take care
to handle these errors to avoid returning decrypted (shared)
memory to the page allocator, which could lead to functional
or security issues.
Leak the decrypted memory when set_memory_decrypted() fails,
and don't need to print an error since set_memory_decrypted()
will call WARN_ONCE().
Fixes: f4738f56d1dc ("virt: tdx-guest: Add Quote generation support using TSM_REPORTS")
Signed-off-by: Li RongQing <lirongqing(a)baidu.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Rick Edgecombe <rick.p.edgecombe(a)intel.com>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20240619111801.25630-1-lirongqing%40baidu.com
---
drivers/virt/coco/tdx-guest/tdx-guest.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c
index d7db6c8..224e7dd 100644
--- a/drivers/virt/coco/tdx-guest/tdx-guest.c
+++ b/drivers/virt/coco/tdx-guest/tdx-guest.c
@@ -124,10 +124,8 @@ static void *alloc_quote_buf(void)
if (!addr)
return NULL;
- if (set_memory_decrypted((unsigned long)addr, count)) {
- free_pages_exact(addr, len);
+ if (set_memory_decrypted((unsigned long)addr, count))
return NULL;
- }
return addr;
}
From: Chuck Lever <chuck.lever(a)oracle.com>
Testing shows that the EBUSY error return from mtree_alloc_cyclic()
leaks into user space. The ERRORS section of "man creat(2)" says:
> EBUSY O_EXCL was specified in flags and pathname refers
> to a block device that is in use by the system
> (e.g., it is mounted).
ENOSPC is closer to what applications expect in this situation.
Note that the normal range of simple directory offset values is
2..2^63, so hitting this error is going to be rare to impossible.
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
Cc: <stable(a)vger.kernel.org> # v6.9+
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Reviewed-by: Yang Erkun <yangerkun(a)huawei.com>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/libfs.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/libfs.c b/fs/libfs.c
index 748ac5923154..3da58a92f48f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -292,8 +292,8 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
LONG_MAX, &octx->next_offset, GFP_KERNEL);
- if (ret < 0)
- return ret;
+ if (unlikely(ret < 0))
+ return ret == -EBUSY ? -ENOSPC : ret;
offset_set(dentry, offset);
return 0;
--
2.47.0
>
>
>> 2024年12月25日 13:37,Kun Hu <huk23(a)m.fudan.edu.cn> 写道:
>>
>> Hello,
>>
>>> BUG: KASAN: slab-out-of-bounds in snd_seq_oss_synth_sysex+0x5d1/0x6c0 sound/core/seq/oss/seq_oss_synth.c:516
>>
>> We further analyzed the issue at line 516 in ./sound/core/seq/oss/seq_oss_synth.c.
>> The slab-out-of-bounds crash occurs in line 509, when sysex->len = 128. Specifically, the write operation to dest[0] accesses memory beyond the bounds of sysex->buf (128 byte).
>> To resolve this issue, we suggest adding 6 lines of code to validate the legality of the address write to sysex->buf before entering the loop:
>>
>> if (sysex->len >= MAX_SYSEX_BUFLEN) {
>> sysex->len = 0;
>> sysex->skip = 1;
>> return -EINVAL; /* Exit early if sysex->len is out of bounds */
>> }
>>
>> If you fix this issue, please add the following tag to the commit:
>> Reported-by: Kun Hu <huk23(a)m.fudan.edu.cn>
>>
>> —————
>> Thanks,
>> Kun Hu
>>
>>> 2024年12月24日 19:16,Kun Hu <huk23(a)m.fudan.edu.cn> 写道:
>>>
>>> Hello,
>>>
>>> When using fuzzer tool to fuzz the latest Linux kernel, the following crash
>>> was triggered.
>>>
>>> HEAD commit: 78d4f34e2115b517bcbfe7ec0d018bbbb6f9b0b8
>>> git tree: upstream
>>> Console output:https://drive.google.com/file/d/17oCyKDW_kNhSW5Bbvm23vnpD1eo0MHFi/vi…
>>> Kernel config: https://drive.google.com/file/d/1RhT5dFTs6Vx1U71PbpenN7TPtnPoa3NI/view?usp=…
>>> C reproducer: https://drive.google.com/file/d/177HJht6a7-6F3YLudKb_d4kiPGd1VA_i/view?usp=…
>>> Syzlang reproducer: https://drive.google.com/file/d/1AuP5UGGc47rEXXPuvjmCKgJ3d0U1P84j/view?usp=…
>>>
>>>
>>> If you fix this issue, please add the following tag to the commit:
>>> Reported-by: Kun Hu <huk23(a)m.fudan.edu.cn>
>>>
>>> ==================================================================
>>> BUG: KASAN: slab-out-of-bounds in snd_seq_oss_synth_sysex+0x5d1/0x6c0 sound/core/seq/oss/seq_oss_synth.c:516
>>> Write of size 1 at addr ff1100000588e288 by task syz-executor411/824
>>>
>>> CPU: 2 UID: 0 PID: 824 Comm: syz-executor411 Not tainted 6.13.0-rc3 #5
>>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
>>> Call Trace:
>>> <TASK>
>>> __dump_stack lib/dump_stack.c:94 [inline]
>>> dump_stack_lvl+0x116/0x1b0 lib/dump_stack.c:120
>>> print_address_description mm/kasan/report.c:378 [inline]
>>> print_report+0xcf/0x5f0 mm/kasan/report.c:489
>>> kasan_report+0x93/0xc0 mm/kasan/report.c:602
>>> snd_seq_oss_synth_sysex+0x5d1/0x6c0 sound/core/seq/oss/seq_oss_synth.c:516
>>> snd_seq_oss_process_event+0x46a/0x2620 sound/core/seq/oss/seq_oss_event.c:61
>>> insert_queue sound/core/seq/oss/seq_oss_rw.c:167 [inline]
>>> snd_seq_oss_write+0x261/0x7f0 sound/core/seq/oss/seq_oss_rw.c:135
>>> odev_write+0x53/0xa0 sound/core/seq/oss/seq_oss.c:168
>>> vfs_write fs/read_write.c:677 [inline]
>>> vfs_write+0x2e3/0x10f0 fs/read_write.c:659
>>> ksys_write+0x122/0x240 fs/read_write.c:731
>>> do_syscall_x64 arch/x86/entry/common.c:52 [inline]
>>> do_syscall_64+0xc3/0x1d0 arch/x86/entry/common.c:83
>>> entry_SYSCALL_64_after_hwframe+0x77/0x7f
>>
>
Hello,
Is this issue being considered and is it possible that the value of sysex->len in line 509 of the snd_seq_oss_synth_sysex function could exceed 127 and thus be out of bounds?
———
Best Regards,
Kun Hu
The patch titled
Subject: mm/kmemleak: fix percpu memory leak detection failure
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-kmemleak-fix-percpu-memory-leak-detection-failure.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Guo Weikang <guoweikang.kernel(a)gmail.com>
Subject: mm/kmemleak: fix percpu memory leak detection failure
Date: Fri, 27 Dec 2024 17:23:10 +0800
kmemleak_alloc_percpu gives an incorrect min_count parameter, causing
percpu memory to be considered a gray object.
Link: https://lkml.kernel.org/r/20241227092311.3572500-1-guoweikang.kernel@gmail.…
Fixes: 8c8685928910 ("mm/kmemleak: use IS_ERR_PCPU() for pointer in the percpu address space")
Signed-off-by: Guo Weikang <guoweikang.kernel(a)gmail.com>
Acked-by: Uros Bizjak <ubizjak(a)gmail.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Guo Weikang <guoweikang.kernel(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kmemleak.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/kmemleak.c~mm-kmemleak-fix-percpu-memory-leak-detection-failure
+++ a/mm/kmemleak.c
@@ -1093,7 +1093,7 @@ void __ref kmemleak_alloc_percpu(const v
pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size);
if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr))
- create_object_percpu((__force unsigned long)ptr, size, 0, gfp);
+ create_object_percpu((__force unsigned long)ptr, size, 1, gfp);
}
EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
_
Patches currently in -mm which might be from guoweikang.kernel(a)gmail.com are
mm-kmemleak-fix-percpu-memory-leak-detection-failure.patch
mm-shmem-refactor-to-reuse-vfs_parse_monolithic_sep-for-option-parsing.patch
mm-early_ioremap-add-null-pointer-checks-to-prevent-null-pointer-dereference.patch
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 11e6831fd81468cf48155b9b3c11295c391da723
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122708-spud-properly-0780@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 11e6831fd81468cf48155b9b3c11295c391da723 Mon Sep 17 00:00:00 2001
From: Maciej Andrzejewski <maciej.andrzejewski(a)m-works.net>
Date: Mon, 2 Dec 2024 19:58:36 +0100
Subject: [PATCH] mtd: rawnand: arasan: Fix missing de-registration of NAND
The NAND chip-selects are registered for the Arasan driver during
initialization but are not de-registered when the driver is unloaded. As a
result, if the driver is loaded again, the chip-selects remain registered
and busy, making them unavailable for use.
Fixes: 197b88fecc50 ("mtd: rawnand: arasan: Add new Arasan NAND controller")
Cc: stable(a)vger.kernel.org
Signed-off-by: Maciej Andrzejewski ICEYE <maciej.andrzejewski(a)m-works.net>
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c
index 26b506107a1a..865754737f5f 100644
--- a/drivers/mtd/nand/raw/arasan-nand-controller.c
+++ b/drivers/mtd/nand/raw/arasan-nand-controller.c
@@ -1478,8 +1478,15 @@ static int anfc_probe(struct platform_device *pdev)
static void anfc_remove(struct platform_device *pdev)
{
+ int i;
struct arasan_nfc *nfc = platform_get_drvdata(pdev);
+ for (i = 0; i < nfc->ncs; i++) {
+ if (nfc->cs_array[i]) {
+ gpiod_put(nfc->cs_array[i]);
+ }
+ }
+
anfc_chips_cleanup(nfc);
}
since updating from 6.12.5-100.fc40.x86_64 to 6.12.6-100.fc40.x86_64
I am getting many, many:
kernel: r8169 0000:01:00.0 enp1s0: Rx ERROR. status = 352ac5ee
and some:
kernel: r8169 0000:01:00.0 enp1s0: NETDEV WATCHDOG: CPU: 3:
transmit queue 0 timed out 5376 ms
kernel: r8169 0000:01:00.0: can't disable ASPM; OS doesn't have ASPM control
Thanks!
Mike Eddy
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f0e870a0e9c5521f2952ea9f3ea9d3d122631a89
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122729-array-crusader-3062@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f0e870a0e9c5521f2952ea9f3ea9d3d122631a89 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Date: Mon, 4 Nov 2024 11:50:50 +0200
Subject: [PATCH] dmaengine: dw: Select only supported masters for ACPI devices
The recently submitted fix-commit revealed a problem in the iDMA 32-bit
platform code. Even though the controller supported only a single master
the dw_dma_acpi_filter() method hard-coded two master interfaces with IDs
0 and 1. As a result the sanity check implemented in the commit
b336268dde75 ("dmaengine: dw: Add peripheral bus width verification")
got incorrect interface data width and thus prevented the client drivers
from configuring the DMA-channel with the EINVAL error returned. E.g.,
the next error was printed for the PXA2xx SPI controller driver trying
to configure the requested channels:
> [ 164.525604] pxa2xx_spi_pci 0000:00:07.1: DMA slave config failed
> [ 164.536105] pxa2xx_spi_pci 0000:00:07.1: failed to get DMA TX descriptor
> [ 164.543213] spidev spi-SPT0001:00: SPI transfer failed: -16
The problem would have been spotted much earlier if the iDMA 32-bit
controller supported more than one master interfaces. But since it
supports just a single master and the iDMA 32-bit specific code just
ignores the master IDs in the CTLLO preparation method, the issue has
been gone unnoticed so far.
Fix the problem by specifying the default master ID for both memory
and peripheral devices in the driver data. Thus the issue noticed for
the iDMA 32-bit controllers will be eliminated and the ACPI-probed
DW DMA controllers will be configured with the correct master ID by
default.
Cc: stable(a)vger.kernel.org
Fixes: b336268dde75 ("dmaengine: dw: Add peripheral bus width verification")
Fixes: 199244d69458 ("dmaengine: dw: add support of iDMA 32-bit hardware")
Reported-by: Ferry Toth <fntoth(a)gmail.com>
Closes: https://lore.kernel.org/dmaengine/ZuXbCKUs1iOqFu51@black.fi.intel.com/
Reported-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Closes: https://lore.kernel.org/dmaengine/ZuXgI-VcHpMgbZ91@black.fi.intel.com/
Tested-by: Ferry Toth <fntoth(a)gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Link: https://lore.kernel.org/r/20241104095142.157925-1-andriy.shevchenko@linux.i…
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/dma/dw/acpi.c b/drivers/dma/dw/acpi.c
index c510c109d2c3..b6452fffa657 100644
--- a/drivers/dma/dw/acpi.c
+++ b/drivers/dma/dw/acpi.c
@@ -8,13 +8,15 @@
static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param)
{
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_dma_chip_pdata *data = dev_get_drvdata(dw->dma.dev);
struct acpi_dma_spec *dma_spec = param;
struct dw_dma_slave slave = {
.dma_dev = dma_spec->dev,
.src_id = dma_spec->slave_id,
.dst_id = dma_spec->slave_id,
- .m_master = 0,
- .p_master = 1,
+ .m_master = data->m_master,
+ .p_master = data->p_master,
};
return dw_dma_filter(chan, &slave);
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index 563ce73488db..f1bd06a20cd6 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -51,11 +51,15 @@ struct dw_dma_chip_pdata {
int (*probe)(struct dw_dma_chip *chip);
int (*remove)(struct dw_dma_chip *chip);
struct dw_dma_chip *chip;
+ u8 m_master;
+ u8 p_master;
};
static __maybe_unused const struct dw_dma_chip_pdata dw_dma_chip_pdata = {
.probe = dw_dma_probe,
.remove = dw_dma_remove,
+ .m_master = 0,
+ .p_master = 1,
};
static const struct dw_dma_platform_data idma32_pdata = {
@@ -72,6 +76,8 @@ static __maybe_unused const struct dw_dma_chip_pdata idma32_chip_pdata = {
.pdata = &idma32_pdata,
.probe = idma32_dma_probe,
.remove = idma32_dma_remove,
+ .m_master = 0,
+ .p_master = 0,
};
static const struct dw_dma_platform_data xbar_pdata = {
@@ -88,6 +94,8 @@ static __maybe_unused const struct dw_dma_chip_pdata xbar_chip_pdata = {
.pdata = &xbar_pdata,
.probe = idma32_dma_probe,
.remove = idma32_dma_remove,
+ .m_master = 0,
+ .p_master = 0,
};
#endif /* _DMA_DW_INTERNAL_H */
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index ad2d4d012cf7..e8a0eb81726a 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -56,10 +56,10 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
if (ret)
return ret;
- dw_dma_acpi_controller_register(chip->dw);
-
pci_set_drvdata(pdev, data);
+ dw_dma_acpi_controller_register(chip->dw);
+
return 0;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0a92ea87bdd6f77ca4e17fe19649882cf5209edd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122731-scotch-canning-0796@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0a92ea87bdd6f77ca4e17fe19649882cf5209edd Mon Sep 17 00:00:00 2001
From: Justin Chen <justin.chen(a)broadcom.com>
Date: Thu, 24 Oct 2024 14:35:40 -0700
Subject: [PATCH] phy: usb: Toggle the PHY power during init
When bringing up the PHY, it might be in a bad state if left powered.
One case is we lose the PLL lock if the PLL is gated while the PHY
is powered. Toggle the PHY power so we can start from a known state.
Fixes: 4e5b9c9a73b3 ("phy: usb: Add support for new Synopsys USB controller on the 7216")
Signed-off-by: Justin Chen <justin.chen(a)broadcom.com>
Acked-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Link: https://lore.kernel.org/r/20241024213540.1059412-1-justin.chen@broadcom.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index 950b7ae1d1a8..dc452610934a 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -325,6 +325,12 @@ static void usb_init_common_7216(struct brcm_usb_init_params *params)
void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
USB_CTRL_UNSET(ctrl, USB_PM, XHC_S2_CLK_SWITCH_EN);
+
+ /*
+ * The PHY might be in a bad state if it is already powered
+ * up. Toggle the power just in case.
+ */
+ USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN);
USB_CTRL_UNSET(ctrl, USB_PM, USB_PWRDN);
/* 1 millisecond - for USB clocks to settle down */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0a92ea87bdd6f77ca4e17fe19649882cf5209edd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122729-anemic-selector-ee96@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0a92ea87bdd6f77ca4e17fe19649882cf5209edd Mon Sep 17 00:00:00 2001
From: Justin Chen <justin.chen(a)broadcom.com>
Date: Thu, 24 Oct 2024 14:35:40 -0700
Subject: [PATCH] phy: usb: Toggle the PHY power during init
When bringing up the PHY, it might be in a bad state if left powered.
One case is we lose the PLL lock if the PLL is gated while the PHY
is powered. Toggle the PHY power so we can start from a known state.
Fixes: 4e5b9c9a73b3 ("phy: usb: Add support for new Synopsys USB controller on the 7216")
Signed-off-by: Justin Chen <justin.chen(a)broadcom.com>
Acked-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Link: https://lore.kernel.org/r/20241024213540.1059412-1-justin.chen@broadcom.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index 950b7ae1d1a8..dc452610934a 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -325,6 +325,12 @@ static void usb_init_common_7216(struct brcm_usb_init_params *params)
void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
USB_CTRL_UNSET(ctrl, USB_PM, XHC_S2_CLK_SWITCH_EN);
+
+ /*
+ * The PHY might be in a bad state if it is already powered
+ * up. Toggle the power just in case.
+ */
+ USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN);
USB_CTRL_UNSET(ctrl, USB_PM, USB_PWRDN);
/* 1 millisecond - for USB clocks to settle down */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x c0b82ab95b4f1fbc3e3aeab9d829d012669524b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122759-prune-pavement-a482@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c0b82ab95b4f1fbc3e3aeab9d829d012669524b6 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu(a)quicinc.com>
Date: Fri, 13 Dec 2024 20:36:42 +0800
Subject: [PATCH] phy: core: Fix that API devm_of_phy_provider_unregister()
fails to unregister the phy provider
For devm_of_phy_provider_unregister(), its comment says it needs to invoke
of_phy_provider_unregister() to unregister the phy provider, but it will
not actually invoke the function since devres_destroy() does not call
devm_phy_provider_release(), and the missing of_phy_provider_unregister()
call will cause:
- The phy provider fails to be unregistered.
- Leak both memory and the OF node refcount.
Fortunately, the faulty API has not been used by current kernel tree.
Fix by using devres_release() instead of devres_destroy() within the API.
Fixes: ff764963479a ("drivers: phy: add generic PHY framework")
Reviewed-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
Link: https://lore.kernel.org/stable/20241213-phy_core_fix-v6-2-40ae28f5015a%40qu…
Link: https://lore.kernel.org/r/20241213-phy_core_fix-v6-2-40ae28f5015a@quicinc.c…
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index f190d7126613..de07e1616b34 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -1259,12 +1259,12 @@ EXPORT_SYMBOL_GPL(of_phy_provider_unregister);
* of_phy_provider_unregister to unregister the phy provider.
*/
void devm_of_phy_provider_unregister(struct device *dev,
- struct phy_provider *phy_provider)
+ struct phy_provider *phy_provider)
{
int r;
- r = devres_destroy(dev, devm_phy_provider_release, devm_phy_match,
- phy_provider);
+ r = devres_release(dev, devm_phy_provider_release, devm_phy_match,
+ phy_provider);
dev_WARN_ONCE(dev, r, "couldn't find PHY provider device resource\n");
}
EXPORT_SYMBOL_GPL(devm_of_phy_provider_unregister);
While testing the MMC nodes proposed in [1], it was noted that mmc0/1
would fail to initialize, with "mmc: fatal err update clk timeout" in
the kernel logs. A closer look at the clock definitions showed that the MMC
MPs had the "CLK_SET_RATE_NO_REPARENT" flag set. No reason was given for
adding this flag in the first place, and its original purpose is unknown,
but it doesn't seem to make sense and results in severe limitations to MMC
speeds. Thus, remove this flag from the 3 MMC MPs.
[1] https://msgid.link/20241024170540.2721307-10-masterr3c0rd@epochal.quest
Fixes: fb038ce4db55 ("clk: sunxi-ng: add support for the Allwinner A100 CCU")
Cc: stable(a)vger.kernel.org
Signed-off-by: Cody Eksal <masterr3c0rd(a)epochal.quest>
Reviewed-by: Andre Przywara <andre.przywara(a)arm.com>
---
drivers/clk/sunxi-ng/ccu-sun50i-a100.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a100.c b/drivers/clk/sunxi-ng/ccu-sun50i-a100.c
index bbaa82978716..a59e420b195d 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-a100.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-a100.c
@@ -436,7 +436,7 @@ static SUNXI_CCU_MP_WITH_MUX_GATE_POSTDIV(mmc0_clk, "mmc0", mmc_parents, 0x830,
24, 2, /* mux */
BIT(31), /* gate */
2, /* post-div */
- CLK_SET_RATE_NO_REPARENT);
+ 0);
static SUNXI_CCU_MP_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1", mmc_parents, 0x834,
0, 4, /* M */
@@ -444,7 +444,7 @@ static SUNXI_CCU_MP_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1", mmc_parents, 0x834,
24, 2, /* mux */
BIT(31), /* gate */
2, /* post-div */
- CLK_SET_RATE_NO_REPARENT);
+ 0);
static SUNXI_CCU_MP_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc_parents, 0x838,
0, 4, /* M */
@@ -452,7 +452,7 @@ static SUNXI_CCU_MP_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc_parents, 0x838,
24, 2, /* mux */
BIT(31), /* gate */
2, /* post-div */
- CLK_SET_RATE_NO_REPARENT);
+ 0);
static SUNXI_CCU_GATE(bus_mmc0_clk, "bus-mmc0", "ahb3", 0x84c, BIT(0), 0);
static SUNXI_CCU_GATE(bus_mmc1_clk, "bus-mmc1", "ahb3", 0x84c, BIT(1), 0);
--
2.47.0
The CLKOUTn may be fed from PLL1/2/3, but the PLL1/2/3 has to be enabled
first by setting PLL_CLKE bit 11 in CCM_ANALOG_SYS_PLLn_GEN_CTRL register.
The CCM_ANALOG_SYS_PLLn_GEN_CTRL bit 11 is modeled by plln_out clock. Fix
the clock tree and place the clkout1/2 under plln_sel instead of plain plln
to let the clock subsystem correctly control the bit 11 and enable the PLL
in case the CLKOUTn is supplied by PLL1/2/3.
Fixes: 43896f56b59e ("clk: imx8mp: add clkout1/2 support")
Signed-off-by: Marek Vasut <marex(a)denx.de>
---
Cc: Abel Vesa <abelvesa(a)kernel.org>
Cc: Fabio Estevam <festevam(a)gmail.com>
Cc: Michael Turquette <mturquette(a)baylibre.com>
Cc: Peng Fan <peng.fan(a)nxp.com>
Cc: Pengutronix Kernel Team <kernel(a)pengutronix.de>
Cc: Sascha Hauer <s.hauer(a)pengutronix.de>
Cc: Shawn Guo <shawnguo(a)kernel.org>
Cc: Stephen Boyd <sboyd(a)kernel.org>
Cc: imx(a)lists.linux.dev
Cc: linux-arm-kernel(a)lists.infradead.org
Cc: linux-clk(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.18+
---
drivers/clk/imx/clk-imx8mp.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c
index 516dbd170c8a3..fb18f507f1213 100644
--- a/drivers/clk/imx/clk-imx8mp.c
+++ b/drivers/clk/imx/clk-imx8mp.c
@@ -399,8 +399,9 @@ static const char * const imx8mp_dram_core_sels[] = {"dram_pll_out", "dram_alt_r
static const char * const imx8mp_clkout_sels[] = {"audio_pll1_out", "audio_pll2_out", "video_pll1_out",
"dummy", "dummy", "gpu_pll_out", "vpu_pll_out",
- "arm_pll_out", "sys_pll1", "sys_pll2", "sys_pll3",
- "dummy", "dummy", "osc_24m", "dummy", "osc_32k"};
+ "arm_pll_out", "sys_pll1_out", "sys_pll2_out",
+ "sys_pll3_out", "dummy", "dummy", "osc_24m",
+ "dummy", "osc_32k"};
static struct clk_hw **hws;
static struct clk_hw_onecell_data *clk_hw_data;
--
2.45.2
From: Michal Pecio <michal.pecio(a)gmail.com>
If a command is queued to the final usable TRB of a ring segment, the
enqueue pointer is advanced to the subsequent link TRB and no further.
If the command is later aborted, when the abort completion is handled
the dequeue pointer is advanced to the first TRB of the next segment.
If no further commands are queued, xhci_handle_stopped_cmd_ring() sees
the ring pointers unequal and assumes that there is a pending command,
so it calls xhci_mod_cmd_timer() which crashes if cur_cmd was NULL.
Don't attempt timer setup if cur_cmd is NULL. The subsequent doorbell
ring likely is unnecessary too, but it's harmless. Leave it alone.
This is probably Bug 219532, but no confirmation has been received.
The issue has been independently reproduced and confirmed fixed using
a USB MCU programmed to NAK the Status stage of SET_ADDRESS forever.
Everything continued working normally after several prevented crashes.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219532
Fixes: c311e391a7ef ("xhci: rework command timeout and cancellation,")
CC: stable(a)vger.kernel.org
Signed-off-by: Michal Pecio <michal.pecio(a)gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 4cf5363875c7..da26e317ab0c 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -422,7 +422,8 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) &&
!(xhci->xhc_state & XHCI_STATE_DYING)) {
xhci->current_cmd = cur_cmd;
- xhci_mod_cmd_timer(xhci);
+ if (cur_cmd)
+ xhci_mod_cmd_timer(xhci);
xhci_ring_cmd_db(xhci);
}
}
--
2.25.1
Commit c45ae598fc16 ("clk: qcom: support for alpha mode configuration")
added support for configuring alpha mode, but it seems that the feature
was never working in practice.
The value of the alpha_{en,mode}_mask members of the configuration gets
added to the value parameter passed to the regmap_update_bits() function,
however the same values are not getting applied to the bitmask. As the
result, the respective bits in the USER_CTL register are never modifed
which leads to improper configuration of several PLLs.
The following table shows the PLL configurations where the 'alpha_en_mask'
member is set and which are passed as a parameter for the
clk_alpha_pll_configure() function. In the table the 'expected rate' column
shows the rate the PLL should run at with the given configuration, and
the 'real rate' column shows the rate the PLL runs at actually. The real
rates has been verified on hardwareOn IPQ* platforms, on other platforms,
those are computed values only.
file pll expected rate real rate
dispcc-qcm2290.c disp_cc_pll0 768.0 MHz 768.0 MHz
dispcc-sm6115.c disp_cc_pll0 768.0 MHz 768.0 MHz
gcc-ipq5018.c ubi32_pll 1000.0 MHz != 984.0 MHz
gcc-ipq6018.c nss_crypto_pll 1200.0 MHz 1200.0 MHz
gcc-ipq6018.c ubi32_pll 1497.6 MHz != 1488.0 MHz
gcc-ipq8074.c nss_crypto_pll 1200.0 MHz != 1190.4 MHz
gcc-qcm2290.c gpll11 532.0 MHz != 518.4 MHz
gcc-qcm2290.c gpll8 533.2 MHz != 518.4 MHz
gcc-qcs404.c gpll3 921.6 MHz 921.6 MHz
gcc-sm6115.c gpll11 600.0 MHz != 595.2 MHz
gcc-sm6115.c gpll8 800.0 MHz != 787.2 MHz
gpucc-sdm660.c gpu_cc_pll0 800.0 MHz != 787.2 MHz
gpucc-sdm660.c gpu_cc_pll1 740.0 MHz != 729.6 MHz
gpucc-sm6115.c gpu_cc_pll0 1200.0 MHz != 1190.4 MHz
gpucc-sm6115.c gpu_cc_pll1 640.0 MHz != 633.6 MHz
gpucc-sm6125.c gpu_pll0 1020.0 MHz != 1017.6 MHz
gpucc-sm6125.c gpu_pll1 930.0 MHz != 921.6 MHz
mmcc-sdm660.c mmpll8 930.0 MHz != 921.6 MHz
mmcc-sdm660.c mmpll5 825.0 MHz != 806.4 MHz
As it can be seen from the above, there are several PLLs which are
configured incorrectly.
Change the code to apply both 'alpha_en_mask' and 'alpha_mode_mask'
values to the bitmask in order to configure the alpha mode correctly.
Applying the 'alpha_en_mask' fixes the initial rate of the PLLs showed
in the table above. Since the 'alpha_mode_mask' is not used by any driver
currently, that part of the change causes no functional changes.
Cc: stable(a)vger.kernel.org
Fixes: c45ae598fc16 ("clk: qcom: support for alpha mode configuration")
Signed-off-by: Gabor Juhos <j4g8y7(a)gmail.com>
---
drivers/clk/qcom/clk-alpha-pll.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index f9105443d7dbb104e3cb091e59f43df25999f8b3..03cc7aa092480bfdd9eaa986d44f0545944b3b89 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -421,6 +421,8 @@ void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
mask |= config->pre_div_mask;
mask |= config->post_div_mask;
mask |= config->vco_mask;
+ mask |= config->alpha_en_mask;
+ mask |= config->alpha_mode_mask;
regmap_update_bits(regmap, PLL_USER_CTL(pll), mask, val);
---
base-commit: 9852d85ec9d492ebef56dc5f229416c925758edc
change-id: 20241021-fix-alpha-mode-config-8383238a4854
Best regards,
--
Gabor Juhos <j4g8y7(a)gmail.com>
The orc_sort_cmp() function, used with qsort(), previously violated the
symmetry and transitivity rules required by the C standard.
Specifically, when both entries are ORC_TYPE_UNDEFINED, it could result
in both a < b and b < a, which breaks the required symmetry and
transitivity. This can lead to undefined behavior and incorrect sorting
results, potentially causing memory corruption in glibc
implementations [1].
Symmetry: If x < y, then y > x.
Transitivity: If x < y and y < z, then x < z.
Fix the comparison logic to return 0 when both entries are
ORC_TYPE_UNDEFINED, ensuring compliance with qsort() requirements.
Link: https://www.qualys.com/2024/01/30/qsort.txt [1]
Fixes: 57fa18994285 ("scripts/sorttable: Implement build-time ORC unwind table sorting")
Fixes: fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in two")
Cc: stable(a)vger.kernel.org
Signed-off-by: Kuan-Wei Chiu <visitorckw(a)gmail.com>
---
scripts/sorttable.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index 7bd0184380d3..a7c5445baf00 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -110,7 +110,7 @@ static inline unsigned long orc_ip(const int *ip)
static int orc_sort_cmp(const void *_a, const void *_b)
{
- struct orc_entry *orc_a;
+ struct orc_entry *orc_a, *orc_b;
const int *a = g_orc_ip_table + *(int *)_a;
const int *b = g_orc_ip_table + *(int *)_b;
unsigned long a_val = orc_ip(a);
@@ -128,6 +128,9 @@ static int orc_sort_cmp(const void *_a, const void *_b)
* whitelisted .o files which didn't get objtool generation.
*/
orc_a = g_orc_table + (a - g_orc_ip_table);
+ orc_b = g_orc_table + (b - g_orc_ip_table);
+ if (orc_a->type == ORC_TYPE_UNDEFINED && orc_b->type == ORC_TYPE_UNDEFINED)
+ return 0;
return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
}
--
2.34.1
The patch titled
Subject: alloc_tag: skip pgalloc_tag_swap if profiling is disabled
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
alloc_tag-skip-pgalloc_tag_swap-if-profiling-is-disabled.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Suren Baghdasaryan <surenb(a)google.com>
Subject: alloc_tag: skip pgalloc_tag_swap if profiling is disabled
Date: Thu, 26 Dec 2024 13:16:39 -0800
When memory allocation profiling is disabled, there is no need to swap
allocation tags during migration. Skip it to avoid unnecessary overhead.
Link: https://lkml.kernel.org/r/20241226211639.1357704-2-surenb@google.com
Fixes: e0a955bf7f61 ("mm/codetag: add pgalloc_tag_copy()")
Signed-off-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: David Wang <00107082(a)163.com>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Zhenhua Huang <quic_zhenhuah(a)quicinc.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/alloc_tag.c | 3 +++
1 file changed, 3 insertions(+)
--- a/lib/alloc_tag.c~alloc_tag-skip-pgalloc_tag_swap-if-profiling-is-disabled
+++ a/lib/alloc_tag.c
@@ -197,6 +197,9 @@ void pgalloc_tag_swap(struct folio *new,
union codetag_ref ref_old, ref_new;
struct alloc_tag *tag_old, *tag_new;
+ if (!mem_alloc_profiling_enabled())
+ return;
+
tag_old = pgalloc_tag_get(&old->page);
if (!tag_old)
return;
_
Patches currently in -mm which might be from surenb(a)google.com are
alloc_tag-avoid-current-alloc_tag-manipulations-when-profiling-is-disabled.patch
alloc_tag-skip-pgalloc_tag_swap-if-profiling-is-disabled.patch
seqlock-add-raw_seqcount_try_begin.patch
mm-convert-mm_lock_seq-to-a-proper-seqcount.patch
mm-introduce-mmap_lock_speculate_try_beginretry.patch
mm-introduce-vma_start_read_locked_nested-helpers.patch
mm-move-per-vma-lock-into-vm_area_struct.patch
mm-mark-vma-as-detached-until-its-added-into-vma-tree.patch
mm-modify-vma_iter_store_gfp-to-indicate-if-its-storing-a-new-vma.patch
mm-mark-vmas-detached-upon-exit.patch
mm-nommu-fix-the-last-places-where-vma-is-not-locked-before-being-attached.patch
types-move-struct-rcuwait-into-typesh.patch
mm-allow-vma_start_read_locked-vma_start_read_locked_nested-to-fail.patch
mm-move-mmap_init_lock-out-of-the-header-file.patch
mm-uninline-the-main-body-of-vma_start_write.patch
refcount-introduce-__refcount_addinc_not_zero_limited.patch
mm-replace-vm_lock-and-detached-flag-with-a-reference-count.patch
mm-debug-print-vm_refcnt-state-when-dumping-the-vma.patch
mm-debug-print-vm_refcnt-state-when-dumping-the-vma-fix.patch
mm-remove-extra-vma_numab_state_init-call.patch
mm-prepare-lock_vma_under_rcu-for-vma-reuse-possibility.patch
mm-make-vma-cache-slab_typesafe_by_rcu.patch
docs-mm-document-latest-changes-to-vm_lock.patch
The patch titled
Subject: alloc_tag: avoid current->alloc_tag manipulations when profiling is disabled
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
alloc_tag-avoid-current-alloc_tag-manipulations-when-profiling-is-disabled.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Suren Baghdasaryan <surenb(a)google.com>
Subject: alloc_tag: avoid current->alloc_tag manipulations when profiling is disabled
Date: Thu, 26 Dec 2024 13:16:38 -0800
When memory allocation profiling is disabled there is no need to update
current->alloc_tag and these manipulations add unnecessary overhead. Fix
the overhead by skipping these extra updates.
Link: https://lkml.kernel.org/r/20241226211639.1357704-1-surenb@google.com
Fixes: b951aaff5035 ("mm: enable page allocation tagging")
Signed-off-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: David Wang <00107082(a)163.com>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Zhenhua Huang <quic_zhenhuah(a)quicinc.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/alloc_tag.h | 11 ++++++++---
lib/alloc_tag.c | 2 ++
2 files changed, 10 insertions(+), 3 deletions(-)
--- a/include/linux/alloc_tag.h~alloc_tag-avoid-current-alloc_tag-manipulations-when-profiling-is-disabled
+++ a/include/linux/alloc_tag.h
@@ -224,9 +224,14 @@ static inline void alloc_tag_sub(union c
#define alloc_hooks_tag(_tag, _do_alloc) \
({ \
- struct alloc_tag * __maybe_unused _old = alloc_tag_save(_tag); \
- typeof(_do_alloc) _res = _do_alloc; \
- alloc_tag_restore(_tag, _old); \
+ typeof(_do_alloc) _res; \
+ if (mem_alloc_profiling_enabled()) { \
+ struct alloc_tag * __maybe_unused _old; \
+ _old = alloc_tag_save(_tag); \
+ _res = _do_alloc; \
+ alloc_tag_restore(_tag, _old); \
+ } else \
+ _res = _do_alloc; \
_res; \
})
--- a/lib/alloc_tag.c~alloc_tag-avoid-current-alloc_tag-manipulations-when-profiling-is-disabled
+++ a/lib/alloc_tag.c
@@ -29,6 +29,8 @@ EXPORT_SYMBOL(_shared_alloc_tag);
DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
mem_alloc_profiling_key);
+EXPORT_SYMBOL(mem_alloc_profiling_key);
+
DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);
struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
_
Patches currently in -mm which might be from surenb(a)google.com are
alloc_tag-avoid-current-alloc_tag-manipulations-when-profiling-is-disabled.patch
alloc_tag-skip-pgalloc_tag_swap-if-profiling-is-disabled.patch
seqlock-add-raw_seqcount_try_begin.patch
mm-convert-mm_lock_seq-to-a-proper-seqcount.patch
mm-introduce-mmap_lock_speculate_try_beginretry.patch
mm-introduce-vma_start_read_locked_nested-helpers.patch
mm-move-per-vma-lock-into-vm_area_struct.patch
mm-mark-vma-as-detached-until-its-added-into-vma-tree.patch
mm-modify-vma_iter_store_gfp-to-indicate-if-its-storing-a-new-vma.patch
mm-mark-vmas-detached-upon-exit.patch
mm-nommu-fix-the-last-places-where-vma-is-not-locked-before-being-attached.patch
types-move-struct-rcuwait-into-typesh.patch
mm-allow-vma_start_read_locked-vma_start_read_locked_nested-to-fail.patch
mm-move-mmap_init_lock-out-of-the-header-file.patch
mm-uninline-the-main-body-of-vma_start_write.patch
refcount-introduce-__refcount_addinc_not_zero_limited.patch
mm-replace-vm_lock-and-detached-flag-with-a-reference-count.patch
mm-debug-print-vm_refcnt-state-when-dumping-the-vma.patch
mm-debug-print-vm_refcnt-state-when-dumping-the-vma-fix.patch
mm-remove-extra-vma_numab_state_init-call.patch
mm-prepare-lock_vma_under_rcu-for-vma-reuse-possibility.patch
mm-make-vma-cache-slab_typesafe_by_rcu.patch
docs-mm-document-latest-changes-to-vm_lock.patch
The patch titled
Subject: memcg: fix soft lockup in the OOM process
has been added to the -mm mm-unstable branch. Its filename is
memcg-fix-soft-lockup-in-the-oom-process.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Chen Ridong <chenridong(a)huawei.com>
Subject: memcg: fix soft lockup in the OOM process
Date: Tue, 24 Dec 2024 02:52:38 +0000
A soft lockup issue was found in the product with about 56,000 tasks were
in the OOM cgroup, it was traversing them when the soft lockup was
triggered.
watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [VM Thread:1503066]
CPU: 2 PID: 1503066 Comm: VM Thread Kdump: loaded Tainted: G
Hardware name: Huawei Cloud OpenStack Nova, BIOS
RIP: 0010:console_unlock+0x343/0x540
RSP: 0000:ffffb751447db9a0 EFLAGS: 00000247 ORIG_RAX: ffffffffffffff13
RAX: 0000000000000001 RBX: 0000000000000000 RCX: 00000000ffffffff
RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000247
RBP: ffffffffafc71f90 R08: 0000000000000000 R09: 0000000000000040
R10: 0000000000000080 R11: 0000000000000000 R12: ffffffffafc74bd0
R13: ffffffffaf60a220 R14: 0000000000000247 R15: 0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f2fe6ad91f0 CR3: 00000004b2076003 CR4: 0000000000360ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
vprintk_emit+0x193/0x280
printk+0x52/0x6e
dump_task+0x114/0x130
mem_cgroup_scan_tasks+0x76/0x100
dump_header+0x1fe/0x210
oom_kill_process+0xd1/0x100
out_of_memory+0x125/0x570
mem_cgroup_out_of_memory+0xb5/0xd0
try_charge+0x720/0x770
mem_cgroup_try_charge+0x86/0x180
mem_cgroup_try_charge_delay+0x1c/0x40
do_anonymous_page+0xb5/0x390
handle_mm_fault+0xc4/0x1f0
This is because thousands of processes are in the OOM cgroup, it takes a
long time to traverse all of them. As a result, this lead to soft lockup
in the OOM process.
To fix this issue, call 'cond_resched' in the 'mem_cgroup_scan_tasks'
function per 1000 iterations. For global OOM, call
'touch_softlockup_watchdog' per 1000 iterations to avoid this issue.
Link: https://lkml.kernel.org/r/20241224025238.3768787-1-chenridong@huaweicloud.c…
Fixes: 9cbb78bb3143 ("mm, memcg: introduce own oom handler to iterate only over its own threads")
Signed-off-by: Chen Ridong <chenridong(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 7 ++++++-
mm/oom_kill.c | 8 +++++++-
2 files changed, 13 insertions(+), 2 deletions(-)
--- a/mm/memcontrol.c~memcg-fix-soft-lockup-in-the-oom-process
+++ a/mm/memcontrol.c
@@ -1161,6 +1161,7 @@ void mem_cgroup_scan_tasks(struct mem_cg
{
struct mem_cgroup *iter;
int ret = 0;
+ int i = 0;
BUG_ON(mem_cgroup_is_root(memcg));
@@ -1169,8 +1170,12 @@ void mem_cgroup_scan_tasks(struct mem_cg
struct task_struct *task;
css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
- while (!ret && (task = css_task_iter_next(&it)))
+ while (!ret && (task = css_task_iter_next(&it))) {
+ /* Avoid potential softlockup warning */
+ if ((++i & 1023) == 0)
+ cond_resched();
ret = fn(task, arg);
+ }
css_task_iter_end(&it);
if (ret) {
mem_cgroup_iter_break(memcg, iter);
--- a/mm/oom_kill.c~memcg-fix-soft-lockup-in-the-oom-process
+++ a/mm/oom_kill.c
@@ -44,6 +44,7 @@
#include <linux/init.h>
#include <linux/mmu_notifier.h>
#include <linux/cred.h>
+#include <linux/nmi.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -430,10 +431,15 @@ static void dump_tasks(struct oom_contro
mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
else {
struct task_struct *p;
+ int i = 0;
rcu_read_lock();
- for_each_process(p)
+ for_each_process(p) {
+ /* Avoid potential softlockup warning */
+ if ((++i & 1023) == 0)
+ touch_softlockup_watchdog();
dump_task(p, oc);
+ }
rcu_read_unlock();
}
}
_
Patches currently in -mm which might be from chenridong(a)huawei.com are
mm-vmscan-retry-folios-written-back-while-isolated-for-traditional-lru.patch
memcg-fix-soft-lockup-in-the-oom-process.patch
If a clk_rcg2 has a parent, it should also have parent_map defined,
otherwise we'll get a NULL pointer dereference when calling clk_set_rate
on those clocks.
Correct this on clocks in both gcc-sm6350 and dispcc-sm6350.
Signed-off-by: Luca Weiss <luca.weiss(a)fairphone.com>
---
Luca Weiss (2):
clk: qcom: gcc-sm6350: Add missing parent_map for two clocks
clk: qcom: dispcc-sm6350: Add missing parent_map for a clock
drivers/clk/qcom/dispcc-sm6350.c | 7 +++----
drivers/clk/qcom/gcc-sm6350.c | 22 ++++++++++++++--------
2 files changed, 17 insertions(+), 12 deletions(-)
---
base-commit: 8155b4ef3466f0e289e8fcc9e6e62f3f4dceeac2
change-id: 20241220-sm6350-parent_map-c03e9b61a718
Best regards,
--
Luca Weiss <luca.weiss(a)fairphone.com>
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a2e740e216f5bf49ccb83b6d490c72a340558a43
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122359-compactly-cranium-ec0b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a2e740e216f5bf49ccb83b6d490c72a340558a43 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Date: Wed, 11 Dec 2024 20:25:37 +0000
Subject: [PATCH] vmalloc: fix accounting with i915
If the caller of vmap() specifies VM_MAP_PUT_PAGES (currently only the
i915 driver), we will decrement nr_vmalloc_pages and MEMCG_VMALLOC in
vfree(). These counters are incremented by vmalloc() but not by vmap() so
this will cause an underflow. Check the VM_MAP_PUT_PAGES flag before
decrementing either counter.
Link: https://lkml.kernel.org/r/20241211202538.168311-1-willy@infradead.org
Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap")
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeel.butt(a)linux.dev>
Reviewed-by: Balbir Singh <balbirs(a)nvidia.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f009b21705c1..5c88d0e90c20 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3374,7 +3374,8 @@ void vfree(const void *addr)
struct page *page = vm->pages[i];
BUG_ON(!page);
- mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
+ if (!(vm->flags & VM_MAP_PUT_PAGES))
+ mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
/*
* High-order allocs for huge vmallocs are split, so
* can be freed as an array of order-0 allocations
@@ -3382,7 +3383,8 @@ void vfree(const void *addr)
__free_page(page);
cond_resched();
}
- atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+ if (!(vm->flags & VM_MAP_PUT_PAGES))
+ atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
kvfree(vm->pages);
kfree(vm);
}
Following is an attempt to backport fix of CVE-2024-44986 back to stable
5.4 and 5.10. 3 extra pre-requisite patches were required to introduce
the skb_expand_head() function and use it in ip6_finish_output2() for
the fix patch to be applicable.
Eric Dumazet (1):
ipv6: fix possible UAF in ip6_finish_output2()
Vasily Averin (3):
skbuff: introduce skb_expand_head()
ipv6: use skb_expand_head in ip6_finish_output2
ipv6: use skb_expand_head in ip6_xmit
include/linux/skbuff.h | 1 +
net/core/skbuff.c | 42 ++++++++++++++++++++++
net/ipv6/ip6_output.c | 82 ++++++++++++++++--------------------------
3 files changed, 74 insertions(+), 51 deletions(-)
--
2.46.0
The patch titled
Subject: scripts/sorttable: fix orc_sort_cmp() to maintain symmetry and transitivity
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
scripts-sorttable-fix-orc_sort_cmp-to-maintain-symmetry-and-transitivity.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kuan-Wei Chiu <visitorckw(a)gmail.com>
Subject: scripts/sorttable: fix orc_sort_cmp() to maintain symmetry and transitivity
Date: Thu, 26 Dec 2024 22:03:32 +0800
The orc_sort_cmp() function, used with qsort(), previously violated the
symmetry and transitivity rules required by the C standard. Specifically,
when both entries are ORC_TYPE_UNDEFINED, it could result in both a < b
and b < a, which breaks the required symmetry and transitivity. This can
lead to undefined behavior and incorrect sorting results, potentially
causing memory corruption in glibc implementations [1].
Symmetry: If x < y, then y > x.
Transitivity: If x < y and y < z, then x < z.
Fix the comparison logic to return 0 when both entries are
ORC_TYPE_UNDEFINED, ensuring compliance with qsort() requirements.
Link: https://www.qualys.com/2024/01/30/qsort.txt [1]
Link: https://lkml.kernel.org/r/20241226140332.2670689-1-visitorckw@gmail.com
Fixes: 57fa18994285 ("scripts/sorttable: Implement build-time ORC unwind table sorting")
Fixes: fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in two")
Signed-off-by: Kuan-Wei Chiu <visitorckw(a)gmail.com>
Cc: Ching-Chun (Jim) Huang <jserv(a)ccns.ncku.edu.tw>
Cc: <chuang(a)cs.nycu.edu.tw>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Shile Zhang <shile.zhang(a)linux.alibaba.com>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/sorttable.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/scripts/sorttable.h~scripts-sorttable-fix-orc_sort_cmp-to-maintain-symmetry-and-transitivity
+++ a/scripts/sorttable.h
@@ -110,7 +110,7 @@ static inline unsigned long orc_ip(const
static int orc_sort_cmp(const void *_a, const void *_b)
{
- struct orc_entry *orc_a;
+ struct orc_entry *orc_a, *orc_b;
const int *a = g_orc_ip_table + *(int *)_a;
const int *b = g_orc_ip_table + *(int *)_b;
unsigned long a_val = orc_ip(a);
@@ -128,6 +128,9 @@ static int orc_sort_cmp(const void *_a,
* whitelisted .o files which didn't get objtool generation.
*/
orc_a = g_orc_table + (a - g_orc_ip_table);
+ orc_b = g_orc_table + (b - g_orc_ip_table);
+ if (orc_a->type == ORC_TYPE_UNDEFINED && orc_b->type == ORC_TYPE_UNDEFINED)
+ return 0;
return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
}
_
Patches currently in -mm which might be from visitorckw(a)gmail.com are
scripts-sorttable-fix-orc_sort_cmp-to-maintain-symmetry-and-transitivity.patch
lib-min_heap-improve-type-safety-in-min_heap-macros-by-using-container_of.patch
lib-test_min_heap-use-inline-min-heap-variants-to-reduce-attack-vector.patch
lib-min_heap-add-brief-introduction-to-min-heap-api.patch
documentation-core-api-min_heap-add-author-information.patch
From: Konrad Dybcio <konrad.dybcio(a)oss.qualcomm.com>
The Last Level Cache is split into many slices, each one of which can
be toggled on or off.
Only certain slices are recommended to be turned on unconditionally,
in order to reach optimal performance/latency/power levels.
Enable WRCACHE on X1 at boot, in accordance with internal
recommendations.
No significant performance difference is expected.
Fixes: b3cf69a43502 ("soc: qcom: llcc: Add configuration data for X1E80100")
Cc: stable(a)vger.kernel.org
Reviewed-by: Rajendra Nayak <quic_rjendra(a)quicinc.com>
Signed-off-by: Konrad Dybcio <konrad.dybcio(a)oss.qualcomm.com>
---
Changes in v3:
- Improve the commit message
- Link to v2: https://lore.kernel.org/r/20241212-topic-llcc_x1e_wrcache-v2-1-e44d3058d06c…
Changes in v2:
- Cc stable
- Add more context lines
- Pick up r-b
- Link to v1: https://lore.kernel.org/r/20241207-topic-llcc_x1e_wrcache-v1-1-232e6aff49e4…
---
drivers/soc/qcom/llcc-qcom.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
index 32c3bc887cefb87c296e3ba67a730c87fa2fa346..1560db00a01248197e5c2936e785a5ea77f74ad8 100644
--- a/drivers/soc/qcom/llcc-qcom.c
+++ b/drivers/soc/qcom/llcc-qcom.c
@@ -2997,20 +2997,21 @@ static const struct llcc_slice_config x1e80100_data[] = {
.bonus_ways = 0xfff,
.cache_mode = 0,
}, {
.usecase_id = LLCC_WRCACHE,
.slice_id = 31,
.max_cap = 1024,
.priority = 1,
.fixed_size = true,
.bonus_ways = 0xfff,
.cache_mode = 0,
+ .activate_on_init = true,
}, {
.usecase_id = LLCC_CAMEXP0,
.slice_id = 4,
.max_cap = 256,
.priority = 4,
.fixed_size = true,
.bonus_ways = 0x3,
.cache_mode = 0,
}, {
.usecase_id = LLCC_CAMEXP1,
---
base-commit: 3e42dc9229c5950e84b1ed705f94ed75ed208228
change-id: 20241207-topic-llcc_x1e_wrcache-647070e2d130
Best regards,
--
Konrad Dybcio <konrad.dybcio(a)oss.qualcomm.com>
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 900bbaae67e980945dec74d36f8afe0de7556d5a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122326-viscous-dreaded-d15d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 900bbaae67e980945dec74d36f8afe0de7556d5a Mon Sep 17 00:00:00 2001
From: Xuewen Yan <xuewen.yan(a)unisoc.com>
Date: Fri, 26 Apr 2024 16:05:48 +0800
Subject: [PATCH] epoll: Add synchronous wakeup support for ep_poll_callback
Now, the epoll only use wake_up() interface to wake up task.
However, sometimes, there are epoll users which want to use
the synchronous wakeup flag to hint the scheduler, such as
Android binder driver.
So add a wake_up_sync() define, and use the wake_up_sync()
when the sync is true in ep_poll_callback().
Co-developed-by: Jing Xia <jing.xia(a)unisoc.com>
Signed-off-by: Jing Xia <jing.xia(a)unisoc.com>
Signed-off-by: Xuewen Yan <xuewen.yan(a)unisoc.com>
Link: https://lore.kernel.org/r/20240426080548.8203-1-xuewen.yan@unisoc.com
Tested-by: Brian Geffon <bgeffon(a)google.com>
Reviewed-by: Brian Geffon <bgeffon(a)google.com>
Reported-by: Benoit Lize <lizeb(a)google.com>
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 90fbab6b6f03..1a06e462b6ef 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1373,7 +1373,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
break;
}
}
- wake_up(&ep->wq);
+ if (sync)
+ wake_up_sync(&ep->wq);
+ else
+ wake_up(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 8aa3372f21a0..2b322a9b88a2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1)
#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0)
+#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL)
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
The orig_a0 is missing in struct user_regs_struct of riscv, and there is
no way to add it without breaking UAPI. (See Link tag below)
Like NT_ARM_SYSTEM_CALL do, we add a new regset name NT_RISCV_ORIG_A0 to
access original a0 register from userspace via ptrace API.
Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Signed-off-by: Celeste Liu <uwu(a)coelacanthus.name>
---
Changes in v2:
- Fix integer width.
- Add selftest.
- Link to v1: https://lore.kernel.org/r/20241201-riscv-new-regset-v1-1-c83c58abcc7b@coela…
---
Celeste Liu (1):
riscv/ptrace: add new regset to access original a0 register
Charlie Jenkins (1):
riscv: selftests: Add a ptrace test to verify syscall parameter modification
arch/riscv/kernel/ptrace.c | 32 +++++++
include/uapi/linux/elf.h | 1 +
tools/testing/selftests/riscv/abi/.gitignore | 1 +
tools/testing/selftests/riscv/abi/Makefile | 5 +-
tools/testing/selftests/riscv/abi/ptrace.c | 134 +++++++++++++++++++++++++++
5 files changed, 172 insertions(+), 1 deletion(-)
---
base-commit: 0e287d31b62bb53ad81d5e59778384a40f8b6f56
change-id: 20241201-riscv-new-regset-d529b952ad0d
Best regards,
--
Celeste Liu <uwu(a)coelacanthus.name>
The orig_a0 is missing in struct user_regs_struct of riscv, and there is
no way to add it without breaking UAPI. (See Link tag below)
Like NT_ARM_SYSTEM_CALL do, we add a new regset name NT_RISCV_ORIG_A0 to
access original a0 register from userspace via ptrace API.
Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Signed-off-by: Celeste Liu <uwu(a)coelacanthus.name>
---
Changes in v4:
- Fix a copy paste error in selftest. (Forget to commit...)
- Link to v3: https://lore.kernel.org/r/20241226-riscv-new-regset-v3-0-f5b96465826b@coela…
Changes in v3:
- Use return 0 directly for readability.
- Fix test for modify a0.
- Add Fixes: tag
- Remove useless Cc: stable.
- Selftest will check both a0 and orig_a0, but depends on the
correctness of PTRACE_GET_SYSCALL_INFO.
- Link to v2: https://lore.kernel.org/r/20241203-riscv-new-regset-v2-0-d37da8c0cba6@coela…
Changes in v2:
- Fix integer width.
- Add selftest.
- Link to v1: https://lore.kernel.org/r/20241201-riscv-new-regset-v1-1-c83c58abcc7b@coela…
---
Celeste Liu (2):
riscv/ptrace: add new regset to access original a0 register
riscv: selftests: Add a ptrace test to verify syscall parameter modification
arch/riscv/kernel/ptrace.c | 32 ++++++
include/uapi/linux/elf.h | 1 +
tools/testing/selftests/riscv/abi/.gitignore | 1 +
tools/testing/selftests/riscv/abi/Makefile | 5 +-
tools/testing/selftests/riscv/abi/ptrace.c | 151 +++++++++++++++++++++++++++
5 files changed, 189 insertions(+), 1 deletion(-)
---
base-commit: 0e287d31b62bb53ad81d5e59778384a40f8b6f56
change-id: 20241201-riscv-new-regset-d529b952ad0d
Best regards,
--
Celeste Liu <uwu(a)coelacanthus.name>
The orig_a0 is missing in struct user_regs_struct of riscv, and there is
no way to add it without breaking UAPI. (See Link tag below)
Like NT_ARM_SYSTEM_CALL do, we add a new regset name NT_RISCV_ORIG_A0 to
access original a0 register from userspace via ptrace API.
Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@gmail.com/
Signed-off-by: Celeste Liu <uwu(a)coelacanthus.name>
---
Changes in v3:
- Use return 0 directly for readability.
- Fix test for modify a0.
- Add Fixes: tag
- Remove useless Cc: stable.
- Selftest will check both a0 and orig_a0, but depends on the
correctness of PTRACE_GET_SYSCALL_INFO.
- Link to v2: https://lore.kernel.org/r/20241203-riscv-new-regset-v2-0-d37da8c0cba6@coela…
Changes in v2:
- Fix integer width.
- Add selftest.
- Link to v1: https://lore.kernel.org/r/20241201-riscv-new-regset-v1-1-c83c58abcc7b@coela…
---
Celeste Liu (2):
riscv/ptrace: add new regset to access original a0 register
riscv: selftests: Add a ptrace test to verify syscall parameter modification
arch/riscv/kernel/ptrace.c | 32 ++++++
include/uapi/linux/elf.h | 1 +
tools/testing/selftests/riscv/abi/.gitignore | 1 +
tools/testing/selftests/riscv/abi/Makefile | 5 +-
tools/testing/selftests/riscv/abi/ptrace.c | 151 +++++++++++++++++++++++++++
5 files changed, 189 insertions(+), 1 deletion(-)
---
base-commit: 0e287d31b62bb53ad81d5e59778384a40f8b6f56
change-id: 20241201-riscv-new-regset-d529b952ad0d
Best regards,
--
Celeste Liu <uwu(a)coelacanthus.name>
[ Sasha's backport helper bot ]
Hi,
Found matching upstream commit: c968fd23c68e9929ab6cad4faffc8ea603e98e5d
WARNING: Author mismatch between patch and found commit:
Backport author: trondmy(a)kernel.org
Commit author: Trond Myklebust <trond.myklebust(a)hammerspace.com>
Status in newer kernel trees:
6.12.y | Not found
Note: The patch differs from the upstream commit:
---
1: c968fd23c68e ! 1: 4b6aa0b050af NFSv4.0: Fix the wake up of the next waiter in nfs_release_seqid()
@@ Commit message
seqid being removed is at the head of the list, and so is relinquishing
control of the sequence counter to the next entry.
- Reviewed-by: Yang Erkun <yangerkun(a)huawei.com>
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
## fs/nfs/nfs4state.c ##
---
Results of testing on various branches:
| Branch | Patch Apply | Build Test |
|---------------------------|-------------|------------|
| stable/linux-6.12.y | Success | Success |
| stable/linux-6.6.y | Success | Success |
| stable/linux-6.1.y | Success | Success |
| stable/linux-5.15.y | Success | Success |
| stable/linux-5.10.y | Success | Success |
| stable/linux-5.4.y | Success | Success |
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 85230ee36d88e7a09fb062d43203035659dd10a5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122229-excusable-sample-91cf@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 85230ee36d88e7a09fb062d43203035659dd10a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <mdaenzer(a)redhat.com>
Date: Tue, 17 Dec 2024 18:22:56 +0100
Subject: [PATCH] drm/amdgpu: Handle NULL bo->tbo.resource (again) in
amdgpu_vm_bo_update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Third time's the charm, I hope?
Fixes: d3116756a710 ("drm/ttm: rename bo->mem and make it a pointer")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3837
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Michel Dänzer <mdaenzer(a)redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
(cherry picked from commit 695c2c745e5dff201b75da8a1d237ce403600d04)
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ddd7f05e4db9..c9c48b782ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1266,10 +1266,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
* next command submission.
*/
if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
- uint32_t mem_type = bo->tbo.resource->mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x a2e740e216f5bf49ccb83b6d490c72a340558a43
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122300-utensil-parsnip-aaf2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a2e740e216f5bf49ccb83b6d490c72a340558a43 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Date: Wed, 11 Dec 2024 20:25:37 +0000
Subject: [PATCH] vmalloc: fix accounting with i915
If the caller of vmap() specifies VM_MAP_PUT_PAGES (currently only the
i915 driver), we will decrement nr_vmalloc_pages and MEMCG_VMALLOC in
vfree(). These counters are incremented by vmalloc() but not by vmap() so
this will cause an underflow. Check the VM_MAP_PUT_PAGES flag before
decrementing either counter.
Link: https://lkml.kernel.org/r/20241211202538.168311-1-willy@infradead.org
Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap")
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reviewed-by: Shakeel Butt <shakeel.butt(a)linux.dev>
Reviewed-by: Balbir Singh <balbirs(a)nvidia.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f009b21705c1..5c88d0e90c20 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3374,7 +3374,8 @@ void vfree(const void *addr)
struct page *page = vm->pages[i];
BUG_ON(!page);
- mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
+ if (!(vm->flags & VM_MAP_PUT_PAGES))
+ mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
/*
* High-order allocs for huge vmallocs are split, so
* can be freed as an array of order-0 allocations
@@ -3382,7 +3383,8 @@ void vfree(const void *addr)
__free_page(page);
cond_resched();
}
- atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+ if (!(vm->flags & VM_MAP_PUT_PAGES))
+ atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
kvfree(vm->pages);
kfree(vm);
}
Доброго дня, між ТОВ «Нова Пошта» та Вашою організацією за укладеним договором № 676131925 організувалася заборгованість у розмірі 64887 гр. , яка до сьогодні не була погашена Вашою організацією. У разі не погашення заборгованості до 27.12.2024, ми будемо змушені звернутися до суду для стягнення з Вашої організації боргу в судовому порядку.
Додаток надсилаю копію передсудової претензії, та оновлений рахунок на оплату послуг.
З повагою,
Відділ досудового врегулювання,
ТОВ «Нова Пошта»
corp.com(a)novaposhta.ua
044-32-30-222
You need cc stable.
在 2024/12/25 10:50, Li Lingfeng 写道:
> Hi, I noticed that [PATCH 2] has been applied to the stable, but [PATCH 1]
> has not.
> Based on 6.6 where [PATCH 2] was merged, I can still reproduce the
> original issue, and [PATCH 1] needs to be applied as well to resolve it.
> It may be better to push [PATCH 1] to the stable as well.
> Thanks.
>
> 在 2024/11/9 6:13, trondmy(a)kernel.org 写道:
>> From: Trond Myklebust <trond.myklebust(a)hammerspace.com>
>>
>> There is no need to wake up another waiter on the seqid list unless the
>> seqid being removed is at the head of the list, and so is relinquishing
>> control of the sequence counter to the next entry.
>>
>> Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
>> ---
>> fs/nfs/nfs4state.c | 10 ++++------
>> 1 file changed, 4 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
>> index dafd61186557..9a9f60a2291b 100644
>> --- a/fs/nfs/nfs4state.c
>> +++ b/fs/nfs/nfs4state.c
>> @@ -1083,14 +1083,12 @@ void nfs_release_seqid(struct nfs_seqid *seqid)
>> return;
>> sequence = seqid->sequence;
>> spin_lock(&sequence->lock);
>> - list_del_init(&seqid->list);
>> - if (!list_empty(&sequence->list)) {
>> - struct nfs_seqid *next;
>> -
>> - next = list_first_entry(&sequence->list,
>> - struct nfs_seqid, list);
>> + if (list_is_first(&seqid->list, &sequence->list) &&
>> + !list_is_singular(&sequence->list)) {
>> + struct nfs_seqid *next = list_next_entry(seqid, list);
>> rpc_wake_up_queued_task(&sequence->wait, next->task);
>> }
>> + list_del_init(&seqid->list);
>> spin_unlock(&sequence->lock);
>> }
This patch series is to fix bugs for below APIs:
devm_phy_put()
devm_of_phy_provider_unregister()
devm_phy_destroy()
phy_get()
of_phy_get()
devm_phy_get()
devm_of_phy_get()
devm_of_phy_get_by_index()
And simplify below API:
of_phy_simple_xlate().
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
---
Changes in v6:
- Use non-error path solution for patch 6/6.
- Remove stable tag for both patch 2/6 and 3/6.
- Link to v5: https://lore.kernel.org/r/20241106-phy_core_fix-v5-0-9771652eb88c@quicinc.c…
Changes in v5:
- s/Fixed/Fix s/case/cause for commit message based on Johan's reminder
- Remove unrelated change about code style for patch 4/6 suggested by Johan
- Link to v4: https://lore.kernel.org/r/20241102-phy_core_fix-v4-0-4f06439f61b1@quicinc.c…
Changes in v4:
- Correct commit message for patch 6/6
- Link to v3: https://lore.kernel.org/r/20241030-phy_core_fix-v3-0-19b97c3ec917@quicinc.c…
Changes in v3:
- Correct commit message based on Johan's suggestions for patches 1/6-3/6.
- Use goto label solution suggested by Johan for patch 4/6, also correct
commit message and remove the inline comment for it.
- Link to v2: https://lore.kernel.org/r/20241024-phy_core_fix-v2-0-fc0c63dbfcf3@quicinc.c…
Changes in v2:
- Correct title, commit message, and inline comments.
- Link to v1: https://lore.kernel.org/r/20241020-phy_core_fix-v1-0-078062f7da71@quicinc.c…
---
Zijun Hu (6):
phy: core: Fix that API devm_phy_put() fails to release the phy
phy: core: Fix that API devm_of_phy_provider_unregister() fails to unregister the phy provider
phy: core: Fix that API devm_phy_destroy() fails to destroy the phy
phy: core: Fix an OF node refcount leakage in _of_phy_get()
phy: core: Fix an OF node refcount leakage in of_phy_provider_lookup()
phy: core: Simplify API of_phy_simple_xlate() implementation
drivers/phy/phy-core.c | 44 +++++++++++++++++++++-----------------------
1 file changed, 21 insertions(+), 23 deletions(-)
---
base-commit: 9d23e48654620fdccfcc74cc2cef04eaf7353d07
change-id: 20241020-phy_core_fix-e3ad65db98f7
Best regards,
--
Zijun Hu <quic_zijuhu(a)quicinc.com>
If the clocks priv->iclk and priv->fclk were not enabled in ehci_hcd_sh_probe,
they should not be disabled in any path.
Conversely, if they was enabled in ehci_hcd_sh_probe, they must be disabled
in all error paths to ensure proper cleanup.
Found by Linux Verification Center (linuxtesting.org) with Klever.
Fixes: 63c845522263 ("usb: ehci-hcd: Add support for SuperH EHCI.")
Cc: stable(a)vger.kernel.org # ff30bd6a6618: sh: clk: Fix clk_enable() to return 0 on NULL clk
Signed-off-by: Vitalii Mordan <mordan(a)ispras.ru>
---
drivers/usb/host/ehci-sh.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/host/ehci-sh.c b/drivers/usb/host/ehci-sh.c
index d31d9506e41a..77460aac6dbd 100644
--- a/drivers/usb/host/ehci-sh.c
+++ b/drivers/usb/host/ehci-sh.c
@@ -119,8 +119,12 @@ static int ehci_hcd_sh_probe(struct platform_device *pdev)
if (IS_ERR(priv->iclk))
priv->iclk = NULL;
- clk_enable(priv->fclk);
- clk_enable(priv->iclk);
+ ret = clk_enable(priv->fclk);
+ if (ret)
+ goto fail_request_resource;
+ ret = clk_enable(priv->iclk);
+ if (ret)
+ goto fail_iclk;
ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
if (ret != 0) {
@@ -136,6 +140,7 @@ static int ehci_hcd_sh_probe(struct platform_device *pdev)
fail_add_hcd:
clk_disable(priv->iclk);
+fail_iclk:
clk_disable(priv->fclk);
fail_request_resource:
--
2.25.1
This patch series is to fix bugs and improve codes regarding various
driver core device iterating APIs
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
---
Changes in v5:
- Add comments back and correct tile and commit messages for patch 8/8.
- Link to v4: https://lore.kernel.org/r/20241218-class_fix-v4-0-3c40f098356b@quicinc.com
Changes in v4:
- Squich patches 3-5 into one based on Jonathan and Fan comments.
- Add one more patch
- Link to v3: https://lore.kernel.org/r/20241212-class_fix-v3-0-04e20c4f0971@quicinc.com
Changes in v3:
- Correct commit message, add fix tag, and correct pr_crit() message for 1st patch
- Add more patches regarding driver core device iterating APIs.
- Link to v2: https://lore.kernel.org/r/20241112-class_fix-v2-0-73d198d0a0d5@quicinc.com
Changes in v2:
- Remove both fix and stable tag for patch 1/3
- drop patch 3/3
- Link to v1: https://lore.kernel.org/r/20241105-class_fix-v1-0-80866f9994a5@quicinc.com
---
Zijun Hu (8):
driver core: class: Fix wild pointer dereferences in API class_dev_iter_next()
blk-cgroup: Fix class @block_class's subsystem refcount leakage
driver core: Move true expression out of if condition in 3 device finding APIs
driver core: Rename declaration parameter name for API device_find_child() cluster
driver core: Correct parameter check for API device_for_each_child_reverse_from()
driver core: Correct API device_for_each_child_reverse_from() prototype
driver core: Introduce device_iter_t for device iterating APIs
driver core: Move two simple APIs for finding child device to header
block/blk-cgroup.c | 1 +
drivers/base/bus.c | 9 +++++---
drivers/base/class.c | 11 ++++++++--
drivers/base/core.c | 49 +++++++++----------------------------------
drivers/base/driver.c | 9 +++++---
drivers/cxl/core/hdm.c | 2 +-
drivers/cxl/core/region.c | 2 +-
include/linux/device.h | 46 +++++++++++++++++++++++++++++++---------
include/linux/device/bus.h | 7 +++++--
include/linux/device/class.h | 4 ++--
include/linux/device/driver.h | 2 +-
11 files changed, 78 insertions(+), 64 deletions(-)
---
base-commit: 78d4f34e2115b517bcbfe7ec0d018bbbb6f9b0b8
change-id: 20241104-class_fix-f176bd9eba22
prerequisite-change-id: 20241201-const_dfc_done-aaec71e3bbea:v5
prerequisite-patch-id: 536aa56c0d055f644a1f71ab5c88b7cac9510162
prerequisite-patch-id: 39b0cf088c72853d9ce60c9e633ad2070a0278a8
prerequisite-patch-id: 60b22c42b67ad56a3d2a7b80a30ad588cbe740ec
prerequisite-patch-id: 119a167d7248481987b5e015db0e4fdb0d6edab8
prerequisite-patch-id: 133248083f3d3c57beb16473c2a4c62b3abc5fd0
prerequisite-patch-id: 4cda541f55165650bfa69fb19cbe0524eff0cb85
prerequisite-patch-id: 2b4193c6ea6370c07e6b66de04be89fb09448f54
prerequisite-patch-id: 73c675db18330c89fd8ca4790914d1d486ce0db8
prerequisite-patch-id: 88c50fc851fd7077797fd4e63fb12966b1b601bd
prerequisite-patch-id: 47b93916c1b5fb809d7c99aeaa05c729b1af01c5
prerequisite-patch-id: 5b58c0292ea5a5e37b08e2c8287d94df958128db
prerequisite-patch-id: 52ffb42b5aae69cae708332e0ddc7016139999f1
Best regards,
--
Zijun Hu <quic_zijuhu(a)quicinc.com>
This series fixes a wrong handling of the child node within the
for_each_child_of_node() by adding the missing calls to of_node_put() to
make it compatible with stable kernels that don't provide the scoped
variant of the macro, which is more secure and was introduced early this
year. The switch to the scoped macro is the next patch, which makes the
coe more robust and will avoid such issues in new error paths.
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
---
Javier Carrasco (2):
dmaengine: mv_xor: fix child node refcount handling in early exit
dmaengine: mv_xor: switch to for_each_child_of_node_scoped()
drivers/dma/mv_xor.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
---
base-commit: d61a00525464bfc5fe92c6ad713350988e492b88
change-id: 20241011-dma_mv_xor_of_node_put-5cc4126746a2
Best regards,
--
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 10331a93486ffb7b85ceea9da48a37da8cc16bdf
Gitweb: https://git.kernel.org/tip/10331a93486ffb7b85ceea9da48a37da8cc16bdf
Author: Li RongQing <lirongqing(a)baidu.com>
AuthorDate: Wed, 19 Jun 2024 19:18:01 +08:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Wed, 18 Dec 2024 06:07:55 -08:00
virt: tdx-guest: Just leak decrypted memory on unrecoverable errors
In CoCo VMs it is possible for the untrusted host to cause
set_memory_decrypted() to fail such that an error is returned
and the resulting memory is shared. Callers need to take care
to handle these errors to avoid returning decrypted (shared)
memory to the page allocator, which could lead to functional
or security issues.
Leak the decrypted memory when set_memory_decrypted() fails,
and don't need to print an error since set_memory_decrypted()
will call WARN_ONCE().
Fixes: f4738f56d1dc ("virt: tdx-guest: Add Quote generation support using TSM_REPORTS")
Signed-off-by: Li RongQing <lirongqing(a)baidu.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Rick Edgecombe <rick.p.edgecombe(a)intel.com>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20240619111801.25630-1-lirongqing%40baidu.com
---
drivers/virt/coco/tdx-guest/tdx-guest.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c
index d7db6c8..224e7dd 100644
--- a/drivers/virt/coco/tdx-guest/tdx-guest.c
+++ b/drivers/virt/coco/tdx-guest/tdx-guest.c
@@ -124,10 +124,8 @@ static void *alloc_quote_buf(void)
if (!addr)
return NULL;
- if (set_memory_decrypted((unsigned long)addr, count)) {
- free_pages_exact(addr, len);
+ if (set_memory_decrypted((unsigned long)addr, count))
return NULL;
- }
return addr;
}
From: Kan Liang <kan.liang(a)linux.intel.com>
The only difference between 5 and 6 is the new counters snapshotting
group, without the following counters snapshotting enabling patches,
it's impossible to utilize the feature in a PEBS record. It's safe to
share the same code path with format 5.
Add format 6, so the end user can at least utilize the legacy PEBS
features.
Fixes: a932aa0e868f ("perf/x86: Add Lunar Lake and Arrow Lake support")
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
New patch since V4
arch/x86/events/intel/ds.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8dcf90f6fb59..ba74e1198328 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2551,6 +2551,7 @@ void __init intel_ds_init(void)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
break;
+ case 6:
case 5:
x86_pmu.pebs_ept = 1;
fallthrough;
--
2.38.1
The reference count of the device incremented in device_initialize() is
not decremented when device_add() fails. Add a put_device() call before
returning from the function to decrement reference count for cleanup.
Or it could cause memory leak.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 53d2a715c240 ("phy: Add Tegra XUSB pad controller support")
Signed-off-by: Ma Ke <make_ruc2021(a)163.com>
---
drivers/phy/tegra/xusb.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 79d4814d758d..c89df95aa6ca 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -548,16 +548,16 @@ static int tegra_xusb_port_init(struct tegra_xusb_port *port,
err = dev_set_name(&port->dev, "%s-%u", name, index);
if (err < 0)
- goto unregister;
+ goto put_device;
err = device_add(&port->dev);
if (err < 0)
- goto unregister;
+ goto put_device;
return 0;
-unregister:
- device_unregister(&port->dev);
+put_device:
+ put_device(&port->dev);
return err;
}
--
2.25.1
This commit addresses an issue related to below kernel panic where
panic_on_warn is enabled. It is caused by the unnecessary use of WARN_ON
in functionsfs_bind, which easily leads to the following scenarios.
1.adb_write in adbd 2. UDC write via configfs
================= =====================
->usb_ffs_open_thread() ->UDC write
->open_functionfs() ->configfs_write_iter()
->adb_open() ->gadget_dev_desc_UDC_store()
->adb_write() ->usb_gadget_register_driver_owner
->driver_register()
->StartMonitor() ->bus_add_driver()
->adb_read() ->gadget_bind_driver()
<times-out without BIND event> ->configfs_composite_bind()
->usb_add_function()
->open_functionfs() ->ffs_func_bind()
->adb_open() ->functionfs_bind()
<ffs->state !=FFS_ACTIVE>
The adb_open, adb_read, and adb_write operations are invoked from the
daemon, but trying to bind the function is a process that is invoked by
UDC write through configfs, which opens up the possibility of a race
condition between the two paths. In this race scenario, the kernel panic
occurs due to the WARN_ON from functionfs_bind when panic_on_warn is
enabled. This commit fixes the kernel panic by removing the unnecessary
WARN_ON.
Kernel panic - not syncing: kernel: panic_on_warn set ...
[ 14.542395] Call trace:
[ 14.542464] ffs_func_bind+0x1c8/0x14a8
[ 14.542468] usb_add_function+0xcc/0x1f0
[ 14.542473] configfs_composite_bind+0x468/0x588
[ 14.542478] gadget_bind_driver+0x108/0x27c
[ 14.542483] really_probe+0x190/0x374
[ 14.542488] __driver_probe_device+0xa0/0x12c
[ 14.542492] driver_probe_device+0x3c/0x220
[ 14.542498] __driver_attach+0x11c/0x1fc
[ 14.542502] bus_for_each_dev+0x104/0x160
[ 14.542506] driver_attach+0x24/0x34
[ 14.542510] bus_add_driver+0x154/0x270
[ 14.542514] driver_register+0x68/0x104
[ 14.542518] usb_gadget_register_driver_owner+0x48/0xf4
[ 14.542523] gadget_dev_desc_UDC_store+0xf8/0x144
[ 14.542526] configfs_write_iter+0xf0/0x138
Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Akash M <akash.m5(a)samsung.com>
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 2920f8000bbd..92c883440e02 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2285,7 +2285,7 @@ static int functionfs_bind(struct ffs_data *ffs, struct usb_composite_dev *cdev)
struct usb_gadget_strings **lang;
int first_id;
- if (WARN_ON(ffs->state != FFS_ACTIVE
+ if ((ffs->state != FFS_ACTIVE
|| test_and_set_bit(FFS_FL_BOUND, &ffs->flags)))
return -EBADFD;
--
2.17.1
The patch titled
Subject: mm/damon/core: fix ignored quota goals and filters of newly committed schemes
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-core-fix-ignored-quota-goals-and-filters-of-newly-committed-schemes.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/core: fix ignored quota goals and filters of newly committed schemes
Date: Sun, 22 Dec 2024 15:12:22 -0800
damon_commit_schemes() ignores quota goals and filters of the newly
committed schemes. This makes users confused about the behaviors.
Correctly handle those inputs.
Link: https://lkml.kernel.org/r/20241222231222.85060-3-sj@kernel.org
Fixes: 9cb3d0b9dfce ("mm/damon/core: implement DAMON context commit function")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/mm/damon/core.c~mm-damon-core-fix-ignored-quota-goals-and-filters-of-newly-committed-schemes
+++ a/mm/damon/core.c
@@ -868,6 +868,11 @@ static int damon_commit_schemes(struct d
NUMA_NO_NODE);
if (!new_scheme)
return -ENOMEM;
+ err = damos_commit(new_scheme, src_scheme);
+ if (err) {
+ damon_destroy_scheme(new_scheme);
+ return err;
+ }
damon_add_scheme(dst, new_scheme);
}
return 0;
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-core-fix-new-damon_target-objects-leaks-on-damon_commit_targets.patch
mm-damon-core-fix-ignored-quota-goals-and-filters-of-newly-committed-schemes.patch
samples-add-a-skeleton-of-a-sample-damon-module-for-working-set-size-estimation.patch
samples-damon-wsse-start-and-stop-damon-as-the-user-requests.patch
samples-damon-wsse-implement-working-set-size-estimation-and-logging.patch
samples-damon-introduce-a-skeleton-of-a-smaple-damon-module-for-proactive-reclamation.patch
samples-damon-prcl-implement-schemes-setup.patch
replace-free-hugepage-folios-after-migration-fix-2.patch
The patch titled
Subject: mm/damon/core: fix new damon_target objects leaks on damon_commit_targets()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-core-fix-new-damon_target-objects-leaks-on-damon_commit_targets.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/core: fix new damon_target objects leaks on damon_commit_targets()
Date: Sun, 22 Dec 2024 15:12:21 -0800
Patch series "mm/damon/core: fix memory leaks and ignored inputs from
damon_commit_ctx()".
Due to two bugs in damon_commit_targets() and damon_commit_schemes(),
which are called from damon_commit_ctx(), some user inputs can be ignored,
and some mmeory objects can be leaked. Fix those.
Note that only DAMON sysfs interface users are affected. Other DAMON core
API user modules that more focused more on simple and dedicated production
usages, including DAMON_RECLAIM and DAMON_LRU_SORT are not using the buggy
function in the way, so not affected.
This patch (of 2):
When new DAMON targets are added via damon_commit_targets(), the newly
created targets are not deallocated when updating the internal data
(damon_commit_target()) is failed. Worse yet, even if the setup is
successfully done, the new target is not linked to the context. Hence,
the new targets are always leaked regardless of the internal data setup
failure. Fix the leaks.
Link: https://lkml.kernel.org/r/20241222231222.85060-2-sj@kernel.org
Fixes: 9cb3d0b9dfce ("mm/damon/core: implement DAMON context commit function")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/mm/damon/core.c~mm-damon-core-fix-new-damon_target-objects-leaks-on-damon_commit_targets
+++ a/mm/damon/core.c
@@ -961,8 +961,11 @@ static int damon_commit_targets(
return -ENOMEM;
err = damon_commit_target(new_target, false,
src_target, damon_target_has_pid(src));
- if (err)
+ if (err) {
+ damon_destroy_target(new_target);
return err;
+ }
+ damon_add_target(dst, new_target);
}
return 0;
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-core-fix-new-damon_target-objects-leaks-on-damon_commit_targets.patch
mm-damon-core-fix-ignored-quota-goals-and-filters-of-newly-committed-schemes.patch
samples-add-a-skeleton-of-a-sample-damon-module-for-working-set-size-estimation.patch
samples-damon-wsse-start-and-stop-damon-as-the-user-requests.patch
samples-damon-wsse-implement-working-set-size-estimation-and-logging.patch
samples-damon-introduce-a-skeleton-of-a-smaple-damon-module-for-proactive-reclamation.patch
samples-damon-prcl-implement-schemes-setup.patch
replace-free-hugepage-folios-after-migration-fix-2.patch
From: Chuck Lever <chuck.lever(a)oracle.com>
J. David reports an odd corruption of a READDIR reply sent to a
FreeBSD client.
xdr_reserve_space() has to do a special trick when the @nbytes value
requests more space than there is in the current page of the XDR
buffer.
In that case, xdr_reserve_space() returns a pointer to the start of
the next page, and then the next call to xdr_reserve_space() invokes
__xdr_commit_encode() to copy enough of the data item back into the
previous page to make that data item contiguous across the page
boundary.
But we need to be careful in the case where buffer space is reserved
early for a data item that will be inserted into the buffer later.
One such caller, nfsd4_encode_operation(), reserves 8 bytes in the
encoding buffer for each COMPOUND operation. However, a READDIR
result can sometimes encode file names so that there are only 4
bytes left at the end of the current XDR buffer page (though plenty
of pages are left to handle the remaining encoding tasks).
If a COMPOUND operation follows the READDIR result (say, a GETATTR),
then nfsd4_encode_operation() will reserve 8 bytes for the op number
(9) and the op status (usually NFS4_OK). In this weird case,
xdr_reserve_space() returns a pointer to byte zero of the next buffer
page, as it assumes the data item will be copied back into place (in
the previous page) on the next call to xdr_reserve_space().
nfsd4_encode_operation() writes the op num into the buffer, then
saves the next 4-byte location for the op's status code. The next
xdr_reserve_space() call is part of GETATTR encoding, so the op num
gets copied back into the previous page, but the saved location for
the op status continues to point to the wrong spot in the current
XDR buffer page because __xdr_commit_encode() moved that data item.
After GETATTR encoding is complete, nfsd4_encode_operation() writes
the op status over the first XDR data item in the GETATTR result.
The NFS4_OK status code (0) makes it look like there are zero items
in the GETATTR's attribute bitmask.
The patch description of commit 2825a7f90753 ("nfsd4: allow encoding
across page boundaries") [2014] remarks that NFSD "can't handle a
new operation starting close to the end of a page." This behavior
appears to be one reason for that remark.
Break up the reservation of the COMPOUND op num and op status data
items into two distinct 4-octet reservations. Thanks to XDR data
item alignment restrictions, a 4-octet buffer reservation can never
straddle a page boundary.
Reported-by: J David <j.david.lists(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/nfsd/nfs4xdr.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..8780da884197 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5764,10 +5764,11 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
nfsd4_enc encoder;
__be32 *p;
- p = xdr_reserve_space(xdr, 8);
+ if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
+ goto release;
+ p = xdr_reserve_space(xdr, XDR_UNIT);
if (!p)
goto release;
- *p++ = cpu_to_be32(op->opnum);
post_err_offset = xdr->buf->len;
if (op->opnum == OP_ILLEGAL)
--
2.47.0
Hi all,
This is the latest revision of a patchset that adds to XFS kernel
support for reverse mapping for the realtime device. This time around
I've fixed some of the bitrot that I've noticed over the past few
months, and most notably have converted rtrmapbt to use the metadata
inode directory feature instead of burning more space in the superblock.
At the beginning of the set are patches to implement storing B+tree
leaves in an inode root, since the realtime rmapbt is rooted in an
inode, unlike the regular rmapbt which is rooted in an AG block.
Prior to this, the only btree that could be rooted in the inode fork
was the block mapping btree; if all the extent records fit in the
inode, format would be switched from 'btree' to 'extents'.
The next few patches enhance the reverse mapping routines to handle
the parts that are specific to rtgroups -- adding the new btree type,
adding a new log intent item type, and wiring up the metadata directory
tree entries.
Finally, implement GETFSMAP with the rtrmapbt and scrub functionality
for the rtrmapbt and rtbitmap and online fsck functionality.
If you're going to start using this code, I strongly recommend pulling
from my git trees, which are linked below.
This has been running on the djcloud for months with no problems. Enjoy!
Comments and questions are, as always, welcome.
--D
kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=re…
xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h…
fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h…
xfsdocs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-documentation.git/l…
---
Commits in this patchset:
* xfs: add some rtgroup inode helpers
* xfs: prepare rmap btree cursor tracepoints for realtime
* xfs: simplify the xfs_rmap_{alloc,free}_extent calling conventions
* xfs: introduce realtime rmap btree ondisk definitions
* xfs: realtime rmap btree transaction reservations
* xfs: add realtime rmap btree operations
* xfs: prepare rmap functions to deal with rtrmapbt
* xfs: add a realtime flag to the rmap update log redo items
* xfs: support recovering rmap intent items targetting realtime extents
* xfs: pretty print metadata file types in error messages
* xfs: support file data forks containing metadata btrees
* xfs: add realtime reverse map inode to metadata directory
* xfs: add metadata reservations for realtime rmap btrees
* xfs: wire up a new metafile type for the realtime rmap
* xfs: wire up rmap map and unmap to the realtime rmapbt
* xfs: create routine to allocate and initialize a realtime rmap btree inode
* xfs: wire up getfsmap to the realtime reverse mapping btree
* xfs: check that the rtrmapbt maxlevels doesn't increase when growing fs
* xfs: report realtime rmap btree corruption errors to the health system
* xfs: allow queued realtime intents to drain before scrubbing
* xfs: scrub the realtime rmapbt
* xfs: cross-reference realtime bitmap to realtime rmapbt scrubber
* xfs: cross-reference the realtime rmapbt
* xfs: scan rt rmap when we're doing an intense rmap check of bmbt mappings
* xfs: scrub the metadir path of rt rmap btree files
* xfs: walk the rt reverse mapping tree when rebuilding rmap
* xfs: online repair of realtime file bmaps
* xfs: repair inodes that have realtime extents
* xfs: repair rmap btree inodes
* xfs: online repair of realtime bitmaps for a realtime group
* xfs: support repairing metadata btrees rooted in metadir inodes
* xfs: online repair of the realtime rmap btree
* xfs: create a shadow rmap btree during realtime rmap repair
* xfs: hook live realtime rmap operations during a repair operation
* xfs: don't shut down the filesystem for media failures beyond end of log
* xfs: react to fsdax failure notifications on the rt device
* xfs: enable realtime rmap btree
---
fs/xfs/Makefile | 3
fs/xfs/libxfs/xfs_btree.c | 73 +++
fs/xfs/libxfs/xfs_btree.h | 8
fs/xfs/libxfs/xfs_btree_mem.c | 1
fs/xfs/libxfs/xfs_btree_staging.c | 1
fs/xfs/libxfs/xfs_defer.h | 1
fs/xfs/libxfs/xfs_exchmaps.c | 4
fs/xfs/libxfs/xfs_format.h | 28 +
fs/xfs/libxfs/xfs_fs.h | 7
fs/xfs/libxfs/xfs_health.h | 4
fs/xfs/libxfs/xfs_inode_buf.c | 32 +
fs/xfs/libxfs/xfs_inode_fork.c | 25 +
fs/xfs/libxfs/xfs_log_format.h | 6
fs/xfs/libxfs/xfs_log_recover.h | 2
fs/xfs/libxfs/xfs_metafile.c | 18 +
fs/xfs/libxfs/xfs_metafile.h | 2
fs/xfs/libxfs/xfs_ondisk.h | 2
fs/xfs/libxfs/xfs_refcount.c | 6
fs/xfs/libxfs/xfs_rmap.c | 171 +++++-
fs/xfs/libxfs/xfs_rmap.h | 12
fs/xfs/libxfs/xfs_rtbitmap.c | 2
fs/xfs/libxfs/xfs_rtbitmap.h | 9
fs/xfs/libxfs/xfs_rtgroup.c | 53 +-
fs/xfs/libxfs/xfs_rtgroup.h | 49 ++
fs/xfs/libxfs/xfs_rtrmap_btree.c | 1011 +++++++++++++++++++++++++++++++++++++
fs/xfs/libxfs/xfs_rtrmap_btree.h | 210 ++++++++
fs/xfs/libxfs/xfs_sb.c | 6
fs/xfs/libxfs/xfs_shared.h | 14 +
fs/xfs/libxfs/xfs_trans_resv.c | 12
fs/xfs/libxfs/xfs_trans_space.h | 13
fs/xfs/scrub/alloc_repair.c | 5
fs/xfs/scrub/bmap.c | 108 +++-
fs/xfs/scrub/bmap_repair.c | 129 +++++
fs/xfs/scrub/common.c | 160 ++++++
fs/xfs/scrub/common.h | 23 +
fs/xfs/scrub/health.c | 1
fs/xfs/scrub/inode.c | 10
fs/xfs/scrub/inode_repair.c | 136 +++++
fs/xfs/scrub/metapath.c | 3
fs/xfs/scrub/newbt.c | 42 ++
fs/xfs/scrub/newbt.h | 1
fs/xfs/scrub/reap.c | 41 ++
fs/xfs/scrub/reap.h | 2
fs/xfs/scrub/repair.c | 191 +++++++
fs/xfs/scrub/repair.h | 17 +
fs/xfs/scrub/rgsuper.c | 6
fs/xfs/scrub/rmap_repair.c | 84 +++
fs/xfs/scrub/rtbitmap.c | 75 ++-
fs/xfs/scrub/rtbitmap.h | 55 ++
fs/xfs/scrub/rtbitmap_repair.c | 429 +++++++++++++++-
fs/xfs/scrub/rtrmap.c | 271 ++++++++++
fs/xfs/scrub/rtrmap_repair.c | 903 +++++++++++++++++++++++++++++++++
fs/xfs/scrub/rtsummary.c | 17 -
fs/xfs/scrub/rtsummary_repair.c | 3
fs/xfs/scrub/scrub.c | 11
fs/xfs/scrub/scrub.h | 14 +
fs/xfs/scrub/stats.c | 1
fs/xfs/scrub/tempexch.h | 2
fs/xfs/scrub/tempfile.c | 20 -
fs/xfs/scrub/trace.c | 1
fs/xfs/scrub/trace.h | 228 ++++++++
fs/xfs/xfs_buf.c | 1
fs/xfs/xfs_buf_item_recover.c | 4
fs/xfs/xfs_drain.c | 20 -
fs/xfs/xfs_drain.h | 7
fs/xfs/xfs_fsmap.c | 174 ++++++
fs/xfs/xfs_fsops.c | 11
fs/xfs/xfs_health.c | 1
fs/xfs/xfs_inode.c | 19 +
fs/xfs/xfs_inode_item.c | 2
fs/xfs/xfs_inode_item_recover.c | 44 +-
fs/xfs/xfs_log_recover.c | 2
fs/xfs/xfs_mount.c | 5
fs/xfs/xfs_mount.h | 9
fs/xfs/xfs_notify_failure.c | 230 +++++---
fs/xfs/xfs_notify_failure.h | 11
fs/xfs/xfs_qm.c | 8
fs/xfs/xfs_rmap_item.c | 216 +++++++-
fs/xfs/xfs_rtalloc.c | 82 ++-
fs/xfs/xfs_rtalloc.h | 10
fs/xfs/xfs_stats.c | 4
fs/xfs/xfs_stats.h | 2
fs/xfs/xfs_super.c | 6
fs/xfs/xfs_super.h | 1
fs/xfs/xfs_trace.h | 104 ++--
85 files changed, 5381 insertions(+), 366 deletions(-)
create mode 100644 fs/xfs/libxfs/xfs_rtrmap_btree.c
create mode 100644 fs/xfs/libxfs/xfs_rtrmap_btree.h
create mode 100644 fs/xfs/scrub/rtrmap.c
create mode 100644 fs/xfs/scrub/rtrmap_repair.c
create mode 100644 fs/xfs/xfs_notify_failure.h
Hi all,
Bug fixes for 6.13.
If you're going to start using this code, I strongly recommend pulling
from my git trees, which are linked below.
This has been running on the djcloud for months with no problems. Enjoy!
Comments and questions are, as always, welcome.
--D
kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=xf…
xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h…
---
Commits in this patchset:
* xfs: don't over-report free space or inodes in statvfs
* xfs: release the dquot buf outside of qli_lock
---
fs/xfs/xfs_dquot.c | 12 ++++++++----
fs/xfs/xfs_qm_bhv.c | 27 +++++++++++++++++----------
2 files changed, 25 insertions(+), 14 deletions(-)
Hi Andrey,
Please pull this branch with changes for xfsprogs for 6.11-rc1.
As usual, I did a test-merge with the main upstream branch as of a few
minutes ago, and didn't see any conflicts. Please let me know if you
encounter any problems.
--D
The following changes since commit 513300e9565b0d446ac8e6a3a990444d766c728b:
mkfs: add a utility to generate protofiles (2024-12-23 13:05:10 -0800)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfsprogs-dev.git tags/xfs-6.13-merge_2024-12-23
for you to fetch changes up to 80b81c84f015ee01fed80c32184cc763ee1a655e:
xfs: return from xfs_symlink_verify early on V4 filesystems (2024-12-23 13:05:13 -0800)
----------------------------------------------------------------
xfsprogs: new code for 6.13 [05/23]
New code for 6.12.
This has been running on the djcloud for months with no problems. Enjoy!
Signed-off-by: "Darrick J. Wong" <djwong(a)kernel.org>
----------------------------------------------------------------
Christoph Hellwig (9):
xfs: add a xfs_bmap_free_rtblocks helper
xfs: move RT bitmap and summary information to the rtgroup
xfs: support creating per-RTG files in growfs
xfs: refactor xfs_rtbitmap_blockcount
xfs: refactor xfs_rtsummary_blockcount
xfs: make RT extent numbers relative to the rtgroup
xfs: add a helper to prevent bmap merges across rtgroup boundaries
xfs: make the RT allocator rtgroup aware
xfs: don't call xfs_bmap_same_rtgroup in xfs_bmap_add_extent_hole_delay
Darrick J. Wong (40):
xfs: create incore realtime group structures
xfs: define locking primitives for realtime groups
xfs: add a lockdep class key for rtgroup inodes
xfs: support caching rtgroup metadata inodes
libfrog: add memchr_inv
xfs: define the format of rt groups
xfs: update realtime super every time we update the primary fs super
xfs: export realtime group geometry via XFS_FSOP_GEOM
xfs: check that rtblock extents do not break rtsupers or rtgroups
xfs: add frextents to the lazysbcounters when rtgroups enabled
xfs: record rt group metadata errors in the health system
xfs: export the geometry of realtime groups to userspace
xfs: add block headers to realtime bitmap and summary blocks
xfs: encode the rtbitmap in big endian format
xfs: encode the rtsummary in big endian format
xfs: grow the realtime section when realtime groups are enabled
xfs: support logging EFIs for realtime extents
xfs: support error injection when freeing rt extents
xfs: use realtime EFI to free extents when rtgroups are enabled
xfs: don't merge ioends across RTGs
xfs: scrub the realtime group superblock
xfs: scrub metadir paths for rtgroup metadata
xfs: mask off the rtbitmap and summary inodes when metadir in use
xfs: create helpers to deal with rounding xfs_fileoff_t to rtx boundaries
xfs: create helpers to deal with rounding xfs_filblks_t to rtx boundaries
xfs: make xfs_rtblock_t a segmented address like xfs_fsblock_t
xfs: adjust min_block usage in xfs_verify_agbno
xfs: move the min and max group block numbers to xfs_group
xfs: implement busy extent tracking for rtgroups
xfs: use metadir for quota inodes
xfs: scrub quota file metapaths
xfs: enable metadata directory feature
xfs: convert struct typedefs in xfs_ondisk.h
xfs: separate space btree structures in xfs_ondisk.h
xfs: port ondisk structure checks from xfs/122 to the kernel
xfs: return a 64-bit block count from xfs_btree_count_blocks
xfs: fix error bailout in xfs_rtginode_create
xfs: update btree keys correctly when _insrec splits an inode root block
xfs: fix sb_spino_align checks for large fsblock sizes
xfs: return from xfs_symlink_verify early on V4 filesystems
Dave Chinner (1):
xfs: fix sparse inode limits on runt AG
Jeff Layton (1):
xfs: switch to multigrain timestamps
Long Li (1):
xfs: remove unknown compat feature check in superblock write validation
db/block.c | 2 +-
db/block.h | 16 -
db/convert.c | 1 -
db/faddr.c | 1 -
include/libxfs.h | 2 +
include/platform_defs.h | 33 +++
include/xfs_mount.h | 30 +-
include/xfs_trace.h | 7 +
include/xfs_trans.h | 1 +
libfrog/util.c | 14 +
libfrog/util.h | 4 +
libxfs/Makefile | 2 +
libxfs/init.c | 35 ++-
libxfs/libxfs_api_defs.h | 16 +
libxfs/libxfs_io.h | 1 +
libxfs/libxfs_priv.h | 34 +--
libxfs/rdwr.c | 17 ++
libxfs/trans.c | 29 ++
libxfs/util.c | 8 +-
libxfs/xfs_ag.c | 22 +-
libxfs/xfs_ag.h | 16 +-
libxfs/xfs_alloc.c | 15 +-
libxfs/xfs_alloc.h | 12 +-
libxfs/xfs_bmap.c | 124 ++++++--
libxfs/xfs_btree.c | 33 ++-
libxfs/xfs_btree.h | 2 +-
libxfs/xfs_defer.c | 6 +
libxfs/xfs_defer.h | 1 +
libxfs/xfs_dquot_buf.c | 190 ++++++++++++
libxfs/xfs_format.h | 80 ++++-
libxfs/xfs_fs.h | 32 +-
libxfs/xfs_group.h | 33 +++
libxfs/xfs_health.h | 42 +--
libxfs/xfs_ialloc.c | 16 +-
libxfs/xfs_ialloc_btree.c | 6 +-
libxfs/xfs_log_format.h | 6 +-
libxfs/xfs_ondisk.h | 186 +++++++++---
libxfs/xfs_quota_defs.h | 43 +++
libxfs/xfs_rtbitmap.c | 405 +++++++++++++++++---------
libxfs/xfs_rtbitmap.h | 247 ++++++++++------
libxfs/xfs_rtgroup.c | 694 ++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_rtgroup.h | 284 ++++++++++++++++++
libxfs/xfs_sb.c | 246 ++++++++++++++--
libxfs/xfs_sb.h | 6 +-
libxfs/xfs_shared.h | 4 +
libxfs/xfs_symlink_remote.c | 4 +-
libxfs/xfs_trans_inode.c | 6 +-
libxfs/xfs_trans_resv.c | 2 +-
libxfs/xfs_types.c | 35 ++-
libxfs/xfs_types.h | 8 +-
mkfs/proto.c | 33 ++-
mkfs/xfs_mkfs.c | 8 +
repair/dinode.c | 4 +-
repair/phase6.c | 203 +++++++------
repair/rt.c | 34 +--
repair/rt.h | 4 +-
56 files changed, 2728 insertions(+), 617 deletions(-)
create mode 100644 libxfs/xfs_rtgroup.c
create mode 100644 libxfs/xfs_rtgroup.h
Hi all,
New code for 6.12.
If you're going to start using this code, I strongly recommend pulling
from my git trees, which are linked below.
This has been running on the djcloud for months with no problems. Enjoy!
Comments and questions are, as always, welcome.
--D
kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=xf…
xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h…
---
Commits in this patchset:
* xfs: create incore realtime group structures
* xfs: define locking primitives for realtime groups
* xfs: add a lockdep class key for rtgroup inodes
* xfs: support caching rtgroup metadata inodes
* xfs: add a xfs_bmap_free_rtblocks helper
* xfs: move RT bitmap and summary information to the rtgroup
* xfs: support creating per-RTG files in growfs
* xfs: refactor xfs_rtbitmap_blockcount
* xfs: refactor xfs_rtsummary_blockcount
* xfs: make RT extent numbers relative to the rtgroup
* libfrog: add memchr_inv
* xfs: define the format of rt groups
* xfs: update realtime super every time we update the primary fs super
* xfs: export realtime group geometry via XFS_FSOP_GEOM
* xfs: check that rtblock extents do not break rtsupers or rtgroups
* xfs: add a helper to prevent bmap merges across rtgroup boundaries
* xfs: add frextents to the lazysbcounters when rtgroups enabled
* xfs: record rt group metadata errors in the health system
* xfs: export the geometry of realtime groups to userspace
* xfs: add block headers to realtime bitmap and summary blocks
* xfs: encode the rtbitmap in big endian format
* xfs: encode the rtsummary in big endian format
* xfs: grow the realtime section when realtime groups are enabled
* xfs: support logging EFIs for realtime extents
* xfs: support error injection when freeing rt extents
* xfs: use realtime EFI to free extents when rtgroups are enabled
* xfs: don't merge ioends across RTGs
* xfs: make the RT allocator rtgroup aware
* xfs: scrub the realtime group superblock
* xfs: scrub metadir paths for rtgroup metadata
* xfs: mask off the rtbitmap and summary inodes when metadir in use
* xfs: create helpers to deal with rounding xfs_fileoff_t to rtx boundaries
* xfs: create helpers to deal with rounding xfs_filblks_t to rtx boundaries
* xfs: make xfs_rtblock_t a segmented address like xfs_fsblock_t
* xfs: adjust min_block usage in xfs_verify_agbno
* xfs: move the min and max group block numbers to xfs_group
* xfs: implement busy extent tracking for rtgroups
* xfs: use metadir for quota inodes
* xfs: scrub quota file metapaths
* xfs: enable metadata directory feature
* xfs: convert struct typedefs in xfs_ondisk.h
* xfs: separate space btree structures in xfs_ondisk.h
* xfs: port ondisk structure checks from xfs/122 to the kernel
* xfs: remove unknown compat feature check in superblock write validation
* xfs: fix sparse inode limits on runt AG
* xfs: switch to multigrain timestamps
* xfs: don't call xfs_bmap_same_rtgroup in xfs_bmap_add_extent_hole_delay
* xfs: return a 64-bit block count from xfs_btree_count_blocks
* xfs: fix error bailout in xfs_rtginode_create
* xfs: update btree keys correctly when _insrec splits an inode root block
* xfs: fix sb_spino_align checks for large fsblock sizes
* xfs: return from xfs_symlink_verify early on V4 filesystems
---
db/block.c | 2
db/block.h | 16 -
db/convert.c | 1
db/faddr.c | 1
include/libxfs.h | 2
include/platform_defs.h | 33 ++
include/xfs_mount.h | 30 +-
include/xfs_trace.h | 7
include/xfs_trans.h | 1
libfrog/util.c | 14 +
libfrog/util.h | 4
libxfs/Makefile | 2
libxfs/init.c | 35 ++
libxfs/libxfs_api_defs.h | 16 +
libxfs/libxfs_io.h | 1
libxfs/libxfs_priv.h | 34 --
libxfs/rdwr.c | 17 +
libxfs/trans.c | 29 ++
libxfs/util.c | 8
libxfs/xfs_ag.c | 22 +
libxfs/xfs_ag.h | 16 -
libxfs/xfs_alloc.c | 15 +
libxfs/xfs_alloc.h | 12 +
libxfs/xfs_bmap.c | 124 ++++++--
libxfs/xfs_btree.c | 33 ++
libxfs/xfs_btree.h | 2
libxfs/xfs_defer.c | 6
libxfs/xfs_defer.h | 1
libxfs/xfs_dquot_buf.c | 190 ++++++++++++
libxfs/xfs_format.h | 80 +++++
libxfs/xfs_fs.h | 32 ++
libxfs/xfs_group.h | 33 ++
libxfs/xfs_health.h | 42 ++-
libxfs/xfs_ialloc.c | 16 +
libxfs/xfs_ialloc_btree.c | 6
libxfs/xfs_log_format.h | 6
libxfs/xfs_ondisk.h | 186 +++++++++---
libxfs/xfs_quota_defs.h | 43 +++
libxfs/xfs_rtbitmap.c | 405 +++++++++++++++++--------
libxfs/xfs_rtbitmap.h | 247 ++++++++++-----
libxfs/xfs_rtgroup.c | 694 +++++++++++++++++++++++++++++++++++++++++++
libxfs/xfs_rtgroup.h | 284 ++++++++++++++++++
libxfs/xfs_sb.c | 246 ++++++++++++++-
libxfs/xfs_sb.h | 6
libxfs/xfs_shared.h | 4
libxfs/xfs_symlink_remote.c | 4
libxfs/xfs_trans_inode.c | 6
libxfs/xfs_trans_resv.c | 2
libxfs/xfs_types.c | 35 ++
libxfs/xfs_types.h | 8
mkfs/proto.c | 33 +-
mkfs/xfs_mkfs.c | 8
repair/dinode.c | 4
repair/phase6.c | 203 ++++++-------
repair/rt.c | 34 --
repair/rt.h | 4
56 files changed, 2728 insertions(+), 617 deletions(-)
create mode 100644 libxfs/xfs_rtgroup.c
create mode 100644 libxfs/xfs_rtgroup.h
The Host Port (i.e. CPU facing port) of CPSW receives traffic from Linux
via TX DMA Channels which are Hardware Queues consisting of traffic
categorized according to their priority. The Host Port is configured to
dequeue traffic from these Hardware Queues on the basis of priority i.e.
as long as traffic exists on a Hardware Queue of a higher priority, the
traffic on Hardware Queues of lower priority isn't dequeued. An alternate
operation is also supported wherein traffic can be dequeued by the Host
Port in a Round-Robin manner.
Until [0], the am65-cpsw driver enabled a single TX DMA Channel, due to
which, unless modified by user via "ethtool", all traffic from Linux is
transmitted on DMA Channel 0. Therefore, configuring the Host Port for
priority based dequeuing or Round-Robin operation is identical since
there is a single DMA Channel.
Since [0], all 8 TX DMA Channels are enabled by default. Additionally,
the default "tc mapping" doesn't take into account the possibility of
different traffic profiles which various users might have. This results
in traffic starvation at the Host Port due to the priority based dequeuing
which has been enabled by default since the inception of the driver. The
traffic starvation triggers NETDEV WATCHDOG timeout for all TX DMA Channels
that haven't been serviced due to the presence of traffic on the higher
priority TX DMA Channels.
Fix this by defaulting to Round-Robin dequeuing at the Host Port, which
shall ensure that traffic is dequeued from all TX DMA Channels irrespective
of the traffic profile. This will address the NETDEV WATCHDOG timeouts.
At the same time, users can still switch from Round-Robin to Priority
based dequeuing at the Host Port with the help of the "p0-rx-ptype-rrobin"
private flag of "ethtool". Users are expected to setup an appropriate
"tc mapping" that suits their traffic profile when switching to priority
based dequeuing at the Host Port.
[0] commit be397ea3473d ("net: ethernet: am65-cpsw: Set default TX channels to maximum")
Fixes: be397ea3473d ("net: ethernet: am65-cpsw: Set default TX channels to maximum")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Siddharth Vadapalli <s-vadapalli(a)ti.com>
---
Hello,
This patch is based on commit
8faabc041a00 Merge tag 'net-6.13-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
of Mainline Linux.
Regards,
Siddharth.
drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 14e1df721f2e..5465bf872734 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -3551,7 +3551,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
init_completion(&common->tdown_complete);
common->tx_ch_num = AM65_CPSW_DEFAULT_TX_CHNS;
common->rx_ch_num_flows = AM65_CPSW_DEFAULT_RX_CHN_FLOWS;
- common->pf_p0_rx_ptype_rrobin = false;
+ common->pf_p0_rx_ptype_rrobin = true;
common->default_vlan = 1;
common->ports = devm_kcalloc(dev, common->port_num,
--
2.43.0
From: Chuck Lever <chuck.lever(a)oracle.com>
Testing shows that the EBUSY error return from mtree_alloc_cyclic()
leaks into user space. The ERRORS section of "man creat(2)" says:
> EBUSY O_EXCL was specified in flags and pathname refers
> to a block device that is in use by the system
> (e.g., it is mounted).
ENOSPC is closer to what applications expect in this situation.
Note that the normal range of simple directory offset values is
2..2^63, so hitting this error is going to be rare to impossible.
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
Cc: <stable(a)vger.kernel.org> # v6.9+
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Reviewed-by: Yang Erkun <yangerkun(a)huawei.com>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/libfs.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/libfs.c b/fs/libfs.c
index 748ac5923154..3da58a92f48f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -292,8 +292,8 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
LONG_MAX, &octx->next_offset, GFP_KERNEL);
- if (ret < 0)
- return ret;
+ if (unlikely(ret < 0))
+ return ret == -EBUSY ? -ENOSPC : ret;
offset_set(dentry, offset);
return 0;
--
2.47.0
Hello,
Below commit was ported to 6.12, but I would like to request porting to the 6.6
longterm branch we are currently using:
commit c809b0d0e52d01c30066367b2952c4c4186b1047
Author: Borislav Petkov (AMD) <bp(a)alien8.de>
Date: 2024-11-19 12:21:33 +0100
x86/microcode/AMD: Flush patch buffer mapping after application
[...]
The patch itself is small, but the consequence of not patching is large on
affected systems (tens of seconds to minutes, of boot delay). See original
discussion [1] for details.
The patch in master relies on a variable 'bsp_cpuid_1_eax' introduced in commit
94838d230a6c ("x86/microcode/AMD: Use the family,model,stepping encoded in the
patch ID"), but porting that entire commit seems excessive, especially because
there are several 'Fixes' commits for that one (e.g. 5343558a868e, d1744a4c975b,
1d81d85d1a19).
I think the simplest prerequisite change is (for Borislav Petkov to confirm):
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index bbd1dc38ea03..555fa76bd1f3 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -96,6 +97,8 @@ struct cont_desc {
static u32 ucode_new_rev;
+static u32 bsp_cpuid_1_eax __ro_after_init;
+
/*
* Microcode patch container file is prepended to the initrd in cpio
* format. See Documentation/arch/x86/microcode.rst
@@ -551,6 +566,7 @@ static void apply_ucode_from_containers(unsigned int cpuid_1_eax)
void load_ucode_amd_early(unsigned int cpuid_1_eax)
{
+ bsp_cpuid_1_eax = cpuid_1_eax;
return apply_ucode_from_containers(cpuid_1_eax);
}
Thanks,
Thomas
[1] https://lore.kernel.org/lkml/ZyulbYuvrkshfsd2@antipodes/T/
On certain i.MX8 series parts [1], the PPS channel 0
is routed internally to eDMA, and the external PPS
pin is available on channel 1. In addition, on
certain boards, the PPS may be wired on the PCB to
an EVENTOUTn pin other than 0. On these systems
it is necessary that the PPS channel be able
to be configured from the Device Tree.
[1] https://lore.kernel.org/all/ZrPYOWA3FESx197L@lizhi-Precision-Tower-5810/
Now that these patches are applied to 6.12.y,
it is time to include it in the upcoming 6.6.67.
This series is picked onto 6.6.66.
Francesco Dolcini (3):
dt-bindings: net: fec: add pps channel property
net: fec: refactor PPS channel configuration
net: fec: make PPS channel configurable
Documentation/devicetree/bindings/net/fsl,fec.yaml | 7 +++++++
drivers/net/ethernet/freescale/fec_ptp.c | 11 ++++++-----
2 files changed, 13 insertions(+), 5 deletions(-)
--
2.34.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 7f05e20b989ac33c9c0f8c2028ec0a566493548f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122321-region-charger-ed44@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f05e20b989ac33c9c0f8c2028ec0a566493548f Mon Sep 17 00:00:00 2001
From: Andrea della Porta <andrea.porta(a)suse.com>
Date: Sun, 24 Nov 2024 11:05:37 +0100
Subject: [PATCH] of: address: Preserve the flags portion on 1:1 dma-ranges
mapping
A missing or empty dma-ranges in a DT node implies a 1:1 mapping for dma
translations. In this specific case, the current behaviour is to zero out
the entire specifier so that the translation could be carried on as an
offset from zero. This includes address specifier that has flags (e.g.
PCI ranges).
Once the flags portion has been zeroed, the translation chain is broken
since the mapping functions will check the upcoming address specifier
against mismatching flags, always failing the 1:1 mapping and its entire
purpose of always succeeding.
Set to zero only the address portion while passing the flags through.
Fixes: dbbdee94734b ("of/address: Merge all of the bus translation code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrea della Porta <andrea.porta(a)suse.com>
Tested-by: Herve Codina <herve.codina(a)bootlin.com>
Link: https://lore.kernel.org/r/e51ae57874e58a9b349c35e2e877425ebc075d7a.17324418…
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c5b925ac469f..5b7ee3ed5296 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -459,7 +459,8 @@ static int of_translate_one(const struct device_node *parent, const struct of_bu
}
if (ranges == NULL || rlen == 0) {
offset = of_read_number(addr, na);
- memset(addr, 0, pna * 4);
+ /* set address to zero, pass flags through */
+ memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4);
pr_debug("empty ranges; 1:1 translation\n");
goto finish;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 7f05e20b989ac33c9c0f8c2028ec0a566493548f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122321-uptake-awhile-44ab@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f05e20b989ac33c9c0f8c2028ec0a566493548f Mon Sep 17 00:00:00 2001
From: Andrea della Porta <andrea.porta(a)suse.com>
Date: Sun, 24 Nov 2024 11:05:37 +0100
Subject: [PATCH] of: address: Preserve the flags portion on 1:1 dma-ranges
mapping
A missing or empty dma-ranges in a DT node implies a 1:1 mapping for dma
translations. In this specific case, the current behaviour is to zero out
the entire specifier so that the translation could be carried on as an
offset from zero. This includes address specifier that has flags (e.g.
PCI ranges).
Once the flags portion has been zeroed, the translation chain is broken
since the mapping functions will check the upcoming address specifier
against mismatching flags, always failing the 1:1 mapping and its entire
purpose of always succeeding.
Set to zero only the address portion while passing the flags through.
Fixes: dbbdee94734b ("of/address: Merge all of the bus translation code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrea della Porta <andrea.porta(a)suse.com>
Tested-by: Herve Codina <herve.codina(a)bootlin.com>
Link: https://lore.kernel.org/r/e51ae57874e58a9b349c35e2e877425ebc075d7a.17324418…
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c5b925ac469f..5b7ee3ed5296 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -459,7 +459,8 @@ static int of_translate_one(const struct device_node *parent, const struct of_bu
}
if (ranges == NULL || rlen == 0) {
offset = of_read_number(addr, na);
- memset(addr, 0, pna * 4);
+ /* set address to zero, pass flags through */
+ memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4);
pr_debug("empty ranges; 1:1 translation\n");
goto finish;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7f05e20b989ac33c9c0f8c2028ec0a566493548f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122356-overdress-spoiler-5b36@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f05e20b989ac33c9c0f8c2028ec0a566493548f Mon Sep 17 00:00:00 2001
From: Andrea della Porta <andrea.porta(a)suse.com>
Date: Sun, 24 Nov 2024 11:05:37 +0100
Subject: [PATCH] of: address: Preserve the flags portion on 1:1 dma-ranges
mapping
A missing or empty dma-ranges in a DT node implies a 1:1 mapping for dma
translations. In this specific case, the current behaviour is to zero out
the entire specifier so that the translation could be carried on as an
offset from zero. This includes address specifier that has flags (e.g.
PCI ranges).
Once the flags portion has been zeroed, the translation chain is broken
since the mapping functions will check the upcoming address specifier
against mismatching flags, always failing the 1:1 mapping and its entire
purpose of always succeeding.
Set to zero only the address portion while passing the flags through.
Fixes: dbbdee94734b ("of/address: Merge all of the bus translation code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrea della Porta <andrea.porta(a)suse.com>
Tested-by: Herve Codina <herve.codina(a)bootlin.com>
Link: https://lore.kernel.org/r/e51ae57874e58a9b349c35e2e877425ebc075d7a.17324418…
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c5b925ac469f..5b7ee3ed5296 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -459,7 +459,8 @@ static int of_translate_one(const struct device_node *parent, const struct of_bu
}
if (ranges == NULL || rlen == 0) {
offset = of_read_number(addr, na);
- memset(addr, 0, pna * 4);
+ /* set address to zero, pass flags through */
+ memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4);
pr_debug("empty ranges; 1:1 translation\n");
goto finish;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 7f05e20b989ac33c9c0f8c2028ec0a566493548f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122356-zen-badland-29c9@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f05e20b989ac33c9c0f8c2028ec0a566493548f Mon Sep 17 00:00:00 2001
From: Andrea della Porta <andrea.porta(a)suse.com>
Date: Sun, 24 Nov 2024 11:05:37 +0100
Subject: [PATCH] of: address: Preserve the flags portion on 1:1 dma-ranges
mapping
A missing or empty dma-ranges in a DT node implies a 1:1 mapping for dma
translations. In this specific case, the current behaviour is to zero out
the entire specifier so that the translation could be carried on as an
offset from zero. This includes address specifier that has flags (e.g.
PCI ranges).
Once the flags portion has been zeroed, the translation chain is broken
since the mapping functions will check the upcoming address specifier
against mismatching flags, always failing the 1:1 mapping and its entire
purpose of always succeeding.
Set to zero only the address portion while passing the flags through.
Fixes: dbbdee94734b ("of/address: Merge all of the bus translation code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrea della Porta <andrea.porta(a)suse.com>
Tested-by: Herve Codina <herve.codina(a)bootlin.com>
Link: https://lore.kernel.org/r/e51ae57874e58a9b349c35e2e877425ebc075d7a.17324418…
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c5b925ac469f..5b7ee3ed5296 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -459,7 +459,8 @@ static int of_translate_one(const struct device_node *parent, const struct of_bu
}
if (ranges == NULL || rlen == 0) {
offset = of_read_number(addr, na);
- memset(addr, 0, pna * 4);
+ /* set address to zero, pass flags through */
+ memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4);
pr_debug("empty ranges; 1:1 translation\n");
goto finish;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 7f05e20b989ac33c9c0f8c2028ec0a566493548f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122347-fiscally-slacks-e686@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7f05e20b989ac33c9c0f8c2028ec0a566493548f Mon Sep 17 00:00:00 2001
From: Andrea della Porta <andrea.porta(a)suse.com>
Date: Sun, 24 Nov 2024 11:05:37 +0100
Subject: [PATCH] of: address: Preserve the flags portion on 1:1 dma-ranges
mapping
A missing or empty dma-ranges in a DT node implies a 1:1 mapping for dma
translations. In this specific case, the current behaviour is to zero out
the entire specifier so that the translation could be carried on as an
offset from zero. This includes address specifier that has flags (e.g.
PCI ranges).
Once the flags portion has been zeroed, the translation chain is broken
since the mapping functions will check the upcoming address specifier
against mismatching flags, always failing the 1:1 mapping and its entire
purpose of always succeeding.
Set to zero only the address portion while passing the flags through.
Fixes: dbbdee94734b ("of/address: Merge all of the bus translation code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrea della Porta <andrea.porta(a)suse.com>
Tested-by: Herve Codina <herve.codina(a)bootlin.com>
Link: https://lore.kernel.org/r/e51ae57874e58a9b349c35e2e877425ebc075d7a.17324418…
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/address.c b/drivers/of/address.c
index c5b925ac469f..5b7ee3ed5296 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -459,7 +459,8 @@ static int of_translate_one(const struct device_node *parent, const struct of_bu
}
if (ranges == NULL || rlen == 0) {
offset = of_read_number(addr, na);
- memset(addr, 0, pna * 4);
+ /* set address to zero, pass flags through */
+ memset(addr + pbus->flag_cells, 0, (pna - pbus->flag_cells) * 4);
pr_debug("empty ranges; 1:1 translation\n");
goto finish;
}
Hello,
Please consider applying the following patch to the 6.1.y stable tree.
Commit: 1ea1cd11c72d1405a6b98440a9d5ea82dfa07166
Subject: udf: Fix directory iteration for longer tail extents
Kernel Versions: 6.1.y
Reason for inclusion: This patch resolves a performance issue that slows
down directory traversal in UDF ISO images. The problem affects systems
running kernel version 6.1.y. The fix has been merged into the mainline
as commit 1ea1cd11c. No prep patches are needed.
Link to upstream commit:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--
Shedi
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 66e0c4f91461d17d48071695271c824620bed4ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122357-underuse-trousers-8f4d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 66e0c4f91461d17d48071695271c824620bed4ef Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov(a)gmail.com>
Date: Fri, 6 Dec 2024 17:32:59 +0100
Subject: [PATCH] ceph: fix memory leak in ceph_direct_read_write()
The bvecs array which is allocated in iter_get_bvecs_alloc() is leaked
and pages remain pinned if ceph_alloc_sparse_ext_map() fails.
There is no need to delay the allocation of sparse_ext map until after
the bvecs array is set up, so fix this by moving sparse_ext allocation
a bit earlier. Also, make a similar adjustment in __ceph_sync_read()
for consistency (a leak of the same kind in __ceph_sync_read() has been
addressed differently).
Cc: stable(a)vger.kernel.org
Fixes: 03bc06c7b0bd ("ceph: add new mount option to enable sparse reads")
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
Reviewed-by: Alex Markuze <amarkuze(a)redhat.com>
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8e0400d461a2..67468d88f139 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1116,6 +1116,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
len = read_off + read_len - off;
more = len < iov_iter_count(to);
+ op = &req->r_ops[0];
+ if (sparse) {
+ extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
+ ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ break;
+ }
+ }
+
num_pages = calc_pages_for(read_off, read_len);
page_off = offset_in_page(off);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
@@ -1129,16 +1139,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
offset_in_page(read_off),
false, true);
- op = &req->r_ops[0];
- if (sparse) {
- extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
- ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
- if (ret) {
- ceph_osdc_put_request(req);
- break;
- }
- }
-
ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
@@ -1551,6 +1551,16 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
break;
}
+ op = &req->r_ops[0];
+ if (sparse) {
+ extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+ ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ break;
+ }
+ }
+
len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
if (len < 0) {
ceph_osdc_put_request(req);
@@ -1560,6 +1570,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (len != size)
osd_req_op_extent_update(req, 0, len);
+ osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
+
/*
* To simplify error handling, allow AIO when IO within i_size
* or IO can be satisfied by single OSD request.
@@ -1591,17 +1603,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
req->r_mtime = mtime;
}
- osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
- op = &req->r_ops[0];
- if (sparse) {
- extent_cnt = __ceph_sparse_read_ext_count(inode, size);
- ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
- if (ret) {
- ceph_osdc_put_request(req);
- break;
- }
- }
-
if (aio_req) {
aio_req->total_len += len;
aio_req->num_reqs++;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x bc7acc0bd0f94c26bc0defc902311794a3d0fae9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122323-online-freemason-996f@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bc7acc0bd0f94c26bc0defc902311794a3d0fae9 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland(a)sifive.com>
Date: Wed, 20 Nov 2024 15:31:16 -0800
Subject: [PATCH] of: property: fw_devlink: Do not use interrupt-parent
directly
commit 7f00be96f125 ("of: property: Add device link support for
interrupt-parent, dmas and -gpio(s)") started adding device links for
the interrupt-parent property. commit 4104ca776ba3 ("of: property: Add
fw_devlink support for interrupts") and commit f265f06af194 ("of:
property: Fix fw_devlink handling of interrupts/interrupts-extended")
later added full support for parsing the interrupts and
interrupts-extended properties, which includes looking up the node of
the parent domain. This made the handler for the interrupt-parent
property redundant.
In fact, creating device links based solely on interrupt-parent is
problematic, because it can create spurious cycles. A node may have
this property without itself being an interrupt controller or consumer.
For example, this property is often present in the root node or a /soc
bus node to set the default interrupt parent for child nodes. However,
it is incorrect for the bus to depend on the interrupt controller, as
some of the bus's children may not be interrupt consumers at all or may
have a different interrupt parent.
Resolving these spurious dependency cycles can cause an incorrect probe
order for interrupt controller drivers. This was observed on a RISC-V
system with both an APLIC and IMSIC under /soc, where interrupt-parent
in /soc points to the APLIC, and the APLIC msi-parent points to the
IMSIC. fw_devlink found three dependency cycles and attempted to probe
the APLIC before the IMSIC. After applying this patch, there were no
dependency cycles and the probe order was correct.
Acked-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Fixes: 4104ca776ba3 ("of: property: Add fw_devlink support for interrupts")
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
Link: https://lore.kernel.org/r/20241120233124.3649382-1-samuel.holland@sifive.com
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 519bf9229e61..cfc8aea002e4 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1286,7 +1286,6 @@ DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells")
DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells")
DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells")
DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells")
-DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL)
DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells")
@@ -1432,7 +1431,6 @@ static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_mboxes, },
{ .parse_prop = parse_io_channels, },
{ .parse_prop = parse_io_backends, },
- { .parse_prop = parse_interrupt_parent, },
{ .parse_prop = parse_dmas, .optional = true, },
{ .parse_prop = parse_power_domains, },
{ .parse_prop = parse_hwlocks, },
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x bc7acc0bd0f94c26bc0defc902311794a3d0fae9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122322-tartly-crowbar-8f79@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bc7acc0bd0f94c26bc0defc902311794a3d0fae9 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland(a)sifive.com>
Date: Wed, 20 Nov 2024 15:31:16 -0800
Subject: [PATCH] of: property: fw_devlink: Do not use interrupt-parent
directly
commit 7f00be96f125 ("of: property: Add device link support for
interrupt-parent, dmas and -gpio(s)") started adding device links for
the interrupt-parent property. commit 4104ca776ba3 ("of: property: Add
fw_devlink support for interrupts") and commit f265f06af194 ("of:
property: Fix fw_devlink handling of interrupts/interrupts-extended")
later added full support for parsing the interrupts and
interrupts-extended properties, which includes looking up the node of
the parent domain. This made the handler for the interrupt-parent
property redundant.
In fact, creating device links based solely on interrupt-parent is
problematic, because it can create spurious cycles. A node may have
this property without itself being an interrupt controller or consumer.
For example, this property is often present in the root node or a /soc
bus node to set the default interrupt parent for child nodes. However,
it is incorrect for the bus to depend on the interrupt controller, as
some of the bus's children may not be interrupt consumers at all or may
have a different interrupt parent.
Resolving these spurious dependency cycles can cause an incorrect probe
order for interrupt controller drivers. This was observed on a RISC-V
system with both an APLIC and IMSIC under /soc, where interrupt-parent
in /soc points to the APLIC, and the APLIC msi-parent points to the
IMSIC. fw_devlink found three dependency cycles and attempted to probe
the APLIC before the IMSIC. After applying this patch, there were no
dependency cycles and the probe order was correct.
Acked-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Fixes: 4104ca776ba3 ("of: property: Add fw_devlink support for interrupts")
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
Link: https://lore.kernel.org/r/20241120233124.3649382-1-samuel.holland@sifive.com
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 519bf9229e61..cfc8aea002e4 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1286,7 +1286,6 @@ DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells")
DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells")
DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells")
DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells")
-DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL)
DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells")
@@ -1432,7 +1431,6 @@ static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_mboxes, },
{ .parse_prop = parse_io_channels, },
{ .parse_prop = parse_io_backends, },
- { .parse_prop = parse_interrupt_parent, },
{ .parse_prop = parse_dmas, .optional = true, },
{ .parse_prop = parse_power_domains, },
{ .parse_prop = parse_hwlocks, },
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x bc7acc0bd0f94c26bc0defc902311794a3d0fae9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122321-plug-barrel-3b60@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bc7acc0bd0f94c26bc0defc902311794a3d0fae9 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland(a)sifive.com>
Date: Wed, 20 Nov 2024 15:31:16 -0800
Subject: [PATCH] of: property: fw_devlink: Do not use interrupt-parent
directly
commit 7f00be96f125 ("of: property: Add device link support for
interrupt-parent, dmas and -gpio(s)") started adding device links for
the interrupt-parent property. commit 4104ca776ba3 ("of: property: Add
fw_devlink support for interrupts") and commit f265f06af194 ("of:
property: Fix fw_devlink handling of interrupts/interrupts-extended")
later added full support for parsing the interrupts and
interrupts-extended properties, which includes looking up the node of
the parent domain. This made the handler for the interrupt-parent
property redundant.
In fact, creating device links based solely on interrupt-parent is
problematic, because it can create spurious cycles. A node may have
this property without itself being an interrupt controller or consumer.
For example, this property is often present in the root node or a /soc
bus node to set the default interrupt parent for child nodes. However,
it is incorrect for the bus to depend on the interrupt controller, as
some of the bus's children may not be interrupt consumers at all or may
have a different interrupt parent.
Resolving these spurious dependency cycles can cause an incorrect probe
order for interrupt controller drivers. This was observed on a RISC-V
system with both an APLIC and IMSIC under /soc, where interrupt-parent
in /soc points to the APLIC, and the APLIC msi-parent points to the
IMSIC. fw_devlink found three dependency cycles and attempted to probe
the APLIC before the IMSIC. After applying this patch, there were no
dependency cycles and the probe order was correct.
Acked-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Fixes: 4104ca776ba3 ("of: property: Add fw_devlink support for interrupts")
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
Link: https://lore.kernel.org/r/20241120233124.3649382-1-samuel.holland@sifive.com
Signed-off-by: Rob Herring (Arm) <robh(a)kernel.org>
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 519bf9229e61..cfc8aea002e4 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1286,7 +1286,6 @@ DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells")
DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells")
DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells")
DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells")
-DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL)
DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells")
@@ -1432,7 +1431,6 @@ static const struct supplier_bindings of_supplier_bindings[] = {
{ .parse_prop = parse_mboxes, },
{ .parse_prop = parse_io_channels, },
{ .parse_prop = parse_io_backends, },
- { .parse_prop = parse_interrupt_parent, },
{ .parse_prop = parse_dmas, .optional = true, },
{ .parse_prop = parse_power_domains, },
{ .parse_prop = parse_hwlocks, },
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122349-unhinge-seduce-70ff@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 4 Dec 2024 17:26:19 +0100
Subject: [PATCH] udmabuf: fix racy memfd sealing check
The current check_memfd_seals() is racy: Since we first do
check_memfd_seals() and then udmabuf_pin_folios() without holding any
relevant lock across both, F_SEAL_WRITE can be set in between.
This is problematic because we can end up holding pins to pages in a
write-sealed memfd.
Fix it using the inode lock, that's probably the easiest way.
In the future, we might want to consider moving this logic into memfd,
especially if anyone else wants to use memfd_pin_folios().
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219106
Closes: https://lore.kernel.org/r/CAG48ez0w8HrFEZtJkfmkVKFDhE5aP7nz=obrimeTgpD+StkV…
Fixes: fbb0de795078 ("Add udmabuf misc device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Acked-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Acked-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241204-udmabuf-fixes-v2-1-2…
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 8ce1f074c2d3..c1d8c2766d6d 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -436,14 +436,19 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
}
+ /*
+ * Take the inode lock to protect against concurrent
+ * memfd_add_seals(), which takes this lock in write mode.
+ */
+ inode_lock_shared(file_inode(memfd));
ret = check_memfd_seals(memfd);
- if (ret < 0) {
- fput(memfd);
- goto err;
- }
+ if (ret)
+ goto out_unlock;
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
list[i].size, folios);
+out_unlock:
+ inode_unlock_shared(file_inode(memfd));
fput(memfd);
if (ret)
goto err;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122349-avalanche-wriggle-a30e@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 4 Dec 2024 17:26:19 +0100
Subject: [PATCH] udmabuf: fix racy memfd sealing check
The current check_memfd_seals() is racy: Since we first do
check_memfd_seals() and then udmabuf_pin_folios() without holding any
relevant lock across both, F_SEAL_WRITE can be set in between.
This is problematic because we can end up holding pins to pages in a
write-sealed memfd.
Fix it using the inode lock, that's probably the easiest way.
In the future, we might want to consider moving this logic into memfd,
especially if anyone else wants to use memfd_pin_folios().
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219106
Closes: https://lore.kernel.org/r/CAG48ez0w8HrFEZtJkfmkVKFDhE5aP7nz=obrimeTgpD+StkV…
Fixes: fbb0de795078 ("Add udmabuf misc device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Acked-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Acked-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241204-udmabuf-fixes-v2-1-2…
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 8ce1f074c2d3..c1d8c2766d6d 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -436,14 +436,19 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
}
+ /*
+ * Take the inode lock to protect against concurrent
+ * memfd_add_seals(), which takes this lock in write mode.
+ */
+ inode_lock_shared(file_inode(memfd));
ret = check_memfd_seals(memfd);
- if (ret < 0) {
- fput(memfd);
- goto err;
- }
+ if (ret)
+ goto out_unlock;
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
list[i].size, folios);
+out_unlock:
+ inode_unlock_shared(file_inode(memfd));
fput(memfd);
if (ret)
goto err;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122348-endeared-emptier-9ba3@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 4 Dec 2024 17:26:19 +0100
Subject: [PATCH] udmabuf: fix racy memfd sealing check
The current check_memfd_seals() is racy: Since we first do
check_memfd_seals() and then udmabuf_pin_folios() without holding any
relevant lock across both, F_SEAL_WRITE can be set in between.
This is problematic because we can end up holding pins to pages in a
write-sealed memfd.
Fix it using the inode lock, that's probably the easiest way.
In the future, we might want to consider moving this logic into memfd,
especially if anyone else wants to use memfd_pin_folios().
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219106
Closes: https://lore.kernel.org/r/CAG48ez0w8HrFEZtJkfmkVKFDhE5aP7nz=obrimeTgpD+StkV…
Fixes: fbb0de795078 ("Add udmabuf misc device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Acked-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Acked-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241204-udmabuf-fixes-v2-1-2…
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 8ce1f074c2d3..c1d8c2766d6d 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -436,14 +436,19 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
}
+ /*
+ * Take the inode lock to protect against concurrent
+ * memfd_add_seals(), which takes this lock in write mode.
+ */
+ inode_lock_shared(file_inode(memfd));
ret = check_memfd_seals(memfd);
- if (ret < 0) {
- fput(memfd);
- goto err;
- }
+ if (ret)
+ goto out_unlock;
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
list[i].size, folios);
+out_unlock:
+ inode_unlock_shared(file_inode(memfd));
fput(memfd);
if (ret)
goto err;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122346-scion-scored-b379@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 4 Dec 2024 17:26:19 +0100
Subject: [PATCH] udmabuf: fix racy memfd sealing check
The current check_memfd_seals() is racy: Since we first do
check_memfd_seals() and then udmabuf_pin_folios() without holding any
relevant lock across both, F_SEAL_WRITE can be set in between.
This is problematic because we can end up holding pins to pages in a
write-sealed memfd.
Fix it using the inode lock, that's probably the easiest way.
In the future, we might want to consider moving this logic into memfd,
especially if anyone else wants to use memfd_pin_folios().
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219106
Closes: https://lore.kernel.org/r/CAG48ez0w8HrFEZtJkfmkVKFDhE5aP7nz=obrimeTgpD+StkV…
Fixes: fbb0de795078 ("Add udmabuf misc device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Acked-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Acked-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241204-udmabuf-fixes-v2-1-2…
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 8ce1f074c2d3..c1d8c2766d6d 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -436,14 +436,19 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
}
+ /*
+ * Take the inode lock to protect against concurrent
+ * memfd_add_seals(), which takes this lock in write mode.
+ */
+ inode_lock_shared(file_inode(memfd));
ret = check_memfd_seals(memfd);
- if (ret < 0) {
- fput(memfd);
- goto err;
- }
+ if (ret)
+ goto out_unlock;
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
list[i].size, folios);
+out_unlock:
+ inode_unlock_shared(file_inode(memfd));
fput(memfd);
if (ret)
goto err;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122347-uncivil-decimal-1a11@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9cb189a882738c1d28b349d4e7c6a1ef9b3d8f87 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 4 Dec 2024 17:26:19 +0100
Subject: [PATCH] udmabuf: fix racy memfd sealing check
The current check_memfd_seals() is racy: Since we first do
check_memfd_seals() and then udmabuf_pin_folios() without holding any
relevant lock across both, F_SEAL_WRITE can be set in between.
This is problematic because we can end up holding pins to pages in a
write-sealed memfd.
Fix it using the inode lock, that's probably the easiest way.
In the future, we might want to consider moving this logic into memfd,
especially if anyone else wants to use memfd_pin_folios().
Reported-by: Julian Orth <ju.orth(a)gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219106
Closes: https://lore.kernel.org/r/CAG48ez0w8HrFEZtJkfmkVKFDhE5aP7nz=obrimeTgpD+StkV…
Fixes: fbb0de795078 ("Add udmabuf misc device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Acked-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Acked-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241204-udmabuf-fixes-v2-1-2…
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 8ce1f074c2d3..c1d8c2766d6d 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -436,14 +436,19 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
}
+ /*
+ * Take the inode lock to protect against concurrent
+ * memfd_add_seals(), which takes this lock in write mode.
+ */
+ inode_lock_shared(file_inode(memfd));
ret = check_memfd_seals(memfd);
- if (ret < 0) {
- fput(memfd);
- goto err;
- }
+ if (ret)
+ goto out_unlock;
ret = udmabuf_pin_folios(ubuf, memfd, list[i].offset,
list[i].size, folios);
+out_unlock:
+ inode_unlock_shared(file_inode(memfd));
fput(memfd);
if (ret)
goto err;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 6c3864e055486fadb5b97793b57688082e14b43b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122301-ouch-willfully-ae9c@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c3864e055486fadb5b97793b57688082e14b43b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Mon, 4 Nov 2024 07:26:32 +0100
Subject: [PATCH] btrfs: use bio_is_zone_append() in the completion handler
Otherwise it won't catch bios turned into regular writes by the block
level zone write plugging. The additional test it adds is for emulated
zone append.
Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation")
CC: stable(a)vger.kernel.org # 6.12
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 1f216d07eff6..011cc97be3b5 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -355,7 +355,7 @@ static void btrfs_simple_end_io(struct bio *bio)
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
- if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+ if (bio_is_zone_append(bio) && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
}
@@ -398,7 +398,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
else
bio->bi_status = BLK_STS_OK;
- if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+ if (bio_is_zone_append(bio) && !bio->bi_status)
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
btrfs_bio_end_io(bbio, bbio->bio.bi_status);
@@ -412,7 +412,7 @@ static void btrfs_clone_write_end_io(struct bio *bio)
if (bio->bi_status) {
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
- } else if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ } else if (bio_is_zone_append(bio)) {
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x dbd2ca9367eb19bc5e269b8c58b0b1514ada9156
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122314-twice-deuce-e49d@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dbd2ca9367eb19bc5e269b8c58b0b1514ada9156 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 19 Dec 2024 19:52:58 +0000
Subject: [PATCH] io_uring: check if iowq is killed before queuing
task work can be executed after the task has gone through io_uring
termination, whether it's the final task_work run or the fallback path.
In this case, task work will find ->io_wq being already killed and
null'ed, which is a problem if it then tries to forward the request to
io_queue_iowq(). Make io_queue_iowq() fail requests in this case.
Note that it also checks PF_KTHREAD, because the user can first close
a DEFER_TASKRUN ring and shortly after kill the task, in which case
->iowq check would race.
Cc: stable(a)vger.kernel.org
Fixes: 50c52250e2d74 ("block: implement async io_uring discard cmd")
Fixes: 773af69121ecc ("io_uring: always reissue from task_work context")
Reported-by: Will <willsroot(a)protonmail.com>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/63312b4a2c2bb67ad67b857d17a300e1d3b078e8.17346379…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 432b95ca9c85..d3403c8216db 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -514,7 +514,11 @@ static void io_queue_iowq(struct io_kiocb *req)
struct io_uring_task *tctx = req->tctx;
BUG_ON(!tctx);
- BUG_ON(!tctx->io_wq);
+
+ if ((current->flags & PF_KTHREAD) || !tctx->io_wq) {
+ io_req_task_queue_fail(req, -ECANCELED);
+ return;
+ }
/* init ->work of the whole link before punting */
io_prep_async_link(req);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x afd2627f727b89496d79a6b934a025fc916d4ded
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122317-espionage-overbite-d59e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From afd2627f727b89496d79a6b934a025fc916d4ded Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Mon, 16 Dec 2024 21:41:22 -0500
Subject: [PATCH] tracing: Check "%s" dereference via the field and not the
TP_printk format
The TP_printk() portion of a trace event is executed at the time a event
is read from the trace. This can happen seconds, minutes, hours, days,
months, years possibly later since the event was recorded. If the print
format contains a dereference to a string via "%s", and that string was
allocated, there's a chance that string could be freed before it is read
by the trace file.
To protect against such bugs, there are two functions that verify the
event. The first one is test_event_printk(), which is called when the
event is created. It reads the TP_printk() format as well as its arguments
to make sure nothing may be dereferencing a pointer that was not copied
into the ring buffer along with the event. If it is, it will trigger a
WARN_ON().
For strings that use "%s", it is not so easy. The string may not reside in
the ring buffer but may still be valid. Strings that are static and part
of the kernel proper which will not be freed for the life of the running
system, are safe to dereference. But to know if it is a pointer to a
static string or to something on the heap can not be determined until the
event is triggered.
This brings us to the second function that tests for the bad dereferencing
of strings, trace_check_vprintf(). It would walk through the printf format
looking for "%s", and when it finds it, it would validate that the pointer
is safe to read. If not, it would produces a WARN_ON() as well and write
into the ring buffer "[UNSAFE-MEMORY]".
The problem with this is how it used va_list to have vsnprintf() handle
all the cases that it didn't need to check. Instead of re-implementing
vsnprintf(), it would make a copy of the format up to the %s part, and
call vsnprintf() with the current va_list ap variable, where the ap would
then be ready to point at the string in question.
For architectures that passed va_list by reference this was possible. For
architectures that passed it by copy it was not. A test_can_verify()
function was used to differentiate between the two, and if it wasn't
possible, it would disable it.
Even for architectures where this was feasible, it was a stretch to rely
on such a method that is undocumented, and could cause issues later on
with new optimizations of the compiler.
Instead, the first function test_event_printk() was updated to look at
"%s" as well. If the "%s" argument is a pointer outside the event in the
ring buffer, it would find the field type of the event that is the problem
and mark the structure with a new flag called "needs_test". The event
itself will be marked by TRACE_EVENT_FL_TEST_STR to let it be known that
this event has a field that needs to be verified before the event can be
printed using the printf format.
When the event fields are created from the field type structure, the
fields would copy the field type's "needs_test" value.
Finally, before being printed, a new function ignore_event() is called
which will check if the event has the TEST_STR flag set (if not, it
returns false). If the flag is set, it then iterates through the events
fields looking for the ones that have the "needs_test" flag set.
Then it uses the offset field from the field structure to find the pointer
in the ring buffer event. It runs the tests to make sure that pointer is
safe to print and if not, it triggers the WARN_ON() and also adds to the
trace output that the event in question has an unsafe memory access.
The ignore_event() makes the trace_check_vprintf() obsolete so it is
removed.
Link: https://lore.kernel.org/all/CAHk-=wh3uOnqnZPpR0PeLZZtyWbZLboZ7cHLCKRWsocvs9…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Al Viro <viro(a)ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Link: https://lore.kernel.org/20241217024720.848621576@goodmis.org
Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2a5df5b62cfc..91b8ffbdfa8c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -273,7 +273,8 @@ struct trace_event_fields {
const char *name;
const int size;
const int align;
- const int is_signed;
+ const unsigned int is_signed:1;
+ unsigned int needs_test:1;
const int filter_type;
const int len;
};
@@ -324,6 +325,7 @@ enum {
TRACE_EVENT_FL_EPROBE_BIT,
TRACE_EVENT_FL_FPROBE_BIT,
TRACE_EVENT_FL_CUSTOM_BIT,
+ TRACE_EVENT_FL_TEST_STR_BIT,
};
/*
@@ -340,6 +342,7 @@ enum {
* CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint)
* This is set when the custom event has not been attached
* to a tracepoint yet, then it is cleared when it is.
+ * TEST_STR - The event has a "%s" that points to a string outside the event
*/
enum {
TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
@@ -352,6 +355,7 @@ enum {
TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT),
TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT),
TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT),
+ TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..7cc18b9bce27 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3611,17 +3611,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
}
/* Returns true if the string is safe to dereference from an event */
-static bool trace_safe_str(struct trace_iterator *iter, const char *str,
- bool star, int len)
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
unsigned long addr = (unsigned long)str;
struct trace_event *trace_event;
struct trace_event_call *event;
- /* Ignore strings with no length */
- if (star && !len)
- return true;
-
/* OK if part of the event data */
if ((addr >= (unsigned long)iter->ent) &&
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3656,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
return false;
}
-static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
-
-static int test_can_verify_check(const char *fmt, ...)
-{
- char buf[16];
- va_list ap;
- int ret;
-
- /*
- * The verifier is dependent on vsnprintf() modifies the va_list
- * passed to it, where it is sent as a reference. Some architectures
- * (like x86_32) passes it by value, which means that vsnprintf()
- * does not modify the va_list passed to it, and the verifier
- * would then need to be able to understand all the values that
- * vsnprintf can use. If it is passed by value, then the verifier
- * is disabled.
- */
- va_start(ap, fmt);
- vsnprintf(buf, 16, "%d", ap);
- ret = va_arg(ap, int);
- va_end(ap);
-
- return ret;
-}
-
-static void test_can_verify(void)
-{
- if (!test_can_verify_check("%d %d", 0, 1)) {
- pr_info("trace event string verifier disabled\n");
- static_branch_inc(&trace_no_verify);
- }
-}
-
/**
- * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * ignore_event - Check dereferenced fields while writing to the seq buffer
* @iter: The iterator that holds the seq buffer and the event being printed
- * @fmt: The format used to print the event
- * @ap: The va_list holding the data to print from @fmt.
*
- * This writes the data into the @iter->seq buffer using the data from
- * @fmt and @ap. If the format has a %s, then the source of the string
- * is examined to make sure it is safe to print, otherwise it will
- * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
- * pointer.
+ * At boot up, test_event_printk() will flag any event that dereferences
+ * a string with "%s" that does exist in the ring buffer. It may still
+ * be valid, as the string may point to a static string in the kernel
+ * rodata that never gets freed. But if the string pointer is pointing
+ * to something that was allocated, there's a chance that it can be freed
+ * by the time the user reads the trace. This would cause a bad memory
+ * access by the kernel and possibly crash the system.
+ *
+ * This function will check if the event has any fields flagged as needing
+ * to be checked at runtime and perform those checks.
+ *
+ * If it is found that a field is unsafe, it will write into the @iter->seq
+ * a message stating what was found to be unsafe.
+ *
+ * @return: true if the event is unsafe and should be ignored,
+ * false otherwise.
*/
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap)
+bool ignore_event(struct trace_iterator *iter)
{
- long text_delta = 0;
- long data_delta = 0;
- const char *p = fmt;
- const char *str;
- bool good;
- int i, j;
+ struct ftrace_event_field *field;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+ struct list_head *head;
+ struct trace_seq *seq;
+ const void *ptr;
- if (WARN_ON_ONCE(!fmt))
- return;
+ trace_event = ftrace_find_event(iter->ent->type);
- if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ seq = &iter->seq;
- /*
- * When the kernel is booted with the tp_printk command line
- * parameter, trace events go directly through to printk().
- * It also is checked by this function, but it does not
- * have an associated trace_array (tr) for it.
- */
- if (iter->tr) {
- text_delta = iter->tr->text_delta;
- data_delta = iter->tr->data_delta;
+ if (!trace_event) {
+ trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
+ return true;
}
- /* Don't bother checking when doing a ftrace_dump() */
- if (iter->fmt == static_fmt_buf)
- goto print;
+ event = container_of(trace_event, struct trace_event_call, event);
+ if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
+ return false;
- while (*p) {
- bool star = false;
- int len = 0;
+ head = trace_get_fields(event);
+ if (!head) {
+ trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
+ trace_event_name(event));
+ return true;
+ }
- j = 0;
+ /* Offsets are from the iter->ent that points to the raw event */
+ ptr = iter->ent;
- /*
- * We only care about %s and variants
- * as well as %p[sS] if delta is non-zero
- */
- for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
+ list_for_each_entry(field, head, link) {
+ const char *str;
+ bool good;
- if (p[i] == '\\' && p[i+1]) {
- i++;
- continue;
- }
- if (p[i] == '%') {
- /* Need to test cases like %08.*s */
- for (j = 1; p[i+j]; j++) {
- if (isdigit(p[i+j]) ||
- p[i+j] == '.')
- continue;
- if (p[i+j] == '*') {
- star = true;
- continue;
- }
- break;
- }
- if (p[i+j] == 's')
- break;
-
- if (text_delta && p[i+1] == 'p' &&
- ((p[i+2] == 's' || p[i+2] == 'S')))
- break;
-
- star = false;
- }
- j = 0;
- }
- /* If no %s found then just print normally */
- if (!p[i])
- break;
-
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
-
- /* Add delta to %pS pointers */
- if (p[i+1] == 'p') {
- unsigned long addr;
- char fmt[4];
-
- fmt[0] = '%';
- fmt[1] = 'p';
- fmt[2] = p[i+2]; /* Either %ps or %pS */
- fmt[3] = '\0';
-
- addr = va_arg(ap, unsigned long);
- addr += text_delta;
- trace_seq_printf(&iter->seq, fmt, (void *)addr);
-
- p += i + 3;
+ if (!field->needs_test)
continue;
- }
- /*
- * If iter->seq is full, the above call no longer guarantees
- * that ap is in sync with fmt processing, and further calls
- * to va_arg() can return wrong positional arguments.
- *
- * Ensure that ap is no longer used in this case.
- */
- if (iter->seq.full) {
- p = "";
- break;
- }
+ str = *(const char **)(ptr + field->offset);
- if (star)
- len = va_arg(ap, int);
-
- /* The ap now points to the string data of the %s */
- str = va_arg(ap, const char *);
-
- good = trace_safe_str(iter, str, star, len);
-
- /* Could be from the last boot */
- if (data_delta && !good) {
- str += data_delta;
- good = trace_safe_str(iter, str, star, len);
- }
+ good = trace_safe_str(iter, str);
/*
* If you hit this warning, it is likely that the
@@ -3846,44 +3729,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* instead. See samples/trace_events/trace-events-sample.h
* for reference.
*/
- if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
- fmt, seq_buf_str(&iter->seq.seq))) {
- int ret;
-
- /* Try to safely read the string */
- if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
- } else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
- }
- if (ret < 0)
- trace_seq_printf(&iter->seq, "(0x%px)", str);
- else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
- } else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
+ trace_event_name(event), field->name)) {
+ trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
+ trace_event_name(event), field->name);
+ return true;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
-
- p += i + j + 1;
}
- print:
- if (*p)
- trace_seq_vprintf(&iter->seq, p, ap);
+ return false;
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -10777,8 +10630,6 @@ __init static int tracer_alloc_buffers(void)
register_snapshot_cmd();
- test_can_verify();
-
return 0;
out_free_pipe_cpumask:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 266740b4e121..9691b47b5f3d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,9 +667,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);
+bool ignore_event(struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -1413,7 +1412,8 @@ struct ftrace_event_field {
int filter_type;
int offset;
int size;
- int is_signed;
+ unsigned int is_signed:1;
+ unsigned int needs_test:1;
int len;
};
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 521ad2fd1fe7..1545cc8b49d0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system)
}
static struct ftrace_event_field *
-__find_event_field(struct list_head *head, char *name)
+__find_event_field(struct list_head *head, const char *name)
{
struct ftrace_event_field *field;
@@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type, int len)
+ int is_signed, int filter_type, int len,
+ int need_test)
{
struct ftrace_event_field *field;
@@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->needs_test = need_test;
field->len = len;
list_add(&field->link, head);
@@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, 0);
+ is_signed, filter_type, 0, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
- int filter_type, int len)
+ int filter_type, int len, int need_test)
{
struct list_head *head;
@@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, len);
+ is_signed, filter_type, len, need_test);
}
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type, 0); \
+ filter_type, 0, 0); \
if (ret) \
return ret;
@@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER, 0); \
+ is_signed_type(type), FILTER_OTHER, \
+ 0, 0); \
if (ret) \
return ret;
@@ -332,6 +335,7 @@ static bool process_pointer(const char *fmt, int len, struct trace_event_call *c
/* Return true if the string is safe */
static bool process_string(const char *fmt, int len, struct trace_event_call *call)
{
+ struct trace_event_fields *field;
const char *r, *e, *s;
e = fmt + len;
@@ -372,8 +376,16 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca
if (process_pointer(fmt, len, call))
return true;
- /* Make sure the field is found, and consider it OK for now if it is */
- return find_event_field(fmt, call) != NULL;
+ /* Make sure the field is found */
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* Test this field's string before printing the event */
+ call->flags |= TRACE_EVENT_FL_TEST_STR;
+ field->needs_test = 1;
+
+ return true;
}
/*
@@ -2586,7 +2598,7 @@ event_define_fields(struct trace_event_call *call)
ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type,
- field->len);
+ field->len, field->needs_test);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index da748b7cbc4d..03d56f711ad1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep);
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
+ struct trace_seq *s = &iter->seq;
va_list ap;
+ if (ignore_event(iter))
+ return;
+
va_start(ap, fmt);
- trace_check_vprintf(iter, trace_event_format(iter, fmt), ap);
+ trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x afd2627f727b89496d79a6b934a025fc916d4ded
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122315-repeal-enforced-8d7c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From afd2627f727b89496d79a6b934a025fc916d4ded Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Mon, 16 Dec 2024 21:41:22 -0500
Subject: [PATCH] tracing: Check "%s" dereference via the field and not the
TP_printk format
The TP_printk() portion of a trace event is executed at the time a event
is read from the trace. This can happen seconds, minutes, hours, days,
months, years possibly later since the event was recorded. If the print
format contains a dereference to a string via "%s", and that string was
allocated, there's a chance that string could be freed before it is read
by the trace file.
To protect against such bugs, there are two functions that verify the
event. The first one is test_event_printk(), which is called when the
event is created. It reads the TP_printk() format as well as its arguments
to make sure nothing may be dereferencing a pointer that was not copied
into the ring buffer along with the event. If it is, it will trigger a
WARN_ON().
For strings that use "%s", it is not so easy. The string may not reside in
the ring buffer but may still be valid. Strings that are static and part
of the kernel proper which will not be freed for the life of the running
system, are safe to dereference. But to know if it is a pointer to a
static string or to something on the heap can not be determined until the
event is triggered.
This brings us to the second function that tests for the bad dereferencing
of strings, trace_check_vprintf(). It would walk through the printf format
looking for "%s", and when it finds it, it would validate that the pointer
is safe to read. If not, it would produces a WARN_ON() as well and write
into the ring buffer "[UNSAFE-MEMORY]".
The problem with this is how it used va_list to have vsnprintf() handle
all the cases that it didn't need to check. Instead of re-implementing
vsnprintf(), it would make a copy of the format up to the %s part, and
call vsnprintf() with the current va_list ap variable, where the ap would
then be ready to point at the string in question.
For architectures that passed va_list by reference this was possible. For
architectures that passed it by copy it was not. A test_can_verify()
function was used to differentiate between the two, and if it wasn't
possible, it would disable it.
Even for architectures where this was feasible, it was a stretch to rely
on such a method that is undocumented, and could cause issues later on
with new optimizations of the compiler.
Instead, the first function test_event_printk() was updated to look at
"%s" as well. If the "%s" argument is a pointer outside the event in the
ring buffer, it would find the field type of the event that is the problem
and mark the structure with a new flag called "needs_test". The event
itself will be marked by TRACE_EVENT_FL_TEST_STR to let it be known that
this event has a field that needs to be verified before the event can be
printed using the printf format.
When the event fields are created from the field type structure, the
fields would copy the field type's "needs_test" value.
Finally, before being printed, a new function ignore_event() is called
which will check if the event has the TEST_STR flag set (if not, it
returns false). If the flag is set, it then iterates through the events
fields looking for the ones that have the "needs_test" flag set.
Then it uses the offset field from the field structure to find the pointer
in the ring buffer event. It runs the tests to make sure that pointer is
safe to print and if not, it triggers the WARN_ON() and also adds to the
trace output that the event in question has an unsafe memory access.
The ignore_event() makes the trace_check_vprintf() obsolete so it is
removed.
Link: https://lore.kernel.org/all/CAHk-=wh3uOnqnZPpR0PeLZZtyWbZLboZ7cHLCKRWsocvs9…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Al Viro <viro(a)ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Link: https://lore.kernel.org/20241217024720.848621576@goodmis.org
Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2a5df5b62cfc..91b8ffbdfa8c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -273,7 +273,8 @@ struct trace_event_fields {
const char *name;
const int size;
const int align;
- const int is_signed;
+ const unsigned int is_signed:1;
+ unsigned int needs_test:1;
const int filter_type;
const int len;
};
@@ -324,6 +325,7 @@ enum {
TRACE_EVENT_FL_EPROBE_BIT,
TRACE_EVENT_FL_FPROBE_BIT,
TRACE_EVENT_FL_CUSTOM_BIT,
+ TRACE_EVENT_FL_TEST_STR_BIT,
};
/*
@@ -340,6 +342,7 @@ enum {
* CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint)
* This is set when the custom event has not been attached
* to a tracepoint yet, then it is cleared when it is.
+ * TEST_STR - The event has a "%s" that points to a string outside the event
*/
enum {
TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
@@ -352,6 +355,7 @@ enum {
TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT),
TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT),
TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT),
+ TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..7cc18b9bce27 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3611,17 +3611,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
}
/* Returns true if the string is safe to dereference from an event */
-static bool trace_safe_str(struct trace_iterator *iter, const char *str,
- bool star, int len)
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
unsigned long addr = (unsigned long)str;
struct trace_event *trace_event;
struct trace_event_call *event;
- /* Ignore strings with no length */
- if (star && !len)
- return true;
-
/* OK if part of the event data */
if ((addr >= (unsigned long)iter->ent) &&
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3656,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
return false;
}
-static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
-
-static int test_can_verify_check(const char *fmt, ...)
-{
- char buf[16];
- va_list ap;
- int ret;
-
- /*
- * The verifier is dependent on vsnprintf() modifies the va_list
- * passed to it, where it is sent as a reference. Some architectures
- * (like x86_32) passes it by value, which means that vsnprintf()
- * does not modify the va_list passed to it, and the verifier
- * would then need to be able to understand all the values that
- * vsnprintf can use. If it is passed by value, then the verifier
- * is disabled.
- */
- va_start(ap, fmt);
- vsnprintf(buf, 16, "%d", ap);
- ret = va_arg(ap, int);
- va_end(ap);
-
- return ret;
-}
-
-static void test_can_verify(void)
-{
- if (!test_can_verify_check("%d %d", 0, 1)) {
- pr_info("trace event string verifier disabled\n");
- static_branch_inc(&trace_no_verify);
- }
-}
-
/**
- * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * ignore_event - Check dereferenced fields while writing to the seq buffer
* @iter: The iterator that holds the seq buffer and the event being printed
- * @fmt: The format used to print the event
- * @ap: The va_list holding the data to print from @fmt.
*
- * This writes the data into the @iter->seq buffer using the data from
- * @fmt and @ap. If the format has a %s, then the source of the string
- * is examined to make sure it is safe to print, otherwise it will
- * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
- * pointer.
+ * At boot up, test_event_printk() will flag any event that dereferences
+ * a string with "%s" that does exist in the ring buffer. It may still
+ * be valid, as the string may point to a static string in the kernel
+ * rodata that never gets freed. But if the string pointer is pointing
+ * to something that was allocated, there's a chance that it can be freed
+ * by the time the user reads the trace. This would cause a bad memory
+ * access by the kernel and possibly crash the system.
+ *
+ * This function will check if the event has any fields flagged as needing
+ * to be checked at runtime and perform those checks.
+ *
+ * If it is found that a field is unsafe, it will write into the @iter->seq
+ * a message stating what was found to be unsafe.
+ *
+ * @return: true if the event is unsafe and should be ignored,
+ * false otherwise.
*/
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap)
+bool ignore_event(struct trace_iterator *iter)
{
- long text_delta = 0;
- long data_delta = 0;
- const char *p = fmt;
- const char *str;
- bool good;
- int i, j;
+ struct ftrace_event_field *field;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+ struct list_head *head;
+ struct trace_seq *seq;
+ const void *ptr;
- if (WARN_ON_ONCE(!fmt))
- return;
+ trace_event = ftrace_find_event(iter->ent->type);
- if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ seq = &iter->seq;
- /*
- * When the kernel is booted with the tp_printk command line
- * parameter, trace events go directly through to printk().
- * It also is checked by this function, but it does not
- * have an associated trace_array (tr) for it.
- */
- if (iter->tr) {
- text_delta = iter->tr->text_delta;
- data_delta = iter->tr->data_delta;
+ if (!trace_event) {
+ trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
+ return true;
}
- /* Don't bother checking when doing a ftrace_dump() */
- if (iter->fmt == static_fmt_buf)
- goto print;
+ event = container_of(trace_event, struct trace_event_call, event);
+ if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
+ return false;
- while (*p) {
- bool star = false;
- int len = 0;
+ head = trace_get_fields(event);
+ if (!head) {
+ trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
+ trace_event_name(event));
+ return true;
+ }
- j = 0;
+ /* Offsets are from the iter->ent that points to the raw event */
+ ptr = iter->ent;
- /*
- * We only care about %s and variants
- * as well as %p[sS] if delta is non-zero
- */
- for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
+ list_for_each_entry(field, head, link) {
+ const char *str;
+ bool good;
- if (p[i] == '\\' && p[i+1]) {
- i++;
- continue;
- }
- if (p[i] == '%') {
- /* Need to test cases like %08.*s */
- for (j = 1; p[i+j]; j++) {
- if (isdigit(p[i+j]) ||
- p[i+j] == '.')
- continue;
- if (p[i+j] == '*') {
- star = true;
- continue;
- }
- break;
- }
- if (p[i+j] == 's')
- break;
-
- if (text_delta && p[i+1] == 'p' &&
- ((p[i+2] == 's' || p[i+2] == 'S')))
- break;
-
- star = false;
- }
- j = 0;
- }
- /* If no %s found then just print normally */
- if (!p[i])
- break;
-
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
-
- /* Add delta to %pS pointers */
- if (p[i+1] == 'p') {
- unsigned long addr;
- char fmt[4];
-
- fmt[0] = '%';
- fmt[1] = 'p';
- fmt[2] = p[i+2]; /* Either %ps or %pS */
- fmt[3] = '\0';
-
- addr = va_arg(ap, unsigned long);
- addr += text_delta;
- trace_seq_printf(&iter->seq, fmt, (void *)addr);
-
- p += i + 3;
+ if (!field->needs_test)
continue;
- }
- /*
- * If iter->seq is full, the above call no longer guarantees
- * that ap is in sync with fmt processing, and further calls
- * to va_arg() can return wrong positional arguments.
- *
- * Ensure that ap is no longer used in this case.
- */
- if (iter->seq.full) {
- p = "";
- break;
- }
+ str = *(const char **)(ptr + field->offset);
- if (star)
- len = va_arg(ap, int);
-
- /* The ap now points to the string data of the %s */
- str = va_arg(ap, const char *);
-
- good = trace_safe_str(iter, str, star, len);
-
- /* Could be from the last boot */
- if (data_delta && !good) {
- str += data_delta;
- good = trace_safe_str(iter, str, star, len);
- }
+ good = trace_safe_str(iter, str);
/*
* If you hit this warning, it is likely that the
@@ -3846,44 +3729,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* instead. See samples/trace_events/trace-events-sample.h
* for reference.
*/
- if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
- fmt, seq_buf_str(&iter->seq.seq))) {
- int ret;
-
- /* Try to safely read the string */
- if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
- } else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
- }
- if (ret < 0)
- trace_seq_printf(&iter->seq, "(0x%px)", str);
- else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
- } else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
+ trace_event_name(event), field->name)) {
+ trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
+ trace_event_name(event), field->name);
+ return true;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
-
- p += i + j + 1;
}
- print:
- if (*p)
- trace_seq_vprintf(&iter->seq, p, ap);
+ return false;
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -10777,8 +10630,6 @@ __init static int tracer_alloc_buffers(void)
register_snapshot_cmd();
- test_can_verify();
-
return 0;
out_free_pipe_cpumask:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 266740b4e121..9691b47b5f3d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,9 +667,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);
+bool ignore_event(struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -1413,7 +1412,8 @@ struct ftrace_event_field {
int filter_type;
int offset;
int size;
- int is_signed;
+ unsigned int is_signed:1;
+ unsigned int needs_test:1;
int len;
};
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 521ad2fd1fe7..1545cc8b49d0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system)
}
static struct ftrace_event_field *
-__find_event_field(struct list_head *head, char *name)
+__find_event_field(struct list_head *head, const char *name)
{
struct ftrace_event_field *field;
@@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type, int len)
+ int is_signed, int filter_type, int len,
+ int need_test)
{
struct ftrace_event_field *field;
@@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->needs_test = need_test;
field->len = len;
list_add(&field->link, head);
@@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, 0);
+ is_signed, filter_type, 0, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
- int filter_type, int len)
+ int filter_type, int len, int need_test)
{
struct list_head *head;
@@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, len);
+ is_signed, filter_type, len, need_test);
}
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type, 0); \
+ filter_type, 0, 0); \
if (ret) \
return ret;
@@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER, 0); \
+ is_signed_type(type), FILTER_OTHER, \
+ 0, 0); \
if (ret) \
return ret;
@@ -332,6 +335,7 @@ static bool process_pointer(const char *fmt, int len, struct trace_event_call *c
/* Return true if the string is safe */
static bool process_string(const char *fmt, int len, struct trace_event_call *call)
{
+ struct trace_event_fields *field;
const char *r, *e, *s;
e = fmt + len;
@@ -372,8 +376,16 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca
if (process_pointer(fmt, len, call))
return true;
- /* Make sure the field is found, and consider it OK for now if it is */
- return find_event_field(fmt, call) != NULL;
+ /* Make sure the field is found */
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* Test this field's string before printing the event */
+ call->flags |= TRACE_EVENT_FL_TEST_STR;
+ field->needs_test = 1;
+
+ return true;
}
/*
@@ -2586,7 +2598,7 @@ event_define_fields(struct trace_event_call *call)
ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type,
- field->len);
+ field->len, field->needs_test);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index da748b7cbc4d..03d56f711ad1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep);
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
+ struct trace_seq *s = &iter->seq;
va_list ap;
+ if (ignore_event(iter))
+ return;
+
va_start(ap, fmt);
- trace_check_vprintf(iter, trace_event_format(iter, fmt), ap);
+ trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x afd2627f727b89496d79a6b934a025fc916d4ded
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024122308-scrunch-gag-a448@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From afd2627f727b89496d79a6b934a025fc916d4ded Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Mon, 16 Dec 2024 21:41:22 -0500
Subject: [PATCH] tracing: Check "%s" dereference via the field and not the
TP_printk format
The TP_printk() portion of a trace event is executed at the time a event
is read from the trace. This can happen seconds, minutes, hours, days,
months, years possibly later since the event was recorded. If the print
format contains a dereference to a string via "%s", and that string was
allocated, there's a chance that string could be freed before it is read
by the trace file.
To protect against such bugs, there are two functions that verify the
event. The first one is test_event_printk(), which is called when the
event is created. It reads the TP_printk() format as well as its arguments
to make sure nothing may be dereferencing a pointer that was not copied
into the ring buffer along with the event. If it is, it will trigger a
WARN_ON().
For strings that use "%s", it is not so easy. The string may not reside in
the ring buffer but may still be valid. Strings that are static and part
of the kernel proper which will not be freed for the life of the running
system, are safe to dereference. But to know if it is a pointer to a
static string or to something on the heap can not be determined until the
event is triggered.
This brings us to the second function that tests for the bad dereferencing
of strings, trace_check_vprintf(). It would walk through the printf format
looking for "%s", and when it finds it, it would validate that the pointer
is safe to read. If not, it would produces a WARN_ON() as well and write
into the ring buffer "[UNSAFE-MEMORY]".
The problem with this is how it used va_list to have vsnprintf() handle
all the cases that it didn't need to check. Instead of re-implementing
vsnprintf(), it would make a copy of the format up to the %s part, and
call vsnprintf() with the current va_list ap variable, where the ap would
then be ready to point at the string in question.
For architectures that passed va_list by reference this was possible. For
architectures that passed it by copy it was not. A test_can_verify()
function was used to differentiate between the two, and if it wasn't
possible, it would disable it.
Even for architectures where this was feasible, it was a stretch to rely
on such a method that is undocumented, and could cause issues later on
with new optimizations of the compiler.
Instead, the first function test_event_printk() was updated to look at
"%s" as well. If the "%s" argument is a pointer outside the event in the
ring buffer, it would find the field type of the event that is the problem
and mark the structure with a new flag called "needs_test". The event
itself will be marked by TRACE_EVENT_FL_TEST_STR to let it be known that
this event has a field that needs to be verified before the event can be
printed using the printf format.
When the event fields are created from the field type structure, the
fields would copy the field type's "needs_test" value.
Finally, before being printed, a new function ignore_event() is called
which will check if the event has the TEST_STR flag set (if not, it
returns false). If the flag is set, it then iterates through the events
fields looking for the ones that have the "needs_test" flag set.
Then it uses the offset field from the field structure to find the pointer
in the ring buffer event. It runs the tests to make sure that pointer is
safe to print and if not, it triggers the WARN_ON() and also adds to the
trace output that the event in question has an unsafe memory access.
The ignore_event() makes the trace_check_vprintf() obsolete so it is
removed.
Link: https://lore.kernel.org/all/CAHk-=wh3uOnqnZPpR0PeLZZtyWbZLboZ7cHLCKRWsocvs9…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Al Viro <viro(a)ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Link: https://lore.kernel.org/20241217024720.848621576@goodmis.org
Fixes: 5013f454a352c ("tracing: Add check of trace event print fmts for dereferencing pointers")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2a5df5b62cfc..91b8ffbdfa8c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -273,7 +273,8 @@ struct trace_event_fields {
const char *name;
const int size;
const int align;
- const int is_signed;
+ const unsigned int is_signed:1;
+ unsigned int needs_test:1;
const int filter_type;
const int len;
};
@@ -324,6 +325,7 @@ enum {
TRACE_EVENT_FL_EPROBE_BIT,
TRACE_EVENT_FL_FPROBE_BIT,
TRACE_EVENT_FL_CUSTOM_BIT,
+ TRACE_EVENT_FL_TEST_STR_BIT,
};
/*
@@ -340,6 +342,7 @@ enum {
* CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint)
* This is set when the custom event has not been attached
* to a tracepoint yet, then it is cleared when it is.
+ * TEST_STR - The event has a "%s" that points to a string outside the event
*/
enum {
TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
@@ -352,6 +355,7 @@ enum {
TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT),
TRACE_EVENT_FL_FPROBE = (1 << TRACE_EVENT_FL_FPROBE_BIT),
TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT),
+ TRACE_EVENT_FL_TEST_STR = (1 << TRACE_EVENT_FL_TEST_STR_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..7cc18b9bce27 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3611,17 +3611,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
}
/* Returns true if the string is safe to dereference from an event */
-static bool trace_safe_str(struct trace_iterator *iter, const char *str,
- bool star, int len)
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
unsigned long addr = (unsigned long)str;
struct trace_event *trace_event;
struct trace_event_call *event;
- /* Ignore strings with no length */
- if (star && !len)
- return true;
-
/* OK if part of the event data */
if ((addr >= (unsigned long)iter->ent) &&
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3656,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
return false;
}
-static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
-
-static int test_can_verify_check(const char *fmt, ...)
-{
- char buf[16];
- va_list ap;
- int ret;
-
- /*
- * The verifier is dependent on vsnprintf() modifies the va_list
- * passed to it, where it is sent as a reference. Some architectures
- * (like x86_32) passes it by value, which means that vsnprintf()
- * does not modify the va_list passed to it, and the verifier
- * would then need to be able to understand all the values that
- * vsnprintf can use. If it is passed by value, then the verifier
- * is disabled.
- */
- va_start(ap, fmt);
- vsnprintf(buf, 16, "%d", ap);
- ret = va_arg(ap, int);
- va_end(ap);
-
- return ret;
-}
-
-static void test_can_verify(void)
-{
- if (!test_can_verify_check("%d %d", 0, 1)) {
- pr_info("trace event string verifier disabled\n");
- static_branch_inc(&trace_no_verify);
- }
-}
-
/**
- * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * ignore_event - Check dereferenced fields while writing to the seq buffer
* @iter: The iterator that holds the seq buffer and the event being printed
- * @fmt: The format used to print the event
- * @ap: The va_list holding the data to print from @fmt.
*
- * This writes the data into the @iter->seq buffer using the data from
- * @fmt and @ap. If the format has a %s, then the source of the string
- * is examined to make sure it is safe to print, otherwise it will
- * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
- * pointer.
+ * At boot up, test_event_printk() will flag any event that dereferences
+ * a string with "%s" that does exist in the ring buffer. It may still
+ * be valid, as the string may point to a static string in the kernel
+ * rodata that never gets freed. But if the string pointer is pointing
+ * to something that was allocated, there's a chance that it can be freed
+ * by the time the user reads the trace. This would cause a bad memory
+ * access by the kernel and possibly crash the system.
+ *
+ * This function will check if the event has any fields flagged as needing
+ * to be checked at runtime and perform those checks.
+ *
+ * If it is found that a field is unsafe, it will write into the @iter->seq
+ * a message stating what was found to be unsafe.
+ *
+ * @return: true if the event is unsafe and should be ignored,
+ * false otherwise.
*/
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap)
+bool ignore_event(struct trace_iterator *iter)
{
- long text_delta = 0;
- long data_delta = 0;
- const char *p = fmt;
- const char *str;
- bool good;
- int i, j;
+ struct ftrace_event_field *field;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+ struct list_head *head;
+ struct trace_seq *seq;
+ const void *ptr;
- if (WARN_ON_ONCE(!fmt))
- return;
+ trace_event = ftrace_find_event(iter->ent->type);
- if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ seq = &iter->seq;
- /*
- * When the kernel is booted with the tp_printk command line
- * parameter, trace events go directly through to printk().
- * It also is checked by this function, but it does not
- * have an associated trace_array (tr) for it.
- */
- if (iter->tr) {
- text_delta = iter->tr->text_delta;
- data_delta = iter->tr->data_delta;
+ if (!trace_event) {
+ trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
+ return true;
}
- /* Don't bother checking when doing a ftrace_dump() */
- if (iter->fmt == static_fmt_buf)
- goto print;
+ event = container_of(trace_event, struct trace_event_call, event);
+ if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
+ return false;
- while (*p) {
- bool star = false;
- int len = 0;
+ head = trace_get_fields(event);
+ if (!head) {
+ trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
+ trace_event_name(event));
+ return true;
+ }
- j = 0;
+ /* Offsets are from the iter->ent that points to the raw event */
+ ptr = iter->ent;
- /*
- * We only care about %s and variants
- * as well as %p[sS] if delta is non-zero
- */
- for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
+ list_for_each_entry(field, head, link) {
+ const char *str;
+ bool good;
- if (p[i] == '\\' && p[i+1]) {
- i++;
- continue;
- }
- if (p[i] == '%') {
- /* Need to test cases like %08.*s */
- for (j = 1; p[i+j]; j++) {
- if (isdigit(p[i+j]) ||
- p[i+j] == '.')
- continue;
- if (p[i+j] == '*') {
- star = true;
- continue;
- }
- break;
- }
- if (p[i+j] == 's')
- break;
-
- if (text_delta && p[i+1] == 'p' &&
- ((p[i+2] == 's' || p[i+2] == 'S')))
- break;
-
- star = false;
- }
- j = 0;
- }
- /* If no %s found then just print normally */
- if (!p[i])
- break;
-
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
-
- /* Add delta to %pS pointers */
- if (p[i+1] == 'p') {
- unsigned long addr;
- char fmt[4];
-
- fmt[0] = '%';
- fmt[1] = 'p';
- fmt[2] = p[i+2]; /* Either %ps or %pS */
- fmt[3] = '\0';
-
- addr = va_arg(ap, unsigned long);
- addr += text_delta;
- trace_seq_printf(&iter->seq, fmt, (void *)addr);
-
- p += i + 3;
+ if (!field->needs_test)
continue;
- }
- /*
- * If iter->seq is full, the above call no longer guarantees
- * that ap is in sync with fmt processing, and further calls
- * to va_arg() can return wrong positional arguments.
- *
- * Ensure that ap is no longer used in this case.
- */
- if (iter->seq.full) {
- p = "";
- break;
- }
+ str = *(const char **)(ptr + field->offset);
- if (star)
- len = va_arg(ap, int);
-
- /* The ap now points to the string data of the %s */
- str = va_arg(ap, const char *);
-
- good = trace_safe_str(iter, str, star, len);
-
- /* Could be from the last boot */
- if (data_delta && !good) {
- str += data_delta;
- good = trace_safe_str(iter, str, star, len);
- }
+ good = trace_safe_str(iter, str);
/*
* If you hit this warning, it is likely that the
@@ -3846,44 +3729,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* instead. See samples/trace_events/trace-events-sample.h
* for reference.
*/
- if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
- fmt, seq_buf_str(&iter->seq.seq))) {
- int ret;
-
- /* Try to safely read the string */
- if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
- } else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
- }
- if (ret < 0)
- trace_seq_printf(&iter->seq, "(0x%px)", str);
- else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
- } else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
+ trace_event_name(event), field->name)) {
+ trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
+ trace_event_name(event), field->name);
+ return true;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
-
- p += i + j + 1;
}
- print:
- if (*p)
- trace_seq_vprintf(&iter->seq, p, ap);
+ return false;
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -10777,8 +10630,6 @@ __init static int tracer_alloc_buffers(void)
register_snapshot_cmd();
- test_can_verify();
-
return 0;
out_free_pipe_cpumask:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 266740b4e121..9691b47b5f3d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,9 +667,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);
+bool ignore_event(struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -1413,7 +1412,8 @@ struct ftrace_event_field {
int filter_type;
int offset;
int size;
- int is_signed;
+ unsigned int is_signed:1;
+ unsigned int needs_test:1;
int len;
};
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 521ad2fd1fe7..1545cc8b49d0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system)
}
static struct ftrace_event_field *
-__find_event_field(struct list_head *head, char *name)
+__find_event_field(struct list_head *head, const char *name)
{
struct ftrace_event_field *field;
@@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type, int len)
+ int is_signed, int filter_type, int len,
+ int need_test)
{
struct ftrace_event_field *field;
@@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->needs_test = need_test;
field->len = len;
list_add(&field->link, head);
@@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, 0);
+ is_signed, filter_type, 0, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
- int filter_type, int len)
+ int filter_type, int len, int need_test)
{
struct list_head *head;
@@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, len);
+ is_signed, filter_type, len, need_test);
}
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type, 0); \
+ filter_type, 0, 0); \
if (ret) \
return ret;
@@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER, 0); \
+ is_signed_type(type), FILTER_OTHER, \
+ 0, 0); \
if (ret) \
return ret;
@@ -332,6 +335,7 @@ static bool process_pointer(const char *fmt, int len, struct trace_event_call *c
/* Return true if the string is safe */
static bool process_string(const char *fmt, int len, struct trace_event_call *call)
{
+ struct trace_event_fields *field;
const char *r, *e, *s;
e = fmt + len;
@@ -372,8 +376,16 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca
if (process_pointer(fmt, len, call))
return true;
- /* Make sure the field is found, and consider it OK for now if it is */
- return find_event_field(fmt, call) != NULL;
+ /* Make sure the field is found */
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* Test this field's string before printing the event */
+ call->flags |= TRACE_EVENT_FL_TEST_STR;
+ field->needs_test = 1;
+
+ return true;
}
/*
@@ -2586,7 +2598,7 @@ event_define_fields(struct trace_event_call *call)
ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type,
- field->len);
+ field->len, field->needs_test);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index da748b7cbc4d..03d56f711ad1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep);
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
+ struct trace_seq *s = &iter->seq;
va_list ap;
+ if (ignore_event(iter))
+ return;
+
va_start(ap, fmt);
- trace_check_vprintf(iter, trace_event_format(iter, fmt), ap);
+ trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);