On all platforms set_clock_selection() writes to a GRF register. This
requires certain clocks running and thus should happen before the
clocks are disabled.
This has been noticed on RK3576 Sige5, which hangs during system suspend
when trying to suspend the second network interface. Note, that
suspending the first interface works, because the second device ensures
that the necessary clocks for the GRF are enabled.
Cc: stable(a)vger.kernel.org
Fixes: 2f2b60a0ec28 ("net: ethernet: stmmac: dwmac-rk: Add gmac support for rk3588")
Signed-off-by: Sebastian Reichel <sebastian.reichel(a)collabora.com>
---
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 51ea0caf16c1..0786816e05f0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1446,14 +1446,15 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
}
} else {
if (bsp_priv->clk_enabled) {
+ if (bsp_priv->ops && bsp_priv->ops->set_clock_selection) {
+ bsp_priv->ops->set_clock_selection(bsp_priv,
+ bsp_priv->clock_input, false);
+ }
+
clk_bulk_disable_unprepare(bsp_priv->num_clks,
bsp_priv->clks);
clk_disable_unprepare(bsp_priv->clk_phy);
- if (bsp_priv->ops && bsp_priv->ops->set_clock_selection)
- bsp_priv->ops->set_clock_selection(bsp_priv,
- bsp_priv->clock_input, false);
-
bsp_priv->clk_enabled = false;
}
}
---
base-commit: 3a8660878839faadb4f1a6dd72c3179c1df56787
change-id: 20251014-rockchip-network-clock-fix-2c7069a6b6ec
Best regards,
--
Sebastian Reichel <sebastian.reichel(a)collabora.com>
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x f620d66af3165838bfa845dcf9f5f9b4089bf508
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101645-unplanned-pointy-70c8@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f620d66af3165838bfa845dcf9f5f9b4089bf508 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas(a)arm.com>
Date: Wed, 24 Sep 2025 13:31:22 +0100
Subject: [PATCH] arm64: mte: Do not flag the zero page as PG_mte_tagged
Commit 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the
zero page") attempted to fix ptrace() reading of tags from the zero page
by marking it as PG_mte_tagged during cpu_enable_mte(). The same commit
also changed the ptrace() tag access permission check to the VM_MTE vma
flag while turning the page flag test into a WARN_ON_ONCE().
Attempting to set the PG_mte_tagged flag early with
CONFIG_DEFERRED_STRUCT_PAGE_INIT enabled may either hang (after commit
d77e59a8fccd "arm64: mte: Lock a page for MTE tag initialisation") or
have the flags cleared later during page_alloc_init_late(). In addition,
pages_identical() -> memcmp_pages() will reject any comparison with the
zero page as it is marked as tagged.
Partially revert the above commit to avoid setting PG_mte_tagged on the
zero page. Update the __access_remote_tags() warning on untagged pages
to ignore the zero page since it is known to have the tags initialised.
Note that all user mapping of the zero page are marked as pte_special().
The arm64 set_pte_at() will not call mte_sync_tags() on such pages, so
PG_mte_tagged will remain cleared.
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
Fixes: 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the zero page")
Reported-by: Gergely Kovacs <Gergely.Kovacs2(a)arm.com>
Cc: stable(a)vger.kernel.org # 5.10.x
Cc: Will Deacon <will(a)kernel.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Lance Yang <lance.yang(a)linux.dev>
Acked-by: Lance Yang <lance.yang(a)linux.dev>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Lance Yang <lance.yang(a)linux.dev>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index ecb83ab0700e..7345987a50a0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2303,17 +2303,21 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
+ static bool cleared_zero_page = false;
+
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
mte_cpu_setup();
/*
* Clear the tags in the zero page. This needs to be done via the
- * linear map which has the Tagged attribute.
+ * linear map which has the Tagged attribute. Since this page is
+ * always mapped as pte_special(), set_pte_at() will not attempt to
+ * clear the tags or set PG_mte_tagged.
*/
- if (try_page_mte_tagging(ZERO_PAGE(0))) {
+ if (!cleared_zero_page) {
+ cleared_zero_page = true;
mte_clear_page_tags(lm_alias(empty_zero_page));
- set_page_mte_tagged(ZERO_PAGE(0));
}
kasan_init_hw_tags_cpu();
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index e5e773844889..63aed49ac181 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -460,7 +460,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
if (folio_test_hugetlb(folio))
WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
else
- WARN_ON_ONCE(!page_mte_tagged(page));
+ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x f620d66af3165838bfa845dcf9f5f9b4089bf508
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101645-blissful-snagged-feca@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f620d66af3165838bfa845dcf9f5f9b4089bf508 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas(a)arm.com>
Date: Wed, 24 Sep 2025 13:31:22 +0100
Subject: [PATCH] arm64: mte: Do not flag the zero page as PG_mte_tagged
Commit 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the
zero page") attempted to fix ptrace() reading of tags from the zero page
by marking it as PG_mte_tagged during cpu_enable_mte(). The same commit
also changed the ptrace() tag access permission check to the VM_MTE vma
flag while turning the page flag test into a WARN_ON_ONCE().
Attempting to set the PG_mte_tagged flag early with
CONFIG_DEFERRED_STRUCT_PAGE_INIT enabled may either hang (after commit
d77e59a8fccd "arm64: mte: Lock a page for MTE tag initialisation") or
have the flags cleared later during page_alloc_init_late(). In addition,
pages_identical() -> memcmp_pages() will reject any comparison with the
zero page as it is marked as tagged.
Partially revert the above commit to avoid setting PG_mte_tagged on the
zero page. Update the __access_remote_tags() warning on untagged pages
to ignore the zero page since it is known to have the tags initialised.
Note that all user mapping of the zero page are marked as pte_special().
The arm64 set_pte_at() will not call mte_sync_tags() on such pages, so
PG_mte_tagged will remain cleared.
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
Fixes: 68d54ceeec0e ("arm64: mte: Allow PTRACE_PEEKMTETAGS access to the zero page")
Reported-by: Gergely Kovacs <Gergely.Kovacs2(a)arm.com>
Cc: stable(a)vger.kernel.org # 5.10.x
Cc: Will Deacon <will(a)kernel.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Lance Yang <lance.yang(a)linux.dev>
Acked-by: Lance Yang <lance.yang(a)linux.dev>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Lance Yang <lance.yang(a)linux.dev>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index ecb83ab0700e..7345987a50a0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2303,17 +2303,21 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_MTE
static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
{
+ static bool cleared_zero_page = false;
+
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
mte_cpu_setup();
/*
* Clear the tags in the zero page. This needs to be done via the
- * linear map which has the Tagged attribute.
+ * linear map which has the Tagged attribute. Since this page is
+ * always mapped as pte_special(), set_pte_at() will not attempt to
+ * clear the tags or set PG_mte_tagged.
*/
- if (try_page_mte_tagging(ZERO_PAGE(0))) {
+ if (!cleared_zero_page) {
+ cleared_zero_page = true;
mte_clear_page_tags(lm_alias(empty_zero_page));
- set_page_mte_tagged(ZERO_PAGE(0));
}
kasan_init_hw_tags_cpu();
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index e5e773844889..63aed49ac181 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -460,7 +460,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
if (folio_test_hugetlb(folio))
WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
else
- WARN_ON_ONCE(!page_mte_tagged(page));
+ WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 0dccbc75e18df85399a71933d60b97494110f559
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101625-provoke-nutcase-6f67@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0dccbc75e18df85399a71933d60b97494110f559 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 27 Aug 2025 17:52:49 -0700
Subject: [PATCH] x86/kvm: Force legacy PCI hole to UC when overriding MTRRs
for TDX/SNP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When running as an SNP or TDX guest under KVM, force the legacy PCI hole,
i.e. memory between Top of Lower Usable DRAM and 4GiB, to be mapped as UC
via a forced variable MTRR range.
In most KVM-based setups, legacy devices such as the HPET and TPM are
enumerated via ACPI. ACPI enumeration includes a Memory32Fixed entry, and
optionally a SystemMemory descriptor for an OperationRegion, e.g. if the
device needs to be accessed via a Control Method.
If a SystemMemory entry is present, then the kernel's ACPI driver will
auto-ioremap the region so that it can be accessed at will. However, the
ACPI spec doesn't provide a way to enumerate the memory type of
SystemMemory regions, i.e. there's no way to tell software that a region
must be mapped as UC vs. WB, etc. As a result, Linux's ACPI driver always
maps SystemMemory regions using ioremap_cache(), i.e. as WB on x86.
The dedicated device drivers however, e.g. the HPET driver and TPM driver,
want to map their associated memory as UC or WC, as accessing PCI devices
using WB is unsupported.
On bare metal and non-CoCO, the conflicting requirements "work" as firmware
configures the PCI hole (and other device memory) to be UC in the MTRRs.
So even though the ACPI mappings request WB, they are forced to UC- in the
kernel's tracking due to the kernel properly handling the MTRR overrides,
and thus are compatible with the drivers' requested WC/UC-.
With force WB MTRRs on SNP and TDX guests, the ACPI mappings get their
requested WB if the ACPI mappings are established before the dedicated
driver code attempts to initialize the device. E.g. if acpi_init()
runs before the corresponding device driver is probed, ACPI's WB mapping
will "win", and result in the driver's ioremap() failing because the
existing WB mapping isn't compatible with the requested WC/UC-.
E.g. when a TPM is emulated by the hypervisor (ignoring the security
implications of relying on what is allegedly an untrusted entity to store
measurements), the TPM driver will request UC and fail:
[ 1.730459] ioremap error for 0xfed40000-0xfed45000, requested 0x2, got 0x0
[ 1.732780] tpm_tis MSFT0101:00: probe with driver tpm_tis failed with error -12
Note, the '0x2' and '0x0' values refer to "enum page_cache_mode", not x86's
memtypes (which frustratingly are an almost pure inversion; 2 == WB, 0 == UC).
E.g. tracing mapping requests for TPM TIS yields:
Mapping TPM TIS with req_type = 0
WARNING: CPU: 22 PID: 1 at arch/x86/mm/pat/memtype.c:530 memtype_reserve+0x2ab/0x460
Modules linked in:
CPU: 22 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.16.0-rc7+ #2 VOLUNTARY
Tainted: [W]=WARN
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/29/2025
RIP: 0010:memtype_reserve+0x2ab/0x460
__ioremap_caller+0x16d/0x3d0
ioremap_cache+0x17/0x30
x86_acpi_os_ioremap+0xe/0x20
acpi_os_map_iomem+0x1f3/0x240
acpi_os_map_memory+0xe/0x20
acpi_ex_system_memory_space_handler+0x273/0x440
acpi_ev_address_space_dispatch+0x176/0x4c0
acpi_ex_access_region+0x2ad/0x530
acpi_ex_field_datum_io+0xa2/0x4f0
acpi_ex_extract_from_field+0x296/0x3e0
acpi_ex_read_data_from_field+0xd1/0x460
acpi_ex_resolve_node_to_value+0x2ee/0x530
acpi_ex_resolve_to_value+0x1f2/0x540
acpi_ds_evaluate_name_path+0x11b/0x190
acpi_ds_exec_end_op+0x456/0x960
acpi_ps_parse_loop+0x27a/0xa50
acpi_ps_parse_aml+0x226/0x600
acpi_ps_execute_method+0x172/0x3e0
acpi_ns_evaluate+0x175/0x5f0
acpi_evaluate_object+0x213/0x490
acpi_evaluate_integer+0x6d/0x140
acpi_bus_get_status+0x93/0x150
acpi_add_single_object+0x43a/0x7c0
acpi_bus_check_add+0x149/0x3a0
acpi_bus_check_add_1+0x16/0x30
acpi_ns_walk_namespace+0x22c/0x360
acpi_walk_namespace+0x15c/0x170
acpi_bus_scan+0x1dd/0x200
acpi_scan_init+0xe5/0x2b0
acpi_init+0x264/0x5b0
do_one_initcall+0x5a/0x310
kernel_init_freeable+0x34f/0x4f0
kernel_init+0x1b/0x200
ret_from_fork+0x186/0x1b0
ret_from_fork_asm+0x1a/0x30
</TASK>
The above traces are from a Google-VMM based VM, but the same behavior
happens with a QEMU based VM that is modified to add a SystemMemory range
for the TPM TIS address space.
The only reason this doesn't cause problems for HPET, which appears to
require a SystemMemory region, is because HPET gets special treatment via
x86_init.timers.timer_init(), and so gets a chance to create its UC-
mapping before acpi_init() clobbers things. Disabling the early call to
hpet_time_init() yields the same behavior for HPET:
[ 0.318264] ioremap error for 0xfed00000-0xfed01000, requested 0x2, got 0x0
Hack around the ACPI gap by forcing the legacy PCI hole to UC when
overriding the (virtual) MTRRs for CoCo guest, so that ioremap handling
of MTRRs naturally kicks in and forces the ACPI mappings to be UC.
Note, the requested/mapped memtype doesn't actually matter in terms of
accessing the device. In practically every setup, legacy PCI devices are
emulated by the hypervisor, and accesses are intercepted and handled as
emulated MMIO, i.e. never access physical memory and thus don't have an
effective memtype.
Even in a theoretical setup where such devices are passed through by the
host, i.e. point at real MMIO memory, it is KVM's (as the hypervisor)
responsibility to force the memory to be WC/UC, e.g. via EPT memtype
under TDX or real hardware MTRRs under SNP. Not doing so cannot work,
and the hypervisor is highly motivated to do the right thing as letting
the guest access hardware MMIO with WB would likely result in a variety
of fatal #MCs.
In other words, forcing the range to be UC is all about coercing the
kernel's tracking into thinking that it has established UC mappings, so
that the ioremap code doesn't reject mappings from e.g. the TPM driver and
thus prevent the driver from loading and the device from functioning.
Note #2, relying on guest firmware to handle this scenario, e.g. by setting
virtual MTRRs and then consuming them in Linux, is not a viable option, as
the virtual MTRR state is managed by the untrusted hypervisor, and because
OVMF at least has stopped programming virtual MTRRs when running as a TDX
guest.
Link: https://lore.kernel.org/all/8137d98e-8825-415b-9282-1d2a115bb51a@linux.inte…
Fixes: 8e690b817e38 ("x86/kvm: Override default caching mode for SEV-SNP and TDX")
Cc: stable(a)vger.kernel.org
Cc: Peter Gonda <pgonda(a)google.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: Jürgen Groß <jgross(a)suse.com>
Cc: Korakit Seemakhupt <korakit(a)google.com>
Cc: Jianxiong Gao <jxgao(a)google.com>
Cc: Nikolay Borisov <nik.borisov(a)suse.com>
Suggested-by: Binbin Wu <binbin.wu(a)linux.intel.com>
Reviewed-by: Binbin Wu <binbin.wu(a)linux.intel.com>
Tested-by: Korakit Seemakhupt <korakit(a)google.com>
Link: https://lore.kernel.org/r/20250828005249.39339-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8ae750cde0c6..57379698015e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -933,6 +933,19 @@ static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
static void __init kvm_init_platform(void)
{
+ u64 tolud = PFN_PHYS(e820__end_of_low_ram_pfn());
+ /*
+ * Note, hardware requires variable MTRR ranges to be power-of-2 sized
+ * and naturally aligned. But when forcing guest MTRR state, Linux
+ * doesn't program the forced ranges into hardware. Don't bother doing
+ * the math to generate a technically-legal range.
+ */
+ struct mtrr_var_range pci_hole = {
+ .base_lo = tolud | X86_MEMTYPE_UC,
+ .mask_lo = (u32)(~(SZ_4G - tolud - 1)) | MTRR_PHYSMASK_V,
+ .mask_hi = (BIT_ULL(boot_cpu_data.x86_phys_bits) - 1) >> 32,
+ };
+
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
unsigned long nr_pages;
@@ -982,8 +995,12 @@ static void __init kvm_init_platform(void)
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
- /* Set WB as the default cache mode for SEV-SNP and TDX */
- guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
+ /*
+ * Set WB as the default cache mode for SEV-SNP and TDX, with a single
+ * UC range for the legacy PCI hole, e.g. so that devices that expect
+ * to get UC/WC mappings don't get surprised with WB.
+ */
+ guest_force_mtrr_state(&pci_hole, 1, MTRR_TYPE_WRBACK);
}
#if defined(CONFIG_AMD_MEM_ENCRYPT)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 316294bb6695a43a9181973ecd4e6fb3e576a9f7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101653-armory-oxygen-af78@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 316294bb6695a43a9181973ecd4e6fb3e576a9f7 Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan.gerhold(a)linaro.org>
Date: Thu, 21 Aug 2025 10:15:09 +0200
Subject: [PATCH] arm64: dts: qcom: sdm845: Fix slimbam num-channels/ees
Reading the hardware registers of the &slimbam on RB3 reveals that the BAM
supports only 23 pipes (channels) and supports 4 EEs instead of 2. This
hasn't caused problems so far since nothing is using the extra channels,
but attempting to use them would lead to crashes.
The bam_dma driver might warn in the future if the num-channels in the DT
are wrong, so correct the properties in the DT to avoid future regressions.
Cc: stable(a)vger.kernel.org
Fixes: 27ca1de07dc3 ("arm64: dts: qcom: sdm845: add slimbus nodes")
Signed-off-by: Stephan Gerhold <stephan.gerhold(a)linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)oss.qualcomm.com>
Link: https://lore.kernel.org/r/20250821-sdm845-slimbam-channels-v1-1-498f7d46b9e…
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 828b55cb6baf..02536114edb8 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -5396,11 +5396,11 @@ slimbam: dma-controller@17184000 {
compatible = "qcom,bam-v1.7.4", "qcom,bam-v1.7.0";
qcom,controlled-remotely;
reg = <0 0x17184000 0 0x2a000>;
- num-channels = <31>;
+ num-channels = <23>;
interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
#dma-cells = <1>;
qcom,ee = <1>;
- qcom,num-ees = <2>;
+ qcom,num-ees = <4>;
iommus = <&apps_smmu 0x1806 0x0>;
};
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 316294bb6695a43a9181973ecd4e6fb3e576a9f7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101653-gray-unbroken-db61@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 316294bb6695a43a9181973ecd4e6fb3e576a9f7 Mon Sep 17 00:00:00 2001
From: Stephan Gerhold <stephan.gerhold(a)linaro.org>
Date: Thu, 21 Aug 2025 10:15:09 +0200
Subject: [PATCH] arm64: dts: qcom: sdm845: Fix slimbam num-channels/ees
Reading the hardware registers of the &slimbam on RB3 reveals that the BAM
supports only 23 pipes (channels) and supports 4 EEs instead of 2. This
hasn't caused problems so far since nothing is using the extra channels,
but attempting to use them would lead to crashes.
The bam_dma driver might warn in the future if the num-channels in the DT
are wrong, so correct the properties in the DT to avoid future regressions.
Cc: stable(a)vger.kernel.org
Fixes: 27ca1de07dc3 ("arm64: dts: qcom: sdm845: add slimbus nodes")
Signed-off-by: Stephan Gerhold <stephan.gerhold(a)linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov(a)oss.qualcomm.com>
Link: https://lore.kernel.org/r/20250821-sdm845-slimbam-channels-v1-1-498f7d46b9e…
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 828b55cb6baf..02536114edb8 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -5396,11 +5396,11 @@ slimbam: dma-controller@17184000 {
compatible = "qcom,bam-v1.7.4", "qcom,bam-v1.7.0";
qcom,controlled-remotely;
reg = <0 0x17184000 0 0x2a000>;
- num-channels = <31>;
+ num-channels = <23>;
interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
#dma-cells = <1>;
qcom,ee = <1>;
- qcom,num-ees = <2>;
+ qcom,num-ees = <4>;
iommus = <&apps_smmu 0x1806 0x0>;
};
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101649-bride-landmark-1121@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Mon, 15 Sep 2025 20:28:52 +0200
Subject: [PATCH] ACPI: property: Do not pass NULL handles to
acpi_attach_data()
In certain circumstances, the ACPI handle of a data-only node may be
NULL, in which case it does not make sense to attempt to attach that
node to an ACPI namespace object, so update the code to avoid attempts
to do so.
This prevents confusing and unuseful error messages from being printed.
Also document the fact that the ACPI handle of a data-only node may be
NULL and when that happens in a code comment. In addition, make
acpi_add_nondev_subnodes() print a diagnostic message for each data-only
node with an unknown ACPI namespace scope.
Fixes: 1d52f10917a7 ("ACPI: property: Tie data nodes to acpi handles")
Cc: 6.0+ <stable(a)vger.kernel.org> # 6.0+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Tested-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index f4776a4085e0..c086786fe84c 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -124,6 +124,10 @@ static bool acpi_nondev_subnode_extract(union acpi_object *desc,
result = true;
if (result) {
+ /*
+ * This will be NULL if the desc package is embedded in an outer
+ * _DSD-equivalent package and its scope cannot be determined.
+ */
dn->handle = handle;
dn->data.pointer = desc;
list_add_tail(&dn->sibling, list);
@@ -224,6 +228,8 @@ static bool acpi_add_nondev_subnodes(acpi_handle scope,
* strings because there is no way to build full
* pathnames out of them.
*/
+ acpi_handle_debug(scope, "subnode %s: Unknown scope\n",
+ link->package.elements[0].string.pointer);
desc = &link->package.elements[1];
result = acpi_nondev_subnode_extract(desc, NULL, link,
list, parent);
@@ -396,6 +402,9 @@ static void acpi_untie_nondev_subnodes(struct acpi_device_data *data)
struct acpi_data_node *dn;
list_for_each_entry(dn, &data->subnodes, sibling) {
+ if (!dn->handle)
+ continue;
+
acpi_detach_data(dn->handle, acpi_nondev_subnode_tag);
acpi_untie_nondev_subnodes(&dn->data);
@@ -410,6 +419,9 @@ static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data)
acpi_status status;
bool ret;
+ if (!dn->handle)
+ continue;
+
status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn);
if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) {
acpi_handle_err(dn->handle, "Can't tag data node\n");
Hi,
As discussed in [1], here are the backports of the s390 BPF tail
counter fixes. Commits 1-3 are NFC prerequisites, commits 4-5 are
fixes themselves.
[1] https://lore.kernel.org/stable/CA+G9fYsdErtgqKuyPfFhMS9haGKavBVCHQnipv2EeXM…
Best regards,
Ilya
Ilya Leoshkevich (5):
s390/bpf: Change seen_reg to a mask
s390/bpf: Centralize frame offset calculations
s390/bpf: Describe the frame using a struct instead of constants
s390/bpf: Write back tail call counter for BPF_PSEUDO_CALL
s390/bpf: Write back tail call counter for BPF_TRAMP_F_CALL_ORIG
arch/s390/net/bpf_jit.h | 55 ------------
arch/s390/net/bpf_jit_comp.c | 161 +++++++++++++++++++++--------------
2 files changed, 97 insertions(+), 119 deletions(-)
delete mode 100644 arch/s390/net/bpf_jit.h
--
2.51.0
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 195a1b7d8388c0ec2969a39324feb8bebf9bb907
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101658-gladiator-reheat-eca3@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 195a1b7d8388c0ec2969a39324feb8bebf9bb907 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang(a)os.amperecomputing.com>
Date: Thu, 18 Sep 2025 09:23:49 -0700
Subject: [PATCH] arm64: kprobes: call set_memory_rox() for kprobe page
The kprobe page is allocated by execmem allocator with ROX permission.
It needs to call set_memory_rox() to set proper permission for the
direct map too. It was missed.
Fixes: 10d5e97c1bf8 ("arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com>
Reviewed-by: Catalin Marinas <catalin.marinas(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 0c5d408afd95..8ab6104a4883 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "kprobes: " fmt
+#include <linux/execmem.h>
#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -41,6 +42,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+void *alloc_insn_page(void)
+{
+ void *addr;
+
+ addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+ set_memory_rox((unsigned long)addr, 1);
+ return addr;
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
kprobe_opcode_t *addr = p->ainsn.xol_insn;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101648-surely-manhunt-fac9@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Mon, 15 Sep 2025 20:28:52 +0200
Subject: [PATCH] ACPI: property: Do not pass NULL handles to
acpi_attach_data()
In certain circumstances, the ACPI handle of a data-only node may be
NULL, in which case it does not make sense to attempt to attach that
node to an ACPI namespace object, so update the code to avoid attempts
to do so.
This prevents confusing and unuseful error messages from being printed.
Also document the fact that the ACPI handle of a data-only node may be
NULL and when that happens in a code comment. In addition, make
acpi_add_nondev_subnodes() print a diagnostic message for each data-only
node with an unknown ACPI namespace scope.
Fixes: 1d52f10917a7 ("ACPI: property: Tie data nodes to acpi handles")
Cc: 6.0+ <stable(a)vger.kernel.org> # 6.0+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Tested-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index f4776a4085e0..c086786fe84c 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -124,6 +124,10 @@ static bool acpi_nondev_subnode_extract(union acpi_object *desc,
result = true;
if (result) {
+ /*
+ * This will be NULL if the desc package is embedded in an outer
+ * _DSD-equivalent package and its scope cannot be determined.
+ */
dn->handle = handle;
dn->data.pointer = desc;
list_add_tail(&dn->sibling, list);
@@ -224,6 +228,8 @@ static bool acpi_add_nondev_subnodes(acpi_handle scope,
* strings because there is no way to build full
* pathnames out of them.
*/
+ acpi_handle_debug(scope, "subnode %s: Unknown scope\n",
+ link->package.elements[0].string.pointer);
desc = &link->package.elements[1];
result = acpi_nondev_subnode_extract(desc, NULL, link,
list, parent);
@@ -396,6 +402,9 @@ static void acpi_untie_nondev_subnodes(struct acpi_device_data *data)
struct acpi_data_node *dn;
list_for_each_entry(dn, &data->subnodes, sibling) {
+ if (!dn->handle)
+ continue;
+
acpi_detach_data(dn->handle, acpi_nondev_subnode_tag);
acpi_untie_nondev_subnodes(&dn->data);
@@ -410,6 +419,9 @@ static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data)
acpi_status status;
bool ret;
+ if (!dn->handle)
+ continue;
+
status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn);
if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) {
acpi_handle_err(dn->handle, "Can't tag data node\n");
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101647-handiwork-catcall-487b@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Mon, 15 Sep 2025 20:28:52 +0200
Subject: [PATCH] ACPI: property: Do not pass NULL handles to
acpi_attach_data()
In certain circumstances, the ACPI handle of a data-only node may be
NULL, in which case it does not make sense to attempt to attach that
node to an ACPI namespace object, so update the code to avoid attempts
to do so.
This prevents confusing and unuseful error messages from being printed.
Also document the fact that the ACPI handle of a data-only node may be
NULL and when that happens in a code comment. In addition, make
acpi_add_nondev_subnodes() print a diagnostic message for each data-only
node with an unknown ACPI namespace scope.
Fixes: 1d52f10917a7 ("ACPI: property: Tie data nodes to acpi handles")
Cc: 6.0+ <stable(a)vger.kernel.org> # 6.0+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Tested-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index f4776a4085e0..c086786fe84c 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -124,6 +124,10 @@ static bool acpi_nondev_subnode_extract(union acpi_object *desc,
result = true;
if (result) {
+ /*
+ * This will be NULL if the desc package is embedded in an outer
+ * _DSD-equivalent package and its scope cannot be determined.
+ */
dn->handle = handle;
dn->data.pointer = desc;
list_add_tail(&dn->sibling, list);
@@ -224,6 +228,8 @@ static bool acpi_add_nondev_subnodes(acpi_handle scope,
* strings because there is no way to build full
* pathnames out of them.
*/
+ acpi_handle_debug(scope, "subnode %s: Unknown scope\n",
+ link->package.elements[0].string.pointer);
desc = &link->package.elements[1];
result = acpi_nondev_subnode_extract(desc, NULL, link,
list, parent);
@@ -396,6 +402,9 @@ static void acpi_untie_nondev_subnodes(struct acpi_device_data *data)
struct acpi_data_node *dn;
list_for_each_entry(dn, &data->subnodes, sibling) {
+ if (!dn->handle)
+ continue;
+
acpi_detach_data(dn->handle, acpi_nondev_subnode_tag);
acpi_untie_nondev_subnodes(&dn->data);
@@ -410,6 +419,9 @@ static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data)
acpi_status status;
bool ret;
+ if (!dn->handle)
+ continue;
+
status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn);
if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) {
acpi_handle_err(dn->handle, "Can't tag data node\n");
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 4b1ff850e0c1aacc23e923ed22989b827b9808f9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101650-setback-proofs-9687@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4b1ff850e0c1aacc23e923ed22989b827b9808f9 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Thu, 25 Sep 2025 12:32:36 +0200
Subject: [PATCH] mptcp: pm: in-kernel: usable client side with C-flag
When servers set the C-flag in their MP_CAPABLE to tell clients not to
create subflows to the initial address and port, clients will likely not
use their other endpoints. That's because the in-kernel path-manager
uses the 'subflow' endpoints to create subflows only to the initial
address and port.
If the limits have not been modified to accept ADD_ADDR, the client
doesn't try to establish new subflows. If the limits accept ADD_ADDR,
the routing routes will be used to select the source IP.
The C-flag is typically set when the server is operating behind a legacy
Layer 4 load balancer, or using anycast IP address. Clients having their
different 'subflow' endpoints setup, don't end up creating multiple
subflows as expected, and causing some deployment issues.
A special case is then added here: when servers set the C-flag in the
MPC and directly sends an ADD_ADDR, this single ADD_ADDR is accepted.
The 'subflows' endpoints will then be used with this new remote IP and
port. This exception is only allowed when the ADD_ADDR is sent
immediately after the 3WHS, and makes the client switching to the 'fully
established' mode. After that, 'select_local_address()' will not be able
to find any subflows, because 'id_avail_bitmap' will be filled in
mptcp_pm_create_subflow_or_signal_addr(), when switching to 'fully
established' mode.
Fixes: df377be38725 ("mptcp: add deny_join_id0 in mptcp_options_received")
Cc: stable(a)vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/536
Reviewed-by: Geliang Tang <geliang(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20250925-net-next-mptcp-c-flag-laminar-v1-1-ad126c…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 204e1f61212e..584cab90aa6e 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -637,9 +637,12 @@ void mptcp_pm_add_addr_received(const struct sock *ssk,
} else {
__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
}
- /* id0 should not have a different address */
+ /* - id0 should not have a different address
+ * - special case for C-flag: linked to fill_local_addresses_vec()
+ */
} else if ((addr->id == 0 && !mptcp_pm_is_init_remote_addr(msk, addr)) ||
- (addr->id > 0 && !READ_ONCE(pm->accept_addr))) {
+ (addr->id > 0 && !READ_ONCE(pm->accept_addr) &&
+ !mptcp_pm_add_addr_c_flag_case(msk))) {
mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c
index 667803d72b64..8c46493a0835 100644
--- a/net/mptcp/pm_kernel.c
+++ b/net/mptcp/pm_kernel.c
@@ -389,10 +389,12 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info mpc_addr;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
+ bool c_flag_case;
int i = 0;
pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
+ c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk);
mptcp_local_address((struct sock_common *)msk, &mpc_addr);
@@ -405,12 +407,27 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
continue;
if (msk->pm.subflows < subflows_max) {
+ bool is_id0;
+
locals[i].addr = entry->addr;
locals[i].flags = entry->flags;
locals[i].ifindex = entry->ifindex;
+ is_id0 = mptcp_addresses_equal(&locals[i].addr,
+ &mpc_addr,
+ locals[i].addr.port);
+
+ if (c_flag_case &&
+ (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) {
+ __clear_bit(locals[i].addr.id,
+ msk->pm.id_avail_bitmap);
+
+ if (!is_id0)
+ msk->pm.local_addr_used++;
+ }
+
/* Special case for ID0: set the correct ID */
- if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port))
+ if (is_id0)
locals[i].addr.id = 0;
msk->pm.subflows++;
@@ -419,6 +436,37 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
}
rcu_read_unlock();
+ /* Special case: peer sets the C flag, accept one ADD_ADDR if default
+ * limits are used -- accepting no ADD_ADDR -- and use subflow endpoints
+ */
+ if (!i && c_flag_case) {
+ unsigned int local_addr_max = mptcp_pm_get_local_addr_max(msk);
+
+ while (msk->pm.local_addr_used < local_addr_max &&
+ msk->pm.subflows < subflows_max) {
+ struct mptcp_pm_local *local = &locals[i];
+
+ if (!select_local_address(pernet, msk, local))
+ break;
+
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+
+ if (!mptcp_pm_addr_families_match(sk, &local->addr,
+ remote))
+ continue;
+
+ if (mptcp_addresses_equal(&local->addr, &mpc_addr,
+ local->addr.port))
+ continue;
+
+ msk->pm.local_addr_used++;
+ msk->pm.subflows++;
+ i++;
+ }
+
+ return i;
+ }
+
/* If the array is empty, fill in the single
* 'IPADDRANY' local address
*/
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a1787a1344ac..cbe54331e5c7 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -1199,6 +1199,14 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
+static inline bool mptcp_pm_add_addr_c_flag_case(struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.remote_deny_join_id0) &&
+ msk->pm.local_addr_used == 0 &&
+ mptcp_pm_get_add_addr_accept_max(msk) == 0 &&
+ msk->pm.subflows < mptcp_pm_get_subflows_max(msk);
+}
+
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101647-crushable-wiring-b094@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From baf60d5cb8bc6b85511c5df5f0ad7620bb66d23c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Mon, 15 Sep 2025 20:28:52 +0200
Subject: [PATCH] ACPI: property: Do not pass NULL handles to
acpi_attach_data()
In certain circumstances, the ACPI handle of a data-only node may be
NULL, in which case it does not make sense to attempt to attach that
node to an ACPI namespace object, so update the code to avoid attempts
to do so.
This prevents confusing and unuseful error messages from being printed.
Also document the fact that the ACPI handle of a data-only node may be
NULL and when that happens in a code comment. In addition, make
acpi_add_nondev_subnodes() print a diagnostic message for each data-only
node with an unknown ACPI namespace scope.
Fixes: 1d52f10917a7 ("ACPI: property: Tie data nodes to acpi handles")
Cc: 6.0+ <stable(a)vger.kernel.org> # 6.0+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
Tested-by: Sakari Ailus <sakari.ailus(a)linux.intel.com>
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index f4776a4085e0..c086786fe84c 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -124,6 +124,10 @@ static bool acpi_nondev_subnode_extract(union acpi_object *desc,
result = true;
if (result) {
+ /*
+ * This will be NULL if the desc package is embedded in an outer
+ * _DSD-equivalent package and its scope cannot be determined.
+ */
dn->handle = handle;
dn->data.pointer = desc;
list_add_tail(&dn->sibling, list);
@@ -224,6 +228,8 @@ static bool acpi_add_nondev_subnodes(acpi_handle scope,
* strings because there is no way to build full
* pathnames out of them.
*/
+ acpi_handle_debug(scope, "subnode %s: Unknown scope\n",
+ link->package.elements[0].string.pointer);
desc = &link->package.elements[1];
result = acpi_nondev_subnode_extract(desc, NULL, link,
list, parent);
@@ -396,6 +402,9 @@ static void acpi_untie_nondev_subnodes(struct acpi_device_data *data)
struct acpi_data_node *dn;
list_for_each_entry(dn, &data->subnodes, sibling) {
+ if (!dn->handle)
+ continue;
+
acpi_detach_data(dn->handle, acpi_nondev_subnode_tag);
acpi_untie_nondev_subnodes(&dn->data);
@@ -410,6 +419,9 @@ static bool acpi_tie_nondev_subnodes(struct acpi_device_data *data)
acpi_status status;
bool ret;
+ if (!dn->handle)
+ continue;
+
status = acpi_attach_data(dn->handle, acpi_nondev_subnode_tag, dn);
if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) {
acpi_handle_err(dn->handle, "Can't tag data node\n");
Hi, Sasha,
On Wed, Oct 15, 2025 at 7:42 PM Sasha Levin <sashal(a)kernel.org> wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> LoongArch: Init acpi_gbl_use_global_lock to false
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> loongarch-init-acpi_gbl_use_global_lock-to-false.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
Please also backport feb8ae81b2378b75a99c81d3156 ("ACPICA: Allow to
skip Global Lock initialization") as a dependency for all stable
branches.
Huacai
>
>
> commit abbcba805d220623909699cbeb81b739d3dd5a06
> Author: Huacai Chen <chenhuacai(a)kernel.org>
> Date: Thu Oct 2 22:38:57 2025 +0800
>
> LoongArch: Init acpi_gbl_use_global_lock to false
>
> [ Upstream commit 98662be7ef20d2b88b598f72e7ce9b6ac26a40f9 ]
>
> Init acpi_gbl_use_global_lock to false, in order to void error messages
> during boot phase:
>
> ACPI Error: Could not enable GlobalLock event (20240827/evxfevnt-182)
> ACPI Error: No response from Global Lock hardware, disabling lock (20240827/evglock-59)
>
> Fixes: 628c3bb40e9a8cefc0a6 ("LoongArch: Add boot and setup routines")
> Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
> index 1fa6a604734ef..2ceb198ae8c80 100644
> --- a/arch/loongarch/kernel/setup.c
> +++ b/arch/loongarch/kernel/setup.c
> @@ -354,6 +354,7 @@ void __init platform_init(void)
>
> #ifdef CONFIG_ACPI
> acpi_table_upgrade();
> + acpi_gbl_use_global_lock = false;
> acpi_gbl_use_default_register_widths = false;
> acpi_boot_table_init();
> #endif
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8ecb790ea8c3fc69e77bace57f14cf0d7c177bd8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101634-energize-tartness-d7b6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8ecb790ea8c3fc69e77bace57f14cf0d7c177bd8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso(a)mit.edu>
Date: Tue, 16 Sep 2025 23:22:47 -0400
Subject: [PATCH] ext4: avoid potential buffer over-read in
parse_apply_sb_mount_options()
Unlike other strings in the ext4 superblock, we rely on tune2fs to
make sure s_mount_opts is NUL terminated. Harden
parse_apply_sb_mount_options() by treating s_mount_opts as a potential
__nonstring.
Cc: stable(a)vger.kernel.org
Fixes: 8b67f04ab9de ("ext4: Add mount options in superblock")
Reviewed-by: Jan Kara <jack(a)suse.cz>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Message-ID: <20250916-tune2fs-v2-1-d594dc7486f0(a)mit.edu>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d26e5c0731e5..488f4c281a3f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2469,7 +2469,7 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
struct ext4_fs_context *m_ctx)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *s_mount_opts = NULL;
+ char s_mount_opts[65];
struct ext4_fs_context *s_ctx = NULL;
struct fs_context *fc = NULL;
int ret = -ENOMEM;
@@ -2477,15 +2477,11 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
if (!sbi->s_es->s_mount_opts[0])
return 0;
- s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
- sizeof(sbi->s_es->s_mount_opts),
- GFP_KERNEL);
- if (!s_mount_opts)
- return ret;
+ strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
- goto out_free;
+ return -ENOMEM;
s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
if (!s_ctx)
@@ -2517,11 +2513,8 @@ static int parse_apply_sb_mount_options(struct super_block *sb,
ret = 0;
out_free:
- if (fc) {
- ext4_fc_free(fc);
- kfree(fc);
- }
- kfree(s_mount_opts);
+ ext4_fc_free(fc);
+ kfree(fc);
return ret;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 399dbcadc01ebf0035f325eaa8c264f8b5cd0a14
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101635-disaster-pasture-f32d@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399dbcadc01ebf0035f325eaa8c264f8b5cd0a14 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Sun, 28 Sep 2025 12:18:29 +0200
Subject: [PATCH] ACPI: battery: Add synchronization between interface updates
There is no synchronization between different code paths in the ACPI
battery driver that update its sysfs interface or its power supply
class device interface. In some cases this results to functional
failures due to race conditions.
One example of this is when two ACPI notifications:
- ACPI_BATTERY_NOTIFY_STATUS (0x80)
- ACPI_BATTERY_NOTIFY_INFO (0x81)
are triggered (by the platform firmware) in a row with a little delay
in between after removing and reinserting a laptop battery. Both
notifications cause acpi_battery_update() to be called and if the delay
between them is sufficiently small, sysfs_add_battery() can be re-entered
before battery->bat is set which leads to a duplicate sysfs entry error:
sysfs: cannot create duplicate filename '/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0C0A:00/power_supply/BAT1'
CPU: 1 UID: 0 PID: 185 Comm: kworker/1:4 Kdump: loaded Not tainted 6.12.38+deb13-amd64 #1 Debian 6.12.38-1
Hardware name: Gateway NV44 /SJV40-MV , BIOS V1.3121 04/08/2009
Workqueue: kacpi_notify acpi_os_execute_deferred
Call Trace:
<TASK>
dump_stack_lvl+0x5d/0x80
sysfs_warn_dup.cold+0x17/0x23
sysfs_create_dir_ns+0xce/0xe0
kobject_add_internal+0xba/0x250
kobject_add+0x96/0xc0
? get_device_parent+0xde/0x1e0
device_add+0xe2/0x870
__power_supply_register.part.0+0x20f/0x3f0
? wake_up_q+0x4e/0x90
sysfs_add_battery+0xa4/0x1d0 [battery]
acpi_battery_update+0x19e/0x290 [battery]
acpi_battery_notify+0x50/0x120 [battery]
acpi_ev_notify_dispatch+0x49/0x70
acpi_os_execute_deferred+0x1a/0x30
process_one_work+0x177/0x330
worker_thread+0x251/0x390
? __pfx_worker_thread+0x10/0x10
kthread+0xd2/0x100
? __pfx_kthread+0x10/0x10
ret_from_fork+0x34/0x50
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1a/0x30
</TASK>
kobject: kobject_add_internal failed for BAT1 with -EEXIST, don't try to register things with the same name in the same directory.
There are also other scenarios in which analogous issues may occur.
Address this by using a common lock in all of the code paths leading
to updates of driver interfaces: ACPI Notify () handler, system resume
callback and post-resume notification, device addition and removal.
This new lock replaces sysfs_lock that has been used only in
sysfs_remove_battery() which now is going to be always called under
the new lock, so it doesn't need any internal locking any more.
Fixes: 10666251554c ("ACPI: battery: Install Notify() handler directly")
Closes: https://lore.kernel.org/linux-acpi/20250910142653.313360-1-luogf2025@163.co…
Reported-by: GuangFei Luo <luogf2025(a)163.com>
Tested-by: GuangFei Luo <luogf2025(a)163.com>
Cc: 6.6+ <stable(a)vger.kernel.org> # 6.6+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 6905b56bf3e4..67b76492c839 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -92,7 +92,7 @@ enum {
struct acpi_battery {
struct mutex lock;
- struct mutex sysfs_lock;
+ struct mutex update_lock;
struct power_supply *bat;
struct power_supply_desc bat_desc;
struct acpi_device *device;
@@ -904,15 +904,12 @@ static int sysfs_add_battery(struct acpi_battery *battery)
static void sysfs_remove_battery(struct acpi_battery *battery)
{
- mutex_lock(&battery->sysfs_lock);
- if (!battery->bat) {
- mutex_unlock(&battery->sysfs_lock);
+ if (!battery->bat)
return;
- }
+
battery_hook_remove_battery(battery);
power_supply_unregister(battery->bat);
battery->bat = NULL;
- mutex_unlock(&battery->sysfs_lock);
}
static void find_battery(const struct dmi_header *dm, void *private)
@@ -1072,6 +1069,9 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
if (!battery)
return;
+
+ guard(mutex)(&battery->update_lock);
+
old = battery->bat;
/*
* On Acer Aspire V5-573G notifications are sometimes triggered too
@@ -1094,21 +1094,22 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
}
static int battery_notify(struct notifier_block *nb,
- unsigned long mode, void *_unused)
+ unsigned long mode, void *_unused)
{
struct acpi_battery *battery = container_of(nb, struct acpi_battery,
pm_nb);
- int result;
- switch (mode) {
- case PM_POST_HIBERNATION:
- case PM_POST_SUSPEND:
+ if (mode == PM_POST_SUSPEND || mode == PM_POST_HIBERNATION) {
+ guard(mutex)(&battery->update_lock);
+
if (!acpi_battery_present(battery))
return 0;
if (battery->bat) {
acpi_battery_refresh(battery);
} else {
+ int result;
+
result = acpi_battery_get_info(battery);
if (result)
return result;
@@ -1120,7 +1121,6 @@ static int battery_notify(struct notifier_block *nb,
acpi_battery_init_alarm(battery);
acpi_battery_get_state(battery);
- break;
}
return 0;
@@ -1198,6 +1198,8 @@ static int acpi_battery_update_retry(struct acpi_battery *battery)
{
int retry, ret;
+ guard(mutex)(&battery->update_lock);
+
for (retry = 5; retry; retry--) {
ret = acpi_battery_update(battery, false);
if (!ret)
@@ -1208,6 +1210,13 @@ static int acpi_battery_update_retry(struct acpi_battery *battery)
return ret;
}
+static void sysfs_battery_cleanup(struct acpi_battery *battery)
+{
+ guard(mutex)(&battery->update_lock);
+
+ sysfs_remove_battery(battery);
+}
+
static int acpi_battery_add(struct acpi_device *device)
{
int result = 0;
@@ -1230,7 +1239,7 @@ static int acpi_battery_add(struct acpi_device *device)
if (result)
return result;
- result = devm_mutex_init(&device->dev, &battery->sysfs_lock);
+ result = devm_mutex_init(&device->dev, &battery->update_lock);
if (result)
return result;
@@ -1262,7 +1271,7 @@ static int acpi_battery_add(struct acpi_device *device)
device_init_wakeup(&device->dev, 0);
unregister_pm_notifier(&battery->pm_nb);
fail:
- sysfs_remove_battery(battery);
+ sysfs_battery_cleanup(battery);
return result;
}
@@ -1281,6 +1290,9 @@ static void acpi_battery_remove(struct acpi_device *device)
device_init_wakeup(&device->dev, 0);
unregister_pm_notifier(&battery->pm_nb);
+
+ guard(mutex)(&battery->update_lock);
+
sysfs_remove_battery(battery);
}
@@ -1297,6 +1309,9 @@ static int acpi_battery_resume(struct device *dev)
return -EINVAL;
battery->update_time = 0;
+
+ guard(mutex)(&battery->update_lock);
+
acpi_battery_update(battery, true);
return 0;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 399dbcadc01ebf0035f325eaa8c264f8b5cd0a14
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101634-factsheet-preplan-069e@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399dbcadc01ebf0035f325eaa8c264f8b5cd0a14 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Sun, 28 Sep 2025 12:18:29 +0200
Subject: [PATCH] ACPI: battery: Add synchronization between interface updates
There is no synchronization between different code paths in the ACPI
battery driver that update its sysfs interface or its power supply
class device interface. In some cases this results to functional
failures due to race conditions.
One example of this is when two ACPI notifications:
- ACPI_BATTERY_NOTIFY_STATUS (0x80)
- ACPI_BATTERY_NOTIFY_INFO (0x81)
are triggered (by the platform firmware) in a row with a little delay
in between after removing and reinserting a laptop battery. Both
notifications cause acpi_battery_update() to be called and if the delay
between them is sufficiently small, sysfs_add_battery() can be re-entered
before battery->bat is set which leads to a duplicate sysfs entry error:
sysfs: cannot create duplicate filename '/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0C0A:00/power_supply/BAT1'
CPU: 1 UID: 0 PID: 185 Comm: kworker/1:4 Kdump: loaded Not tainted 6.12.38+deb13-amd64 #1 Debian 6.12.38-1
Hardware name: Gateway NV44 /SJV40-MV , BIOS V1.3121 04/08/2009
Workqueue: kacpi_notify acpi_os_execute_deferred
Call Trace:
<TASK>
dump_stack_lvl+0x5d/0x80
sysfs_warn_dup.cold+0x17/0x23
sysfs_create_dir_ns+0xce/0xe0
kobject_add_internal+0xba/0x250
kobject_add+0x96/0xc0
? get_device_parent+0xde/0x1e0
device_add+0xe2/0x870
__power_supply_register.part.0+0x20f/0x3f0
? wake_up_q+0x4e/0x90
sysfs_add_battery+0xa4/0x1d0 [battery]
acpi_battery_update+0x19e/0x290 [battery]
acpi_battery_notify+0x50/0x120 [battery]
acpi_ev_notify_dispatch+0x49/0x70
acpi_os_execute_deferred+0x1a/0x30
process_one_work+0x177/0x330
worker_thread+0x251/0x390
? __pfx_worker_thread+0x10/0x10
kthread+0xd2/0x100
? __pfx_kthread+0x10/0x10
ret_from_fork+0x34/0x50
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0x1a/0x30
</TASK>
kobject: kobject_add_internal failed for BAT1 with -EEXIST, don't try to register things with the same name in the same directory.
There are also other scenarios in which analogous issues may occur.
Address this by using a common lock in all of the code paths leading
to updates of driver interfaces: ACPI Notify () handler, system resume
callback and post-resume notification, device addition and removal.
This new lock replaces sysfs_lock that has been used only in
sysfs_remove_battery() which now is going to be always called under
the new lock, so it doesn't need any internal locking any more.
Fixes: 10666251554c ("ACPI: battery: Install Notify() handler directly")
Closes: https://lore.kernel.org/linux-acpi/20250910142653.313360-1-luogf2025@163.co…
Reported-by: GuangFei Luo <luogf2025(a)163.com>
Tested-by: GuangFei Luo <luogf2025(a)163.com>
Cc: 6.6+ <stable(a)vger.kernel.org> # 6.6+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 6905b56bf3e4..67b76492c839 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -92,7 +92,7 @@ enum {
struct acpi_battery {
struct mutex lock;
- struct mutex sysfs_lock;
+ struct mutex update_lock;
struct power_supply *bat;
struct power_supply_desc bat_desc;
struct acpi_device *device;
@@ -904,15 +904,12 @@ static int sysfs_add_battery(struct acpi_battery *battery)
static void sysfs_remove_battery(struct acpi_battery *battery)
{
- mutex_lock(&battery->sysfs_lock);
- if (!battery->bat) {
- mutex_unlock(&battery->sysfs_lock);
+ if (!battery->bat)
return;
- }
+
battery_hook_remove_battery(battery);
power_supply_unregister(battery->bat);
battery->bat = NULL;
- mutex_unlock(&battery->sysfs_lock);
}
static void find_battery(const struct dmi_header *dm, void *private)
@@ -1072,6 +1069,9 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
if (!battery)
return;
+
+ guard(mutex)(&battery->update_lock);
+
old = battery->bat;
/*
* On Acer Aspire V5-573G notifications are sometimes triggered too
@@ -1094,21 +1094,22 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
}
static int battery_notify(struct notifier_block *nb,
- unsigned long mode, void *_unused)
+ unsigned long mode, void *_unused)
{
struct acpi_battery *battery = container_of(nb, struct acpi_battery,
pm_nb);
- int result;
- switch (mode) {
- case PM_POST_HIBERNATION:
- case PM_POST_SUSPEND:
+ if (mode == PM_POST_SUSPEND || mode == PM_POST_HIBERNATION) {
+ guard(mutex)(&battery->update_lock);
+
if (!acpi_battery_present(battery))
return 0;
if (battery->bat) {
acpi_battery_refresh(battery);
} else {
+ int result;
+
result = acpi_battery_get_info(battery);
if (result)
return result;
@@ -1120,7 +1121,6 @@ static int battery_notify(struct notifier_block *nb,
acpi_battery_init_alarm(battery);
acpi_battery_get_state(battery);
- break;
}
return 0;
@@ -1198,6 +1198,8 @@ static int acpi_battery_update_retry(struct acpi_battery *battery)
{
int retry, ret;
+ guard(mutex)(&battery->update_lock);
+
for (retry = 5; retry; retry--) {
ret = acpi_battery_update(battery, false);
if (!ret)
@@ -1208,6 +1210,13 @@ static int acpi_battery_update_retry(struct acpi_battery *battery)
return ret;
}
+static void sysfs_battery_cleanup(struct acpi_battery *battery)
+{
+ guard(mutex)(&battery->update_lock);
+
+ sysfs_remove_battery(battery);
+}
+
static int acpi_battery_add(struct acpi_device *device)
{
int result = 0;
@@ -1230,7 +1239,7 @@ static int acpi_battery_add(struct acpi_device *device)
if (result)
return result;
- result = devm_mutex_init(&device->dev, &battery->sysfs_lock);
+ result = devm_mutex_init(&device->dev, &battery->update_lock);
if (result)
return result;
@@ -1262,7 +1271,7 @@ static int acpi_battery_add(struct acpi_device *device)
device_init_wakeup(&device->dev, 0);
unregister_pm_notifier(&battery->pm_nb);
fail:
- sysfs_remove_battery(battery);
+ sysfs_battery_cleanup(battery);
return result;
}
@@ -1281,6 +1290,9 @@ static void acpi_battery_remove(struct acpi_device *device)
device_init_wakeup(&device->dev, 0);
unregister_pm_notifier(&battery->pm_nb);
+
+ guard(mutex)(&battery->update_lock);
+
sysfs_remove_battery(battery);
}
@@ -1297,6 +1309,9 @@ static int acpi_battery_resume(struct device *dev)
return -EINVAL;
battery->update_time = 0;
+
+ guard(mutex)(&battery->update_lock);
+
acpi_battery_update(battery, true);
return 0;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x e4f574ca9c6dfa66695bb054ff5df43ecea873ec
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101610-ranch-sincerity-0fc4@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e4f574ca9c6dfa66695bb054ff5df43ecea873ec Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew(a)redhat.com>
Date: Wed, 6 Aug 2025 15:15:43 -0400
Subject: [PATCH] nfsd: decouple the xprtsec policy check from
check_nfsd_access()
A while back I had reported that an NFSv3 client could successfully
mount using '-o xprtsec=none' an export that had been exported with
'xprtsec=tls:mtls'. By "successfully" I mean that the mount command
would succeed and the mount would show up in /proc/mount. Attempting
to do anything futher with the mount would be met with NFS3ERR_ACCES.
This was fixed (albeit accidentally) by commit bb4f07f2409c ("nfsd:
Fix NFSD_MAY_BYPASS_GSS and NFSD_MAY_BYPASS_GSS_ON_ROOT") and was
subsequently re-broken by commit 0813c5f01249 ("nfsd: fix access
checking for NLM under XPRTSEC policies").
Transport Layer Security isn't an RPC security flavor or pseudo-flavor,
so we shouldn't be conflating them when determining whether the access
checks can be bypassed. Split check_nfsd_access() into two helpers, and
have __fh_verify() call the helpers directly since __fh_verify() has
logic that allows one or both of the checks to be skipped. All other
sites will continue to call check_nfsd_access().
Link: https://lore.kernel.org/linux-nfs/ZjO3Qwf_G87yNXb2@aion/
Fixes: 9280c5774314 ("NFSD: Handle new xprtsec= export option")
Cc: stable(a)vger.kernel.org
Signed-off-by: Scott Mayhew <smayhew(a)redhat.com>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cadfc2bae60e..95b5681152c4 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1082,50 +1082,62 @@ static struct svc_export *exp_find(struct cache_detail *cd,
}
/**
- * check_nfsd_access - check if access to export is allowed.
+ * check_xprtsec_policy - check if access to export is allowed by the
+ * xprtsec policy
* @exp: svc_export that is being accessed.
- * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO).
- * @may_bypass_gss: reduce strictness of authorization check
+ * @rqstp: svc_rqst attempting to access @exp.
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
*
* Return values:
* %nfs_ok if access is granted, or
* %nfserr_wrongsec if access is denied
*/
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
- bool may_bypass_gss)
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp)
{
- struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
- struct svc_xprt *xprt;
-
- /*
- * If rqstp is NULL, this is a LOCALIO request which will only
- * ever use a filehandle/credential pair for which access has
- * been affirmed (by ACCESS or OPEN NFS requests) over the
- * wire. So there is no need for further checks here.
- */
- if (!rqstp)
- return nfs_ok;
-
- xprt = rqstp->rq_xprt;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
!test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
- if (!may_bypass_gss)
- goto denied;
+ return nfserr_wrongsec;
+}
+
+/**
+ * check_security_flavor - check if access to export is allowed by the
+ * security flavor
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
-ok:
/* legacy gss-only clients are always OK: */
if (exp->ex_client == rqstp->rq_gssclient)
return nfs_ok;
@@ -1167,10 +1179,30 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
}
}
-denied:
return nfserr_wrongsec;
}
+/**
+ * check_nfsd_access - check if access to export is allowed.
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ __be32 status;
+
+ status = check_xprtsec_policy(exp, rqstp);
+ if (status != nfs_ok)
+ return status;
+ return check_security_flavor(exp, rqstp, may_bypass_gss);
+}
+
/*
* Uses rq_client and rq_gssclient to find an export; uses rq_client (an
* auth_unix client) if it's available and has secinfo information;
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index b9c0adb3ce09..ef5581911d5b 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -101,6 +101,9 @@ struct svc_expkey {
struct svc_cred;
int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp);
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp);
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
bool may_bypass_gss);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index f4c2fb3dd5d0..062cfc18d8c6 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -364,10 +364,30 @@ __fh_verify(struct svc_rqst *rqstp,
if (error)
goto out;
+ /*
+ * If rqstp is NULL, this is a LOCALIO request which will only
+ * ever use a filehandle/credential pair for which access has
+ * been affirmed (by ACCESS or OPEN NFS requests) over the
+ * wire. Skip both the xprtsec policy and the security flavor
+ * checks.
+ */
+ if (!rqstp)
+ goto check_permissions;
+
if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM))
/* NLM is allowed to fully bypass authentication */
goto out;
+ /*
+ * NLM is allowed to bypass the xprtsec policy check because lockd
+ * doesn't support xprtsec.
+ */
+ if (!(access & NFSD_MAY_NLM)) {
+ error = check_xprtsec_policy(exp, rqstp);
+ if (error)
+ goto out;
+ }
+
if (access & NFSD_MAY_BYPASS_GSS)
may_bypass_gss = true;
/*
@@ -379,13 +399,15 @@ __fh_verify(struct svc_rqst *rqstp,
&& exp->ex_path.dentry == dentry)
may_bypass_gss = true;
- error = check_nfsd_access(exp, rqstp, may_bypass_gss);
+ error = check_security_flavor(exp, rqstp, may_bypass_gss);
if (error)
goto out;
+
/* During LOCALIO call to fh_verify will be called with a NULL rqstp */
if (rqstp)
svc_xprt_set_valid(rqstp->rq_xprt);
+check_permissions:
/* Finally, check access permissions. */
error = nfsd_permission(cred, exp, dentry, access);
out:
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 2c27aaee934a1b5229152fe33a14f1fdf50da143
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101628-scribing-handwork-22ac@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2c27aaee934a1b5229152fe33a14f1fdf50da143 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:15 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Update calibration wait time for
startup state machine
Do read-modify-write so that we re-use the characterized reset value as
specified in TRM [1] to program calibration wait time which defines number
of cycles to wait for after startup state machine is in bandgap enable
state.
This fixes PLL lock timeout error faced while using RPi DSI Panel on TI's
AM62L and J721E SoC since earlier calibration wait time was getting
overwritten to zero value thus failing the PLL to lockup and causing
timeout.
[1] AM62P TRM (Section 14.8.6.3.2.1.1 DPHY_TX_DPHYTX_CMN0_CMN_DIG_TBIT2):
Link: https://www.ti.com/lit/pdf/spruj83
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-3-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index da8de0a9d086..24a25606996c 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -30,6 +30,7 @@
#define DPHY_CMN_SSM DPHY_PMA_CMN(0x20)
#define DPHY_CMN_SSM_EN BIT(0)
+#define DPHY_CMN_SSM_CAL_WAIT_TIME GENMASK(8, 1)
#define DPHY_CMN_TX_MODE_EN BIT(9)
#define DPHY_CMN_PWM DPHY_PMA_CMN(0x40)
@@ -410,7 +411,8 @@ static int cdns_dphy_power_on(struct phy *phy)
writel(reg, dphy->regs + DPHY_BAND_CFG);
/* Start TX state machine. */
- writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel((reg & DPHY_CMN_SSM_CAL_WAIT_TIME) | DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
ret = cdns_dphy_wait_for_pll_lock(dphy);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 2c27aaee934a1b5229152fe33a14f1fdf50da143
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101627-kerchief-happiness-2cc0@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2c27aaee934a1b5229152fe33a14f1fdf50da143 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:15 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Update calibration wait time for
startup state machine
Do read-modify-write so that we re-use the characterized reset value as
specified in TRM [1] to program calibration wait time which defines number
of cycles to wait for after startup state machine is in bandgap enable
state.
This fixes PLL lock timeout error faced while using RPi DSI Panel on TI's
AM62L and J721E SoC since earlier calibration wait time was getting
overwritten to zero value thus failing the PLL to lockup and causing
timeout.
[1] AM62P TRM (Section 14.8.6.3.2.1.1 DPHY_TX_DPHYTX_CMN0_CMN_DIG_TBIT2):
Link: https://www.ti.com/lit/pdf/spruj83
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-3-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index da8de0a9d086..24a25606996c 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -30,6 +30,7 @@
#define DPHY_CMN_SSM DPHY_PMA_CMN(0x20)
#define DPHY_CMN_SSM_EN BIT(0)
+#define DPHY_CMN_SSM_CAL_WAIT_TIME GENMASK(8, 1)
#define DPHY_CMN_TX_MODE_EN BIT(9)
#define DPHY_CMN_PWM DPHY_PMA_CMN(0x40)
@@ -410,7 +411,8 @@ static int cdns_dphy_power_on(struct phy *phy)
writel(reg, dphy->regs + DPHY_BAND_CFG);
/* Start TX state machine. */
- writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel((reg & DPHY_CMN_SSM_CAL_WAIT_TIME) | DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
ret = cdns_dphy_wait_for_pll_lock(dphy);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 2c27aaee934a1b5229152fe33a14f1fdf50da143
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101627-actress-refinery-4db5@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2c27aaee934a1b5229152fe33a14f1fdf50da143 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:15 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Update calibration wait time for
startup state machine
Do read-modify-write so that we re-use the characterized reset value as
specified in TRM [1] to program calibration wait time which defines number
of cycles to wait for after startup state machine is in bandgap enable
state.
This fixes PLL lock timeout error faced while using RPi DSI Panel on TI's
AM62L and J721E SoC since earlier calibration wait time was getting
overwritten to zero value thus failing the PLL to lockup and causing
timeout.
[1] AM62P TRM (Section 14.8.6.3.2.1.1 DPHY_TX_DPHYTX_CMN0_CMN_DIG_TBIT2):
Link: https://www.ti.com/lit/pdf/spruj83
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-3-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index da8de0a9d086..24a25606996c 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -30,6 +30,7 @@
#define DPHY_CMN_SSM DPHY_PMA_CMN(0x20)
#define DPHY_CMN_SSM_EN BIT(0)
+#define DPHY_CMN_SSM_CAL_WAIT_TIME GENMASK(8, 1)
#define DPHY_CMN_TX_MODE_EN BIT(9)
#define DPHY_CMN_PWM DPHY_PMA_CMN(0x40)
@@ -410,7 +411,8 @@ static int cdns_dphy_power_on(struct phy *phy)
writel(reg, dphy->regs + DPHY_BAND_CFG);
/* Start TX state machine. */
- writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel((reg & DPHY_CMN_SSM_CAL_WAIT_TIME) | DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
ret = cdns_dphy_wait_for_pll_lock(dphy);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 284fb19a3ffb1083c3ad9c00d29749d09dddb99c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101617-junkyard-commute-14a0@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 284fb19a3ffb1083c3ad9c00d29749d09dddb99c Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:14 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling
PLL lockup and O_CMN_READY assertion can only happen after common state
machine gets enabled by programming DPHY_CMN_SSM register, but driver was
polling them before the common state machine was enabled which is
incorrect. This is as per the DPHY initialization sequence as mentioned in
J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram".
It shows O_CMN_READY polling at the end after common configuration pin
setup where the common configuration pin setup step enables state machine
as referenced in "Table 12-1533. Common Configuration-Related Setup
mentions state machine"
To fix this :
- Add new function callbacks for polling on PLL lock and O_CMN_READY
assertion.
- As state machine and clocks get enabled in power_on callback only, move
the clock related programming part from configure callback to power_on
callback and poll for the PLL lockup and O_CMN_READY assertion after state
machine gets enabled.
- The configure callback only saves the PLL configuration received from the
client driver which will be applied later on in power_on callback.
- Add checks to ensure configure is called before power_on and state
machine is in disabled state before power_on callback is called.
- Disable state machine in power_off so that client driver can re-configure
the PLL by following up a power_off, configure, power_on sequence.
[1]: https://www.ti.com/lit/zip/spruil1
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index 33a42e72362e..da8de0a9d086 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -92,6 +92,8 @@ struct cdns_dphy_ops {
void (*set_pll_cfg)(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg);
unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
+ int (*wait_for_pll_lock)(struct cdns_dphy *dphy);
+ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy);
};
struct cdns_dphy {
@@ -101,6 +103,8 @@ struct cdns_dphy {
struct clk *pll_ref_clk;
const struct cdns_dphy_ops *ops;
struct phy *phy;
+ bool is_configured;
+ bool is_powered;
};
/* Order of bands is important since the index is the band number. */
@@ -186,6 +190,16 @@ static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy)
return dphy->ops->get_wakeup_time_ns(dphy);
}
+static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0;
+}
+
+static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0;
+}
+
static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
{
/* Default wakeup time is 800 ns (in a simulated environment). */
@@ -227,7 +241,6 @@ static unsigned long cdns_dphy_j721e_get_wakeup_time_ns(struct cdns_dphy *dphy)
static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg)
{
- u32 status;
/*
* set the PWM and PLL Byteclk divider settings to recommended values
@@ -244,13 +257,6 @@ static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
writel(DPHY_TX_J721E_WIZ_LANE_RSTB,
dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
- (status & DPHY_TX_WIZ_O_CMN_READY), 0,
- POLL_TIMEOUT_US);
}
static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
@@ -258,6 +264,23 @@ static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ);
}
+static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
+ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US);
+}
+
+static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
+ status & DPHY_TX_WIZ_O_CMN_READY, 0,
+ POLL_TIMEOUT_US);
+}
+
/*
* This is the reference implementation of DPHY hooks. Specific integration of
* this IP may have to re-implement some of them depending on how they decided
@@ -273,6 +296,8 @@ static const struct cdns_dphy_ops j721e_dphy_ops = {
.get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns,
.set_pll_cfg = cdns_dphy_j721e_set_pll_cfg,
.set_psm_div = cdns_dphy_j721e_set_psm_div,
+ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock,
+ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready,
};
static int cdns_dphy_config_from_opts(struct phy *phy,
@@ -328,21 +353,36 @@ static int cdns_dphy_validate(struct phy *phy, enum phy_mode mode, int submode,
static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
- struct cdns_dphy_cfg cfg = { 0 };
- int ret, band_ctrl;
- unsigned int reg;
+ int ret;
- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg);
- if (ret)
- return ret;
+ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg);
+ if (!ret)
+ dphy->is_configured = true;
+
+ return ret;
+}
+
+static int cdns_dphy_power_on(struct phy *phy)
+{
+ struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ int ret;
+ u32 reg;
+
+ if (!dphy->is_configured || dphy->is_powered)
+ return -EINVAL;
+
+ clk_prepare_enable(dphy->psm_clk);
+ clk_prepare_enable(dphy->pll_ref_clk);
/*
* Configure the internal PSM clk divider so that the DPHY has a
* 1MHz clk (or something close).
*/
ret = cdns_dphy_setup_psm(dphy);
- if (ret)
- return ret;
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret);
+ goto err_power_on;
+ }
/*
* Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes
@@ -357,40 +397,60 @@ static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
* Configure the DPHY PLL that will be used to generate the TX byte
* clk.
*/
- cdns_dphy_set_pll_cfg(dphy, &cfg);
+ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg);
- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate);
- if (band_ctrl < 0)
- return band_ctrl;
+ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate);
+ if (ret < 0) {
+ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret);
+ goto err_power_on;
+ }
- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) |
- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl);
+ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) |
+ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret);
writel(reg, dphy->regs + DPHY_BAND_CFG);
- return 0;
-}
-
-static int cdns_dphy_power_on(struct phy *phy)
-{
- struct cdns_dphy *dphy = phy_get_drvdata(phy);
-
- clk_prepare_enable(dphy->psm_clk);
- clk_prepare_enable(dphy->pll_ref_clk);
-
/* Start TX state machine. */
writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
+ ret = cdns_dphy_wait_for_pll_lock(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret);
+ goto err_power_on;
+ }
+
+ ret = cdns_dphy_wait_for_cmn_ready(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n",
+ ret);
+ goto err_power_on;
+ }
+
+ dphy->is_powered = true;
+
return 0;
+
+err_power_on:
+ clk_disable_unprepare(dphy->pll_ref_clk);
+ clk_disable_unprepare(dphy->psm_clk);
+
+ return ret;
}
static int cdns_dphy_power_off(struct phy *phy)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ u32 reg;
clk_disable_unprepare(dphy->pll_ref_clk);
clk_disable_unprepare(dphy->psm_clk);
+ /* Stop TX state machine. */
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM);
+
+ dphy->is_powered = false;
+
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 284fb19a3ffb1083c3ad9c00d29749d09dddb99c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101616-shun-clover-b1c8@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 284fb19a3ffb1083c3ad9c00d29749d09dddb99c Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:14 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling
PLL lockup and O_CMN_READY assertion can only happen after common state
machine gets enabled by programming DPHY_CMN_SSM register, but driver was
polling them before the common state machine was enabled which is
incorrect. This is as per the DPHY initialization sequence as mentioned in
J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram".
It shows O_CMN_READY polling at the end after common configuration pin
setup where the common configuration pin setup step enables state machine
as referenced in "Table 12-1533. Common Configuration-Related Setup
mentions state machine"
To fix this :
- Add new function callbacks for polling on PLL lock and O_CMN_READY
assertion.
- As state machine and clocks get enabled in power_on callback only, move
the clock related programming part from configure callback to power_on
callback and poll for the PLL lockup and O_CMN_READY assertion after state
machine gets enabled.
- The configure callback only saves the PLL configuration received from the
client driver which will be applied later on in power_on callback.
- Add checks to ensure configure is called before power_on and state
machine is in disabled state before power_on callback is called.
- Disable state machine in power_off so that client driver can re-configure
the PLL by following up a power_off, configure, power_on sequence.
[1]: https://www.ti.com/lit/zip/spruil1
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index 33a42e72362e..da8de0a9d086 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -92,6 +92,8 @@ struct cdns_dphy_ops {
void (*set_pll_cfg)(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg);
unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
+ int (*wait_for_pll_lock)(struct cdns_dphy *dphy);
+ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy);
};
struct cdns_dphy {
@@ -101,6 +103,8 @@ struct cdns_dphy {
struct clk *pll_ref_clk;
const struct cdns_dphy_ops *ops;
struct phy *phy;
+ bool is_configured;
+ bool is_powered;
};
/* Order of bands is important since the index is the band number. */
@@ -186,6 +190,16 @@ static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy)
return dphy->ops->get_wakeup_time_ns(dphy);
}
+static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0;
+}
+
+static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0;
+}
+
static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
{
/* Default wakeup time is 800 ns (in a simulated environment). */
@@ -227,7 +241,6 @@ static unsigned long cdns_dphy_j721e_get_wakeup_time_ns(struct cdns_dphy *dphy)
static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg)
{
- u32 status;
/*
* set the PWM and PLL Byteclk divider settings to recommended values
@@ -244,13 +257,6 @@ static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
writel(DPHY_TX_J721E_WIZ_LANE_RSTB,
dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
- (status & DPHY_TX_WIZ_O_CMN_READY), 0,
- POLL_TIMEOUT_US);
}
static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
@@ -258,6 +264,23 @@ static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ);
}
+static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
+ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US);
+}
+
+static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
+ status & DPHY_TX_WIZ_O_CMN_READY, 0,
+ POLL_TIMEOUT_US);
+}
+
/*
* This is the reference implementation of DPHY hooks. Specific integration of
* this IP may have to re-implement some of them depending on how they decided
@@ -273,6 +296,8 @@ static const struct cdns_dphy_ops j721e_dphy_ops = {
.get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns,
.set_pll_cfg = cdns_dphy_j721e_set_pll_cfg,
.set_psm_div = cdns_dphy_j721e_set_psm_div,
+ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock,
+ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready,
};
static int cdns_dphy_config_from_opts(struct phy *phy,
@@ -328,21 +353,36 @@ static int cdns_dphy_validate(struct phy *phy, enum phy_mode mode, int submode,
static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
- struct cdns_dphy_cfg cfg = { 0 };
- int ret, band_ctrl;
- unsigned int reg;
+ int ret;
- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg);
- if (ret)
- return ret;
+ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg);
+ if (!ret)
+ dphy->is_configured = true;
+
+ return ret;
+}
+
+static int cdns_dphy_power_on(struct phy *phy)
+{
+ struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ int ret;
+ u32 reg;
+
+ if (!dphy->is_configured || dphy->is_powered)
+ return -EINVAL;
+
+ clk_prepare_enable(dphy->psm_clk);
+ clk_prepare_enable(dphy->pll_ref_clk);
/*
* Configure the internal PSM clk divider so that the DPHY has a
* 1MHz clk (or something close).
*/
ret = cdns_dphy_setup_psm(dphy);
- if (ret)
- return ret;
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret);
+ goto err_power_on;
+ }
/*
* Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes
@@ -357,40 +397,60 @@ static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
* Configure the DPHY PLL that will be used to generate the TX byte
* clk.
*/
- cdns_dphy_set_pll_cfg(dphy, &cfg);
+ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg);
- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate);
- if (band_ctrl < 0)
- return band_ctrl;
+ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate);
+ if (ret < 0) {
+ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret);
+ goto err_power_on;
+ }
- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) |
- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl);
+ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) |
+ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret);
writel(reg, dphy->regs + DPHY_BAND_CFG);
- return 0;
-}
-
-static int cdns_dphy_power_on(struct phy *phy)
-{
- struct cdns_dphy *dphy = phy_get_drvdata(phy);
-
- clk_prepare_enable(dphy->psm_clk);
- clk_prepare_enable(dphy->pll_ref_clk);
-
/* Start TX state machine. */
writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
+ ret = cdns_dphy_wait_for_pll_lock(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret);
+ goto err_power_on;
+ }
+
+ ret = cdns_dphy_wait_for_cmn_ready(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n",
+ ret);
+ goto err_power_on;
+ }
+
+ dphy->is_powered = true;
+
return 0;
+
+err_power_on:
+ clk_disable_unprepare(dphy->pll_ref_clk);
+ clk_disable_unprepare(dphy->psm_clk);
+
+ return ret;
}
static int cdns_dphy_power_off(struct phy *phy)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ u32 reg;
clk_disable_unprepare(dphy->pll_ref_clk);
clk_disable_unprepare(dphy->psm_clk);
+ /* Stop TX state machine. */
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM);
+
+ dphy->is_powered = false;
+
return 0;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 284fb19a3ffb1083c3ad9c00d29749d09dddb99c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101615-broadband-graph-2e96@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 284fb19a3ffb1083c3ad9c00d29749d09dddb99c Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:14 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling
PLL lockup and O_CMN_READY assertion can only happen after common state
machine gets enabled by programming DPHY_CMN_SSM register, but driver was
polling them before the common state machine was enabled which is
incorrect. This is as per the DPHY initialization sequence as mentioned in
J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram".
It shows O_CMN_READY polling at the end after common configuration pin
setup where the common configuration pin setup step enables state machine
as referenced in "Table 12-1533. Common Configuration-Related Setup
mentions state machine"
To fix this :
- Add new function callbacks for polling on PLL lock and O_CMN_READY
assertion.
- As state machine and clocks get enabled in power_on callback only, move
the clock related programming part from configure callback to power_on
callback and poll for the PLL lockup and O_CMN_READY assertion after state
machine gets enabled.
- The configure callback only saves the PLL configuration received from the
client driver which will be applied later on in power_on callback.
- Add checks to ensure configure is called before power_on and state
machine is in disabled state before power_on callback is called.
- Disable state machine in power_off so that client driver can re-configure
the PLL by following up a power_off, configure, power_on sequence.
[1]: https://www.ti.com/lit/zip/spruil1
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index 33a42e72362e..da8de0a9d086 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -92,6 +92,8 @@ struct cdns_dphy_ops {
void (*set_pll_cfg)(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg);
unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
+ int (*wait_for_pll_lock)(struct cdns_dphy *dphy);
+ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy);
};
struct cdns_dphy {
@@ -101,6 +103,8 @@ struct cdns_dphy {
struct clk *pll_ref_clk;
const struct cdns_dphy_ops *ops;
struct phy *phy;
+ bool is_configured;
+ bool is_powered;
};
/* Order of bands is important since the index is the band number. */
@@ -186,6 +190,16 @@ static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy)
return dphy->ops->get_wakeup_time_ns(dphy);
}
+static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0;
+}
+
+static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0;
+}
+
static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
{
/* Default wakeup time is 800 ns (in a simulated environment). */
@@ -227,7 +241,6 @@ static unsigned long cdns_dphy_j721e_get_wakeup_time_ns(struct cdns_dphy *dphy)
static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg)
{
- u32 status;
/*
* set the PWM and PLL Byteclk divider settings to recommended values
@@ -244,13 +257,6 @@ static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
writel(DPHY_TX_J721E_WIZ_LANE_RSTB,
dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
- (status & DPHY_TX_WIZ_O_CMN_READY), 0,
- POLL_TIMEOUT_US);
}
static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
@@ -258,6 +264,23 @@ static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ);
}
+static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
+ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US);
+}
+
+static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
+ status & DPHY_TX_WIZ_O_CMN_READY, 0,
+ POLL_TIMEOUT_US);
+}
+
/*
* This is the reference implementation of DPHY hooks. Specific integration of
* this IP may have to re-implement some of them depending on how they decided
@@ -273,6 +296,8 @@ static const struct cdns_dphy_ops j721e_dphy_ops = {
.get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns,
.set_pll_cfg = cdns_dphy_j721e_set_pll_cfg,
.set_psm_div = cdns_dphy_j721e_set_psm_div,
+ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock,
+ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready,
};
static int cdns_dphy_config_from_opts(struct phy *phy,
@@ -328,21 +353,36 @@ static int cdns_dphy_validate(struct phy *phy, enum phy_mode mode, int submode,
static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
- struct cdns_dphy_cfg cfg = { 0 };
- int ret, band_ctrl;
- unsigned int reg;
+ int ret;
- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg);
- if (ret)
- return ret;
+ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg);
+ if (!ret)
+ dphy->is_configured = true;
+
+ return ret;
+}
+
+static int cdns_dphy_power_on(struct phy *phy)
+{
+ struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ int ret;
+ u32 reg;
+
+ if (!dphy->is_configured || dphy->is_powered)
+ return -EINVAL;
+
+ clk_prepare_enable(dphy->psm_clk);
+ clk_prepare_enable(dphy->pll_ref_clk);
/*
* Configure the internal PSM clk divider so that the DPHY has a
* 1MHz clk (or something close).
*/
ret = cdns_dphy_setup_psm(dphy);
- if (ret)
- return ret;
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret);
+ goto err_power_on;
+ }
/*
* Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes
@@ -357,40 +397,60 @@ static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
* Configure the DPHY PLL that will be used to generate the TX byte
* clk.
*/
- cdns_dphy_set_pll_cfg(dphy, &cfg);
+ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg);
- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate);
- if (band_ctrl < 0)
- return band_ctrl;
+ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate);
+ if (ret < 0) {
+ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret);
+ goto err_power_on;
+ }
- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) |
- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl);
+ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) |
+ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret);
writel(reg, dphy->regs + DPHY_BAND_CFG);
- return 0;
-}
-
-static int cdns_dphy_power_on(struct phy *phy)
-{
- struct cdns_dphy *dphy = phy_get_drvdata(phy);
-
- clk_prepare_enable(dphy->psm_clk);
- clk_prepare_enable(dphy->pll_ref_clk);
-
/* Start TX state machine. */
writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
+ ret = cdns_dphy_wait_for_pll_lock(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret);
+ goto err_power_on;
+ }
+
+ ret = cdns_dphy_wait_for_cmn_ready(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n",
+ ret);
+ goto err_power_on;
+ }
+
+ dphy->is_powered = true;
+
return 0;
+
+err_power_on:
+ clk_disable_unprepare(dphy->pll_ref_clk);
+ clk_disable_unprepare(dphy->psm_clk);
+
+ return ret;
}
static int cdns_dphy_power_off(struct phy *phy)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ u32 reg;
clk_disable_unprepare(dphy->pll_ref_clk);
clk_disable_unprepare(dphy->psm_clk);
+ /* Stop TX state machine. */
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM);
+
+ dphy->is_powered = false;
+
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e747883c7d7306acb4d683038d881528fbfbe749
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101618-dotted-recount-322d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e747883c7d7306acb4d683038d881528fbfbe749 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Mon, 15 Sep 2025 06:20:30 -0700
Subject: [PATCH] xfs: fix log CRC mismatches between i386 and other
architectures
When mounting file systems with a log that was dirtied on i386 on
other architectures or vice versa, log recovery is unhappy:
[ 11.068052] XFS (vdb): Torn write (CRC failure) detected at log block 0x2. Truncating head block from 0xc.
This is because the CRCs generated by i386 and other architectures
always diff. The reason for that is that sizeof(struct xlog_rec_header)
returns different values for i386 vs the rest (324 vs 328), because the
struct is not sizeof(uint64_t) aligned, and i386 has odd struct size
alignment rules.
This issue goes back to commit 13cdc853c519 ("Add log versioning, and new
super block field for the log stripe") in the xfs-import tree, which
adds log v2 support and the h_size field that causes the unaligned size.
At that time it only mattered for the crude debug only log header
checksum, but with commit 0e446be44806 ("xfs: add CRC checks to the log")
it became a real issue for v5 file system, because now there is a proper
CRC, and regular builds actually expect it match.
Fix this by allowing checksums with and without the padding.
Fixes: 0e446be44806 ("xfs: add CRC checks to the log")
Cc: <stable(a)vger.kernel.org> # v3.8
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index a42a83211724..fd00b77af32b 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -173,12 +173,40 @@ typedef struct xlog_rec_header {
__be32 h_prev_block; /* block number to previous LR : 4 */
__be32 h_num_logops; /* number of log operations in this LR : 4 */
__be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
- /* new fields */
+
+ /* fields added by the Linux port: */
__be32 h_fmt; /* format of log record : 4 */
uuid_t h_fs_uuid; /* uuid of FS : 16 */
+
+ /* fields added for log v2: */
__be32 h_size; /* iclog size : 4 */
+
+ /*
+ * When h_size added for log v2 support, it caused structure to have
+ * a different size on i386 vs all other architectures because the
+ * sum of the size ofthe member is not aligned by that of the largest
+ * __be64-sized member, and i386 has really odd struct alignment rules.
+ *
+ * Due to the way the log headers are placed out on-disk that alone is
+ * not a problem becaue the xlog_rec_header always sits alone in a
+ * BBSIZEs area, and the rest of that area is padded with zeroes.
+ * But xlog_cksum used to calculate the checksum based on the structure
+ * size, and thus gives different checksums for i386 vs the rest.
+ * We now do two checksum validation passes for both sizes to allow
+ * moving v5 file systems with unclean logs between i386 and other
+ * (little-endian) architectures.
+ */
+ __u32 h_pad0;
} xlog_rec_header_t;
+#ifdef __i386__
+#define XLOG_REC_SIZE offsetofend(struct xlog_rec_header, h_size)
+#define XLOG_REC_SIZE_OTHER sizeof(struct xlog_rec_header)
+#else
+#define XLOG_REC_SIZE sizeof(struct xlog_rec_header)
+#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size)
+#endif /* __i386__ */
+
typedef struct xlog_rec_ext_header {
__be32 xh_cycle; /* write cycle of log : 4 */
__be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 5ed44fdf7491..7bfa3242e2c5 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -174,6 +174,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header, 328);
+ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header, 260);
XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7c590ecdf865..2978de9da38e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1568,13 +1568,13 @@ xlog_cksum(
struct xlog *log,
struct xlog_rec_header *rhead,
char *dp,
- int size)
+ unsigned int hdrsize,
+ unsigned int size)
{
uint32_t crc;
/* first generate the crc for the record header ... */
- crc = xfs_start_cksum_update((char *)rhead,
- sizeof(struct xlog_rec_header),
+ crc = xfs_start_cksum_update((char *)rhead, hdrsize,
offsetof(struct xlog_rec_header, h_crc));
/* ... then for additional cycle data for v2 logs ... */
@@ -1818,7 +1818,7 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
- iclog->ic_datap, size);
+ iclog->ic_datap, XLOG_REC_SIZE, size);
/*
* Intentionally corrupt the log record CRC based on the error injection
* frequency, if defined. This facilitates testing log recovery in the
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index a9a7a271c15b..0cfc654d8e87 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -499,8 +499,8 @@ xlog_recover_finish(
extern void
xlog_recover_cancel(struct xlog *);
-extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
- char *dp, int size);
+__le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+ char *dp, unsigned int hdrsize, unsigned int size);
extern struct kmem_cache *xfs_log_ticket_cache;
struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0a4db8efd903..549d60959aee 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2894,9 +2894,24 @@ xlog_recover_process(
int pass,
struct list_head *buffer_list)
{
- __le32 expected_crc = rhead->h_crc, crc;
+ __le32 expected_crc = rhead->h_crc, crc, other_crc;
- crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
+ crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE,
+ be32_to_cpu(rhead->h_len));
+
+ /*
+ * Look at the end of the struct xlog_rec_header definition in
+ * xfs_log_format.h for the glory details.
+ */
+ if (expected_crc && crc != expected_crc) {
+ other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER,
+ be32_to_cpu(rhead->h_len));
+ if (other_crc == expected_crc) {
+ xfs_notice_once(log->l_mp,
+ "Fixing up incorrect CRC due to padding.");
+ crc = other_crc;
+ }
+ }
/*
* Nothing else to do if this is a CRC verification pass. Just return
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e747883c7d7306acb4d683038d881528fbfbe749
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101617-timothy-revival-0289@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e747883c7d7306acb4d683038d881528fbfbe749 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch(a)lst.de>
Date: Mon, 15 Sep 2025 06:20:30 -0700
Subject: [PATCH] xfs: fix log CRC mismatches between i386 and other
architectures
When mounting file systems with a log that was dirtied on i386 on
other architectures or vice versa, log recovery is unhappy:
[ 11.068052] XFS (vdb): Torn write (CRC failure) detected at log block 0x2. Truncating head block from 0xc.
This is because the CRCs generated by i386 and other architectures
always diff. The reason for that is that sizeof(struct xlog_rec_header)
returns different values for i386 vs the rest (324 vs 328), because the
struct is not sizeof(uint64_t) aligned, and i386 has odd struct size
alignment rules.
This issue goes back to commit 13cdc853c519 ("Add log versioning, and new
super block field for the log stripe") in the xfs-import tree, which
adds log v2 support and the h_size field that causes the unaligned size.
At that time it only mattered for the crude debug only log header
checksum, but with commit 0e446be44806 ("xfs: add CRC checks to the log")
it became a real issue for v5 file system, because now there is a proper
CRC, and regular builds actually expect it match.
Fix this by allowing checksums with and without the padding.
Fixes: 0e446be44806 ("xfs: add CRC checks to the log")
Cc: <stable(a)vger.kernel.org> # v3.8
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Carlos Maiolino <cem(a)kernel.org>
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index a42a83211724..fd00b77af32b 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -173,12 +173,40 @@ typedef struct xlog_rec_header {
__be32 h_prev_block; /* block number to previous LR : 4 */
__be32 h_num_logops; /* number of log operations in this LR : 4 */
__be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
- /* new fields */
+
+ /* fields added by the Linux port: */
__be32 h_fmt; /* format of log record : 4 */
uuid_t h_fs_uuid; /* uuid of FS : 16 */
+
+ /* fields added for log v2: */
__be32 h_size; /* iclog size : 4 */
+
+ /*
+ * When h_size added for log v2 support, it caused structure to have
+ * a different size on i386 vs all other architectures because the
+ * sum of the size ofthe member is not aligned by that of the largest
+ * __be64-sized member, and i386 has really odd struct alignment rules.
+ *
+ * Due to the way the log headers are placed out on-disk that alone is
+ * not a problem becaue the xlog_rec_header always sits alone in a
+ * BBSIZEs area, and the rest of that area is padded with zeroes.
+ * But xlog_cksum used to calculate the checksum based on the structure
+ * size, and thus gives different checksums for i386 vs the rest.
+ * We now do two checksum validation passes for both sizes to allow
+ * moving v5 file systems with unclean logs between i386 and other
+ * (little-endian) architectures.
+ */
+ __u32 h_pad0;
} xlog_rec_header_t;
+#ifdef __i386__
+#define XLOG_REC_SIZE offsetofend(struct xlog_rec_header, h_size)
+#define XLOG_REC_SIZE_OTHER sizeof(struct xlog_rec_header)
+#else
+#define XLOG_REC_SIZE sizeof(struct xlog_rec_header)
+#define XLOG_REC_SIZE_OTHER offsetofend(struct xlog_rec_header, h_size)
+#endif /* __i386__ */
+
typedef struct xlog_rec_ext_header {
__be32 xh_cycle; /* write cycle of log : 4 */
__be32 xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /* : 256 */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 5ed44fdf7491..7bfa3242e2c5 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -174,6 +174,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_header, 328);
+ XFS_CHECK_STRUCT_SIZE(struct xlog_rec_ext_header, 260);
XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16);
XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7c590ecdf865..2978de9da38e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1568,13 +1568,13 @@ xlog_cksum(
struct xlog *log,
struct xlog_rec_header *rhead,
char *dp,
- int size)
+ unsigned int hdrsize,
+ unsigned int size)
{
uint32_t crc;
/* first generate the crc for the record header ... */
- crc = xfs_start_cksum_update((char *)rhead,
- sizeof(struct xlog_rec_header),
+ crc = xfs_start_cksum_update((char *)rhead, hdrsize,
offsetof(struct xlog_rec_header, h_crc));
/* ... then for additional cycle data for v2 logs ... */
@@ -1818,7 +1818,7 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
- iclog->ic_datap, size);
+ iclog->ic_datap, XLOG_REC_SIZE, size);
/*
* Intentionally corrupt the log record CRC based on the error injection
* frequency, if defined. This facilitates testing log recovery in the
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index a9a7a271c15b..0cfc654d8e87 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -499,8 +499,8 @@ xlog_recover_finish(
extern void
xlog_recover_cancel(struct xlog *);
-extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
- char *dp, int size);
+__le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
+ char *dp, unsigned int hdrsize, unsigned int size);
extern struct kmem_cache *xfs_log_ticket_cache;
struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0a4db8efd903..549d60959aee 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2894,9 +2894,24 @@ xlog_recover_process(
int pass,
struct list_head *buffer_list)
{
- __le32 expected_crc = rhead->h_crc, crc;
+ __le32 expected_crc = rhead->h_crc, crc, other_crc;
- crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
+ crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE,
+ be32_to_cpu(rhead->h_len));
+
+ /*
+ * Look at the end of the struct xlog_rec_header definition in
+ * xfs_log_format.h for the glory details.
+ */
+ if (expected_crc && crc != expected_crc) {
+ other_crc = xlog_cksum(log, rhead, dp, XLOG_REC_SIZE_OTHER,
+ be32_to_cpu(rhead->h_len));
+ if (other_crc == expected_crc) {
+ xfs_notice_once(log->l_mp,
+ "Fixing up incorrect CRC due to padding.");
+ crc = other_crc;
+ }
+ }
/*
* Nothing else to do if this is a CRC verification pass. Just return
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 42f9c66a6d0cc45758dab77233c5460e1cf003df
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101640-scouring-earthworm-26af@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 42f9c66a6d0cc45758dab77233c5460e1cf003df Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel(a)kernel.org>
Date: Mon, 22 Sep 2025 16:08:25 +0200
Subject: [PATCH] PCI: tegra194: Reset BARs when running in PCIe endpoint mode
Tegra already defines all BARs except BAR0 as BAR_RESERVED. This is
sufficient for pci-epf-test to not allocate backing memory and to not call
set_bar() for those BARs. However, marking a BAR as BAR_RESERVED does not
mean that the BAR gets disabled.
The host side driver, pci_endpoint_test, simply does an ioremap for all
enabled BARs and will run tests against all enabled BARs, so it will run
tests against the BARs marked as BAR_RESERVED.
After running the BAR tests (which will write to all enabled BARs), the
inbound address translation is broken. This is because the tegra controller
exposes the ATU Port Logic Structure in BAR4, so when BAR4 is written, the
inbound address translation settings get overwritten.
To avoid this, implement the dw_pcie_ep_ops .init() callback and start off
by disabling all BARs (pci-epf-test will later enable/configure BARs that
are not defined as BAR_RESERVED).
This matches the behavior of other PCIe endpoint drivers: dra7xx, imx6,
layerscape-ep, artpec6, dw-rockchip, qcom-ep, rcar-gen4, and uniphier-ep.
With this, the PCI endpoint kselftest test case CONSECUTIVE_BAR_TEST (which
was specifically made to detect address translation issues) passes.
Fixes: c57247f940e8 ("PCI: tegra: Add support for PCIe endpoint mode in Tegra194")
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
Signed-off-by: Manivannan Sadhasivam <mani(a)kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250922140822.519796-7-cassel@kernel.org
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
index fe418b9bfbb4..359d92dca86a 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -1941,6 +1941,15 @@ static irqreturn_t tegra_pcie_ep_pex_rst_irq(int irq, void *arg)
return IRQ_HANDLED;
}
+static void tegra_pcie_ep_init(struct dw_pcie_ep *ep)
+{
+ struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+ enum pci_barno bar;
+
+ for (bar = 0; bar < PCI_STD_NUM_BARS; bar++)
+ dw_pcie_ep_reset_bar(pci, bar);
+};
+
static int tegra_pcie_ep_raise_intx_irq(struct tegra_pcie_dw *pcie, u16 irq)
{
/* Tegra194 supports only INTA */
@@ -2016,6 +2025,7 @@ tegra_pcie_ep_get_features(struct dw_pcie_ep *ep)
}
static const struct dw_pcie_ep_ops pcie_ep_ops = {
+ .init = tegra_pcie_ep_init,
.raise_irq = tegra_pcie_ep_raise_irq,
.get_features = tegra_pcie_ep_get_features,
};
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 1cbc5e25fb70e942a7a735a1f3d6dd391afc9b29
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101610-deserve-skittle-9bde@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1cbc5e25fb70e942a7a735a1f3d6dd391afc9b29 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Wed, 13 Aug 2025 07:11:02 +0200
Subject: [PATCH] PCI/ERR: Fix uevent on failure to recover
Upon failure to recover from a PCIe error through AER, DPC or EDR, a
uevent is sent to inform user space about disconnection of the bridge
whose subordinate devices failed to recover.
However the bridge itself is not disconnected. Instead, a uevent should
be sent for each of the subordinate devices.
Only if the "bridge" happens to be a Root Complex Event Collector or
Integrated Endpoint does it make sense to send a uevent for it (because
there are no subordinate devices).
Right now if there is a mix of subordinate devices with and without
pci_error_handlers, a BEGIN_RECOVERY event is sent for those with
pci_error_handlers but no FAILED_RECOVERY event is ever sent for them
afterwards. Fix it.
Fixes: 856e1eb9bdd4 ("PCI/AER: Add uevents in AER and EEH error/resume")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: stable(a)vger.kernel.org # v4.16+
Link: https://patch.msgid.link/68fc527a380821b5d861dd554d2ce42cb739591c.175500815…
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index e795e5ae6b03..21d554359fb1 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -108,6 +108,12 @@ static int report_normal_detected(struct pci_dev *dev, void *data)
return report_error_detected(dev, pci_channel_io_normal, data);
}
+static int report_perm_failure_detected(struct pci_dev *dev, void *data)
+{
+ pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+ return 0;
+}
+
static int report_mmio_enabled(struct pci_dev *dev, void *data)
{
struct pci_driver *pdrv;
@@ -272,7 +278,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
failed:
pci_walk_bridge(bridge, pci_pm_runtime_put, NULL);
- pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
+ pci_walk_bridge(bridge, report_perm_failure_detected, NULL);
pci_info(bridge, "device recovery failed\n");
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 48991e4935078b05f80616c75d1ee2ea3ae18e58
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101641-squire-emboss-834e@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 48991e4935078b05f80616c75d1ee2ea3ae18e58 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris(a)google.com>
Date: Wed, 24 Sep 2025 09:57:11 -0700
Subject: [PATCH] PCI/sysfs: Ensure devices are powered for config reads
The "max_link_width", "current_link_speed", "current_link_width",
"secondary_bus_number", and "subordinate_bus_number" sysfs files all access
config registers, but they don't check the runtime PM state. If the device
is in D3cold or a parent bridge is suspended, we may see -EINVAL, bogus
values, or worse, depending on implementation details.
Wrap these access in pci_config_pm_runtime_{get,put}() like most of the
rest of the similar sysfs attributes.
Notably, "max_link_speed" does not access config registers; it returns a
cached value since d2bd39c0456b ("PCI: Store all PCIe Supported Link
Speeds").
Fixes: 56c1af4606f0 ("PCI: Add sysfs max_link_speed/width, current_link_speed/width, etc")
Signed-off-by: Brian Norris <briannorris(a)google.com>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250924095711.v2.1.Ibb5b6ca1e2c059e04ec53140cd98a…
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 5eea14c1f7f5..2b231ef1dac9 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -201,8 +201,14 @@ static ssize_t max_link_width_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct pci_dev *pdev = to_pci_dev(dev);
+ ssize_t ret;
- return sysfs_emit(buf, "%u\n", pcie_get_width_cap(pdev));
+ /* We read PCI_EXP_LNKCAP, so we need the device to be accessible. */
+ pci_config_pm_runtime_get(pdev);
+ ret = sysfs_emit(buf, "%u\n", pcie_get_width_cap(pdev));
+ pci_config_pm_runtime_put(pdev);
+
+ return ret;
}
static DEVICE_ATTR_RO(max_link_width);
@@ -214,7 +220,10 @@ static ssize_t current_link_speed_show(struct device *dev,
int err;
enum pci_bus_speed speed;
+ pci_config_pm_runtime_get(pci_dev);
err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &linkstat);
+ pci_config_pm_runtime_put(pci_dev);
+
if (err)
return -EINVAL;
@@ -231,7 +240,10 @@ static ssize_t current_link_width_show(struct device *dev,
u16 linkstat;
int err;
+ pci_config_pm_runtime_get(pci_dev);
err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &linkstat);
+ pci_config_pm_runtime_put(pci_dev);
+
if (err)
return -EINVAL;
@@ -247,7 +259,10 @@ static ssize_t secondary_bus_number_show(struct device *dev,
u8 sec_bus;
int err;
+ pci_config_pm_runtime_get(pci_dev);
err = pci_read_config_byte(pci_dev, PCI_SECONDARY_BUS, &sec_bus);
+ pci_config_pm_runtime_put(pci_dev);
+
if (err)
return -EINVAL;
@@ -263,7 +278,10 @@ static ssize_t subordinate_bus_number_show(struct device *dev,
u8 sub_bus;
int err;
+ pci_config_pm_runtime_get(pci_dev);
err = pci_read_config_byte(pci_dev, PCI_SUBORDINATE_BUS, &sub_bus);
+ pci_config_pm_runtime_put(pci_dev);
+
if (err)
return -EINVAL;
This patch fixes ce7a381697cb ("net: bonding: add broadcast_neighbor option for 802.3ad").
Before this commit, on the broadcast mode, all devices were traversed using the
bond_for_each_slave_rcu. This patch supports traversing devices by using all_slaves.
Therefore, we need to update the slave array when enslave or release salve.
Fixes: ce7a381697cb ("net: bonding: add broadcast_neighbor option for 802.3ad")
Cc: Jay Vosburgh <jv(a)jvosburgh.net>
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: Paolo Abeni <pabeni(a)redhat.com>
Cc: Simon Horman <horms(a)kernel.org>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Andrew Lunn <andrew+netdev(a)lunn.ch>
Cc: Nikolay Aleksandrov <razor(a)blackwall.org>
Cc: Hangbin Liu <liuhangbin(a)gmail.com>
Cc: Jiri Slaby <jirislaby(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Tonghao Zhang <tonghao(a)bamaicloud.com>
---
drivers/net/bonding/bond_main.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 17c7542be6a5..2d6883296e32 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2384,7 +2384,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
unblock_netpoll_tx();
}
- if (bond_mode_can_use_xmit_hash(bond))
+ /* broadcast mode uses the all_slaves to loop through slaves. */
+ if (bond_mode_can_use_xmit_hash(bond) ||
+ BOND_MODE(bond) == BOND_MODE_BROADCAST)
bond_update_slave_arr(bond, NULL);
if (!slave_dev->netdev_ops->ndo_bpf ||
@@ -2560,7 +2562,8 @@ static int __bond_release_one(struct net_device *bond_dev,
bond_upper_dev_unlink(bond, slave);
- if (bond_mode_can_use_xmit_hash(bond))
+ if (bond_mode_can_use_xmit_hash(bond) ||
+ BOND_MODE(bond) == BOND_MODE_BROADCAST)
bond_update_slave_arr(bond, slave);
slave_info(bond_dev, slave_dev, "Releasing %s interface\n",
--
2.34.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5d5f08fd0cd970184376bee07d59f635c8403f63
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101607-uncivil-cartload-f9ef@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5d5f08fd0cd970184376bee07d59f635c8403f63 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006(a)gmail.com>
Date: Fri, 29 Aug 2025 16:30:15 +0800
Subject: [PATCH] xtensa: simdisk: add input size check in proc_write_simdisk
A malicious user could pass an arbitrarily bad value
to memdup_user_nul(), potentially causing kernel crash.
This follows the same pattern as commit ee76746387f6
("netdevsim: prevent bad user input in nsim_dev_health_break_write()")
Fixes: b6c7e873daf7 ("xtensa: ISS: add host file-based simulated disk")
Fixes: 16e5c1fc3604 ("convert a bunch of open-coded instances of memdup_user_nul()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
Message-Id: <20250829083015.1992751-1-linmq006(a)gmail.com>
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 6ed009318d24..3cafc8feddee 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -231,10 +231,14 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
static ssize_t proc_write_simdisk(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- char *tmp = memdup_user_nul(buf, count);
+ char *tmp;
struct simdisk *dev = pde_data(file_inode(file));
int err;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
+
+ tmp = memdup_user_nul(buf, count);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5d5f08fd0cd970184376bee07d59f635c8403f63
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101607-sift-throttle-b1aa@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5d5f08fd0cd970184376bee07d59f635c8403f63 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006(a)gmail.com>
Date: Fri, 29 Aug 2025 16:30:15 +0800
Subject: [PATCH] xtensa: simdisk: add input size check in proc_write_simdisk
A malicious user could pass an arbitrarily bad value
to memdup_user_nul(), potentially causing kernel crash.
This follows the same pattern as commit ee76746387f6
("netdevsim: prevent bad user input in nsim_dev_health_break_write()")
Fixes: b6c7e873daf7 ("xtensa: ISS: add host file-based simulated disk")
Fixes: 16e5c1fc3604 ("convert a bunch of open-coded instances of memdup_user_nul()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
Message-Id: <20250829083015.1992751-1-linmq006(a)gmail.com>
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 6ed009318d24..3cafc8feddee 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -231,10 +231,14 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
static ssize_t proc_write_simdisk(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- char *tmp = memdup_user_nul(buf, count);
+ char *tmp;
struct simdisk *dev = pde_data(file_inode(file));
int err;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
+
+ tmp = memdup_user_nul(buf, count);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5d5f08fd0cd970184376bee07d59f635c8403f63
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101606-wildfire-moody-50d8@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5d5f08fd0cd970184376bee07d59f635c8403f63 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006(a)gmail.com>
Date: Fri, 29 Aug 2025 16:30:15 +0800
Subject: [PATCH] xtensa: simdisk: add input size check in proc_write_simdisk
A malicious user could pass an arbitrarily bad value
to memdup_user_nul(), potentially causing kernel crash.
This follows the same pattern as commit ee76746387f6
("netdevsim: prevent bad user input in nsim_dev_health_break_write()")
Fixes: b6c7e873daf7 ("xtensa: ISS: add host file-based simulated disk")
Fixes: 16e5c1fc3604 ("convert a bunch of open-coded instances of memdup_user_nul()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006(a)gmail.com>
Message-Id: <20250829083015.1992751-1-linmq006(a)gmail.com>
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 6ed009318d24..3cafc8feddee 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -231,10 +231,14 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf,
static ssize_t proc_write_simdisk(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- char *tmp = memdup_user_nul(buf, count);
+ char *tmp;
struct simdisk *dev = pde_data(file_inode(file));
int err;
+ if (count == 0 || count > PAGE_SIZE)
+ return -EINVAL;
+
+ tmp = memdup_user_nul(buf, count);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 3a4b9d027e4061766f618292df91760ea64a1fcc
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101613-candle-babble-137f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a4b9d027e4061766f618292df91760ea64a1fcc Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang(a)kernel.org>
Date: Tue, 19 Aug 2025 19:42:24 +0800
Subject: [PATCH] pwm: berlin: Fix wrong register in suspend/resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The 'enable' register should be BERLIN_PWM_EN rather than
BERLIN_PWM_ENABLE, otherwise, the driver accesses wrong address, there
will be cpu exception then kernel panic during suspend/resume.
Fixes: bbf0722c1c66 ("pwm: berlin: Add suspend/resume support")
Signed-off-by: Jisheng Zhang <jszhang(a)kernel.org>
Link: https://lore.kernel.org/r/20250819114224.31825-1-jszhang@kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Uwe Kleine-König <ukleinek(a)kernel.org>
diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c
index 831aed228caf..858d36991374 100644
--- a/drivers/pwm/pwm-berlin.c
+++ b/drivers/pwm/pwm-berlin.c
@@ -234,7 +234,7 @@ static int berlin_pwm_suspend(struct device *dev)
for (i = 0; i < chip->npwm; i++) {
struct berlin_pwm_channel *channel = &bpc->channel[i];
- channel->enable = berlin_pwm_readl(bpc, i, BERLIN_PWM_ENABLE);
+ channel->enable = berlin_pwm_readl(bpc, i, BERLIN_PWM_EN);
channel->ctrl = berlin_pwm_readl(bpc, i, BERLIN_PWM_CONTROL);
channel->duty = berlin_pwm_readl(bpc, i, BERLIN_PWM_DUTY);
channel->tcnt = berlin_pwm_readl(bpc, i, BERLIN_PWM_TCNT);
@@ -262,7 +262,7 @@ static int berlin_pwm_resume(struct device *dev)
berlin_pwm_writel(bpc, i, channel->ctrl, BERLIN_PWM_CONTROL);
berlin_pwm_writel(bpc, i, channel->duty, BERLIN_PWM_DUTY);
berlin_pwm_writel(bpc, i, channel->tcnt, BERLIN_PWM_TCNT);
- berlin_pwm_writel(bpc, i, channel->enable, BERLIN_PWM_ENABLE);
+ berlin_pwm_writel(bpc, i, channel->enable, BERLIN_PWM_EN);
}
return 0;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 284fb19a3ffb1083c3ad9c00d29749d09dddb99c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101651-unskilled-shore-57a7@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 284fb19a3ffb1083c3ad9c00d29749d09dddb99c Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:14 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling
PLL lockup and O_CMN_READY assertion can only happen after common state
machine gets enabled by programming DPHY_CMN_SSM register, but driver was
polling them before the common state machine was enabled which is
incorrect. This is as per the DPHY initialization sequence as mentioned in
J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram".
It shows O_CMN_READY polling at the end after common configuration pin
setup where the common configuration pin setup step enables state machine
as referenced in "Table 12-1533. Common Configuration-Related Setup
mentions state machine"
To fix this :
- Add new function callbacks for polling on PLL lock and O_CMN_READY
assertion.
- As state machine and clocks get enabled in power_on callback only, move
the clock related programming part from configure callback to power_on
callback and poll for the PLL lockup and O_CMN_READY assertion after state
machine gets enabled.
- The configure callback only saves the PLL configuration received from the
client driver which will be applied later on in power_on callback.
- Add checks to ensure configure is called before power_on and state
machine is in disabled state before power_on callback is called.
- Disable state machine in power_off so that client driver can re-configure
the PLL by following up a power_off, configure, power_on sequence.
[1]: https://www.ti.com/lit/zip/spruil1
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index 33a42e72362e..da8de0a9d086 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -92,6 +92,8 @@ struct cdns_dphy_ops {
void (*set_pll_cfg)(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg);
unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
+ int (*wait_for_pll_lock)(struct cdns_dphy *dphy);
+ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy);
};
struct cdns_dphy {
@@ -101,6 +103,8 @@ struct cdns_dphy {
struct clk *pll_ref_clk;
const struct cdns_dphy_ops *ops;
struct phy *phy;
+ bool is_configured;
+ bool is_powered;
};
/* Order of bands is important since the index is the band number. */
@@ -186,6 +190,16 @@ static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy)
return dphy->ops->get_wakeup_time_ns(dphy);
}
+static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0;
+}
+
+static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0;
+}
+
static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
{
/* Default wakeup time is 800 ns (in a simulated environment). */
@@ -227,7 +241,6 @@ static unsigned long cdns_dphy_j721e_get_wakeup_time_ns(struct cdns_dphy *dphy)
static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg)
{
- u32 status;
/*
* set the PWM and PLL Byteclk divider settings to recommended values
@@ -244,13 +257,6 @@ static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
writel(DPHY_TX_J721E_WIZ_LANE_RSTB,
dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
- (status & DPHY_TX_WIZ_O_CMN_READY), 0,
- POLL_TIMEOUT_US);
}
static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
@@ -258,6 +264,23 @@ static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ);
}
+static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
+ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US);
+}
+
+static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
+ status & DPHY_TX_WIZ_O_CMN_READY, 0,
+ POLL_TIMEOUT_US);
+}
+
/*
* This is the reference implementation of DPHY hooks. Specific integration of
* this IP may have to re-implement some of them depending on how they decided
@@ -273,6 +296,8 @@ static const struct cdns_dphy_ops j721e_dphy_ops = {
.get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns,
.set_pll_cfg = cdns_dphy_j721e_set_pll_cfg,
.set_psm_div = cdns_dphy_j721e_set_psm_div,
+ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock,
+ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready,
};
static int cdns_dphy_config_from_opts(struct phy *phy,
@@ -328,21 +353,36 @@ static int cdns_dphy_validate(struct phy *phy, enum phy_mode mode, int submode,
static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
- struct cdns_dphy_cfg cfg = { 0 };
- int ret, band_ctrl;
- unsigned int reg;
+ int ret;
- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg);
- if (ret)
- return ret;
+ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg);
+ if (!ret)
+ dphy->is_configured = true;
+
+ return ret;
+}
+
+static int cdns_dphy_power_on(struct phy *phy)
+{
+ struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ int ret;
+ u32 reg;
+
+ if (!dphy->is_configured || dphy->is_powered)
+ return -EINVAL;
+
+ clk_prepare_enable(dphy->psm_clk);
+ clk_prepare_enable(dphy->pll_ref_clk);
/*
* Configure the internal PSM clk divider so that the DPHY has a
* 1MHz clk (or something close).
*/
ret = cdns_dphy_setup_psm(dphy);
- if (ret)
- return ret;
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret);
+ goto err_power_on;
+ }
/*
* Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes
@@ -357,40 +397,60 @@ static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
* Configure the DPHY PLL that will be used to generate the TX byte
* clk.
*/
- cdns_dphy_set_pll_cfg(dphy, &cfg);
+ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg);
- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate);
- if (band_ctrl < 0)
- return band_ctrl;
+ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate);
+ if (ret < 0) {
+ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret);
+ goto err_power_on;
+ }
- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) |
- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl);
+ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) |
+ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret);
writel(reg, dphy->regs + DPHY_BAND_CFG);
- return 0;
-}
-
-static int cdns_dphy_power_on(struct phy *phy)
-{
- struct cdns_dphy *dphy = phy_get_drvdata(phy);
-
- clk_prepare_enable(dphy->psm_clk);
- clk_prepare_enable(dphy->pll_ref_clk);
-
/* Start TX state machine. */
writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
+ ret = cdns_dphy_wait_for_pll_lock(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret);
+ goto err_power_on;
+ }
+
+ ret = cdns_dphy_wait_for_cmn_ready(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n",
+ ret);
+ goto err_power_on;
+ }
+
+ dphy->is_powered = true;
+
return 0;
+
+err_power_on:
+ clk_disable_unprepare(dphy->pll_ref_clk);
+ clk_disable_unprepare(dphy->psm_clk);
+
+ return ret;
}
static int cdns_dphy_power_off(struct phy *phy)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ u32 reg;
clk_disable_unprepare(dphy->pll_ref_clk);
clk_disable_unprepare(dphy->psm_clk);
+ /* Stop TX state machine. */
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM);
+
+ dphy->is_powered = false;
+
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 284fb19a3ffb1083c3ad9c00d29749d09dddb99c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101651-cotton-movable-3a6d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 284fb19a3ffb1083c3ad9c00d29749d09dddb99c Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:14 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling
PLL lockup and O_CMN_READY assertion can only happen after common state
machine gets enabled by programming DPHY_CMN_SSM register, but driver was
polling them before the common state machine was enabled which is
incorrect. This is as per the DPHY initialization sequence as mentioned in
J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram".
It shows O_CMN_READY polling at the end after common configuration pin
setup where the common configuration pin setup step enables state machine
as referenced in "Table 12-1533. Common Configuration-Related Setup
mentions state machine"
To fix this :
- Add new function callbacks for polling on PLL lock and O_CMN_READY
assertion.
- As state machine and clocks get enabled in power_on callback only, move
the clock related programming part from configure callback to power_on
callback and poll for the PLL lockup and O_CMN_READY assertion after state
machine gets enabled.
- The configure callback only saves the PLL configuration received from the
client driver which will be applied later on in power_on callback.
- Add checks to ensure configure is called before power_on and state
machine is in disabled state before power_on callback is called.
- Disable state machine in power_off so that client driver can re-configure
the PLL by following up a power_off, configure, power_on sequence.
[1]: https://www.ti.com/lit/zip/spruil1
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index 33a42e72362e..da8de0a9d086 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -92,6 +92,8 @@ struct cdns_dphy_ops {
void (*set_pll_cfg)(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg);
unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
+ int (*wait_for_pll_lock)(struct cdns_dphy *dphy);
+ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy);
};
struct cdns_dphy {
@@ -101,6 +103,8 @@ struct cdns_dphy {
struct clk *pll_ref_clk;
const struct cdns_dphy_ops *ops;
struct phy *phy;
+ bool is_configured;
+ bool is_powered;
};
/* Order of bands is important since the index is the band number. */
@@ -186,6 +190,16 @@ static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy)
return dphy->ops->get_wakeup_time_ns(dphy);
}
+static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0;
+}
+
+static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0;
+}
+
static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
{
/* Default wakeup time is 800 ns (in a simulated environment). */
@@ -227,7 +241,6 @@ static unsigned long cdns_dphy_j721e_get_wakeup_time_ns(struct cdns_dphy *dphy)
static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg)
{
- u32 status;
/*
* set the PWM and PLL Byteclk divider settings to recommended values
@@ -244,13 +257,6 @@ static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
writel(DPHY_TX_J721E_WIZ_LANE_RSTB,
dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
- (status & DPHY_TX_WIZ_O_CMN_READY), 0,
- POLL_TIMEOUT_US);
}
static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
@@ -258,6 +264,23 @@ static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ);
}
+static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
+ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US);
+}
+
+static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
+ status & DPHY_TX_WIZ_O_CMN_READY, 0,
+ POLL_TIMEOUT_US);
+}
+
/*
* This is the reference implementation of DPHY hooks. Specific integration of
* this IP may have to re-implement some of them depending on how they decided
@@ -273,6 +296,8 @@ static const struct cdns_dphy_ops j721e_dphy_ops = {
.get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns,
.set_pll_cfg = cdns_dphy_j721e_set_pll_cfg,
.set_psm_div = cdns_dphy_j721e_set_psm_div,
+ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock,
+ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready,
};
static int cdns_dphy_config_from_opts(struct phy *phy,
@@ -328,21 +353,36 @@ static int cdns_dphy_validate(struct phy *phy, enum phy_mode mode, int submode,
static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
- struct cdns_dphy_cfg cfg = { 0 };
- int ret, band_ctrl;
- unsigned int reg;
+ int ret;
- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg);
- if (ret)
- return ret;
+ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg);
+ if (!ret)
+ dphy->is_configured = true;
+
+ return ret;
+}
+
+static int cdns_dphy_power_on(struct phy *phy)
+{
+ struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ int ret;
+ u32 reg;
+
+ if (!dphy->is_configured || dphy->is_powered)
+ return -EINVAL;
+
+ clk_prepare_enable(dphy->psm_clk);
+ clk_prepare_enable(dphy->pll_ref_clk);
/*
* Configure the internal PSM clk divider so that the DPHY has a
* 1MHz clk (or something close).
*/
ret = cdns_dphy_setup_psm(dphy);
- if (ret)
- return ret;
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret);
+ goto err_power_on;
+ }
/*
* Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes
@@ -357,40 +397,60 @@ static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
* Configure the DPHY PLL that will be used to generate the TX byte
* clk.
*/
- cdns_dphy_set_pll_cfg(dphy, &cfg);
+ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg);
- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate);
- if (band_ctrl < 0)
- return band_ctrl;
+ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate);
+ if (ret < 0) {
+ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret);
+ goto err_power_on;
+ }
- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) |
- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl);
+ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) |
+ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret);
writel(reg, dphy->regs + DPHY_BAND_CFG);
- return 0;
-}
-
-static int cdns_dphy_power_on(struct phy *phy)
-{
- struct cdns_dphy *dphy = phy_get_drvdata(phy);
-
- clk_prepare_enable(dphy->psm_clk);
- clk_prepare_enable(dphy->pll_ref_clk);
-
/* Start TX state machine. */
writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
+ ret = cdns_dphy_wait_for_pll_lock(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret);
+ goto err_power_on;
+ }
+
+ ret = cdns_dphy_wait_for_cmn_ready(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n",
+ ret);
+ goto err_power_on;
+ }
+
+ dphy->is_powered = true;
+
return 0;
+
+err_power_on:
+ clk_disable_unprepare(dphy->pll_ref_clk);
+ clk_disable_unprepare(dphy->psm_clk);
+
+ return ret;
}
static int cdns_dphy_power_off(struct phy *phy)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ u32 reg;
clk_disable_unprepare(dphy->pll_ref_clk);
clk_disable_unprepare(dphy->psm_clk);
+ /* Stop TX state machine. */
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM);
+
+ dphy->is_powered = false;
+
return 0;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 284fb19a3ffb1083c3ad9c00d29749d09dddb99c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101650-unloving-grinning-8caf@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 284fb19a3ffb1083c3ad9c00d29749d09dddb99c Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Fri, 4 Jul 2025 18:29:14 +0530
Subject: [PATCH] phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling
PLL lockup and O_CMN_READY assertion can only happen after common state
machine gets enabled by programming DPHY_CMN_SSM register, but driver was
polling them before the common state machine was enabled which is
incorrect. This is as per the DPHY initialization sequence as mentioned in
J721E TRM [1] at section "12.7.2.4.1.2.1 Start-up Sequence Timing Diagram".
It shows O_CMN_READY polling at the end after common configuration pin
setup where the common configuration pin setup step enables state machine
as referenced in "Table 12-1533. Common Configuration-Related Setup
mentions state machine"
To fix this :
- Add new function callbacks for polling on PLL lock and O_CMN_READY
assertion.
- As state machine and clocks get enabled in power_on callback only, move
the clock related programming part from configure callback to power_on
callback and poll for the PLL lockup and O_CMN_READY assertion after state
machine gets enabled.
- The configure callback only saves the PLL configuration received from the
client driver which will be applied later on in power_on callback.
- Add checks to ensure configure is called before power_on and state
machine is in disabled state before power_on callback is called.
- Disable state machine in power_off so that client driver can re-configure
the PLL by following up a power_off, configure, power_on sequence.
[1]: https://www.ti.com/lit/zip/spruil1
Cc: stable(a)vger.kernel.org
Fixes: 7a343c8bf4b5 ("phy: Add Cadence D-PHY support")
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
Tested-by: Harikrishna Shenoy <h-shenoy(a)ti.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250704125915.1224738-2-devarsht@ti.com
Signed-off-by: Vinod Koul <vkoul(a)kernel.org>
diff --git a/drivers/phy/cadence/cdns-dphy.c b/drivers/phy/cadence/cdns-dphy.c
index 33a42e72362e..da8de0a9d086 100644
--- a/drivers/phy/cadence/cdns-dphy.c
+++ b/drivers/phy/cadence/cdns-dphy.c
@@ -92,6 +92,8 @@ struct cdns_dphy_ops {
void (*set_pll_cfg)(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg);
unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
+ int (*wait_for_pll_lock)(struct cdns_dphy *dphy);
+ int (*wait_for_cmn_ready)(struct cdns_dphy *dphy);
};
struct cdns_dphy {
@@ -101,6 +103,8 @@ struct cdns_dphy {
struct clk *pll_ref_clk;
const struct cdns_dphy_ops *ops;
struct phy *phy;
+ bool is_configured;
+ bool is_powered;
};
/* Order of bands is important since the index is the band number. */
@@ -186,6 +190,16 @@ static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy)
return dphy->ops->get_wakeup_time_ns(dphy);
}
+static int cdns_dphy_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_pll_lock ? dphy->ops->wait_for_pll_lock(dphy) : 0;
+}
+
+static int cdns_dphy_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ return dphy->ops->wait_for_cmn_ready ? dphy->ops->wait_for_cmn_ready(dphy) : 0;
+}
+
static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
{
/* Default wakeup time is 800 ns (in a simulated environment). */
@@ -227,7 +241,6 @@ static unsigned long cdns_dphy_j721e_get_wakeup_time_ns(struct cdns_dphy *dphy)
static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
const struct cdns_dphy_cfg *cfg)
{
- u32 status;
/*
* set the PWM and PLL Byteclk divider settings to recommended values
@@ -244,13 +257,6 @@ static void cdns_dphy_j721e_set_pll_cfg(struct cdns_dphy *dphy,
writel(DPHY_TX_J721E_WIZ_LANE_RSTB,
dphy->regs + DPHY_TX_J721E_WIZ_RST_CTRL);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
- (status & DPHY_TX_WIZ_PLL_LOCK), 0, POLL_TIMEOUT_US);
-
- readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
- (status & DPHY_TX_WIZ_O_CMN_READY), 0,
- POLL_TIMEOUT_US);
}
static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
@@ -258,6 +264,23 @@ static void cdns_dphy_j721e_set_psm_div(struct cdns_dphy *dphy, u8 div)
writel(div, dphy->regs + DPHY_TX_J721E_WIZ_PSM_FREQ);
}
+static int cdns_dphy_j721e_wait_for_pll_lock(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_PLL_CTRL, status,
+ status & DPHY_TX_WIZ_PLL_LOCK, 0, POLL_TIMEOUT_US);
+}
+
+static int cdns_dphy_j721e_wait_for_cmn_ready(struct cdns_dphy *dphy)
+{
+ u32 status;
+
+ return readl_poll_timeout(dphy->regs + DPHY_TX_J721E_WIZ_STATUS, status,
+ status & DPHY_TX_WIZ_O_CMN_READY, 0,
+ POLL_TIMEOUT_US);
+}
+
/*
* This is the reference implementation of DPHY hooks. Specific integration of
* this IP may have to re-implement some of them depending on how they decided
@@ -273,6 +296,8 @@ static const struct cdns_dphy_ops j721e_dphy_ops = {
.get_wakeup_time_ns = cdns_dphy_j721e_get_wakeup_time_ns,
.set_pll_cfg = cdns_dphy_j721e_set_pll_cfg,
.set_psm_div = cdns_dphy_j721e_set_psm_div,
+ .wait_for_pll_lock = cdns_dphy_j721e_wait_for_pll_lock,
+ .wait_for_cmn_ready = cdns_dphy_j721e_wait_for_cmn_ready,
};
static int cdns_dphy_config_from_opts(struct phy *phy,
@@ -328,21 +353,36 @@ static int cdns_dphy_validate(struct phy *phy, enum phy_mode mode, int submode,
static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
- struct cdns_dphy_cfg cfg = { 0 };
- int ret, band_ctrl;
- unsigned int reg;
+ int ret;
- ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &cfg);
- if (ret)
- return ret;
+ ret = cdns_dphy_config_from_opts(phy, &opts->mipi_dphy, &dphy->cfg);
+ if (!ret)
+ dphy->is_configured = true;
+
+ return ret;
+}
+
+static int cdns_dphy_power_on(struct phy *phy)
+{
+ struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ int ret;
+ u32 reg;
+
+ if (!dphy->is_configured || dphy->is_powered)
+ return -EINVAL;
+
+ clk_prepare_enable(dphy->psm_clk);
+ clk_prepare_enable(dphy->pll_ref_clk);
/*
* Configure the internal PSM clk divider so that the DPHY has a
* 1MHz clk (or something close).
*/
ret = cdns_dphy_setup_psm(dphy);
- if (ret)
- return ret;
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to setup PSM with error %d\n", ret);
+ goto err_power_on;
+ }
/*
* Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes
@@ -357,40 +397,60 @@ static int cdns_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
* Configure the DPHY PLL that will be used to generate the TX byte
* clk.
*/
- cdns_dphy_set_pll_cfg(dphy, &cfg);
+ cdns_dphy_set_pll_cfg(dphy, &dphy->cfg);
- band_ctrl = cdns_dphy_tx_get_band_ctrl(opts->mipi_dphy.hs_clk_rate);
- if (band_ctrl < 0)
- return band_ctrl;
+ ret = cdns_dphy_tx_get_band_ctrl(dphy->cfg.hs_clk_rate);
+ if (ret < 0) {
+ dev_err(&dphy->phy->dev, "Failed to get band control value with error %d\n", ret);
+ goto err_power_on;
+ }
- reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, band_ctrl) |
- FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, band_ctrl);
+ reg = FIELD_PREP(DPHY_BAND_CFG_LEFT_BAND, ret) |
+ FIELD_PREP(DPHY_BAND_CFG_RIGHT_BAND, ret);
writel(reg, dphy->regs + DPHY_BAND_CFG);
- return 0;
-}
-
-static int cdns_dphy_power_on(struct phy *phy)
-{
- struct cdns_dphy *dphy = phy_get_drvdata(phy);
-
- clk_prepare_enable(dphy->psm_clk);
- clk_prepare_enable(dphy->pll_ref_clk);
-
/* Start TX state machine. */
writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
dphy->regs + DPHY_CMN_SSM);
+ ret = cdns_dphy_wait_for_pll_lock(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "Failed to lock PLL with error %d\n", ret);
+ goto err_power_on;
+ }
+
+ ret = cdns_dphy_wait_for_cmn_ready(dphy);
+ if (ret) {
+ dev_err(&dphy->phy->dev, "O_CMN_READY signal failed to assert with error %d\n",
+ ret);
+ goto err_power_on;
+ }
+
+ dphy->is_powered = true;
+
return 0;
+
+err_power_on:
+ clk_disable_unprepare(dphy->pll_ref_clk);
+ clk_disable_unprepare(dphy->psm_clk);
+
+ return ret;
}
static int cdns_dphy_power_off(struct phy *phy)
{
struct cdns_dphy *dphy = phy_get_drvdata(phy);
+ u32 reg;
clk_disable_unprepare(dphy->pll_ref_clk);
clk_disable_unprepare(dphy->psm_clk);
+ /* Stop TX state machine. */
+ reg = readl(dphy->regs + DPHY_CMN_SSM);
+ writel(reg & ~DPHY_CMN_SSM_EN, dphy->regs + DPHY_CMN_SSM);
+
+ dphy->is_powered = false;
+
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5ef7e24c742038a5d8c626fdc0e3a21834358341
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101631-encrust-snagged-88c4@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5ef7e24c742038a5d8c626fdc0e3a21834358341 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu(a)linux.intel.com>
Date: Thu, 18 Sep 2025 13:02:02 +0800
Subject: [PATCH] iommu/vt-d: PRS isn't usable if PDS isn't supported
The specification, Section 7.10, "Software Steps to Drain Page Requests &
Responses," requires software to submit an Invalidation Wait Descriptor
(inv_wait_dsc) with the Page-request Drain (PD=1) flag set, along with
the Invalidation Wait Completion Status Write flag (SW=1). It then waits
for the Invalidation Wait Descriptor's completion.
However, the PD field in the Invalidation Wait Descriptor is optional, as
stated in Section 6.5.2.9, "Invalidation Wait Descriptor":
"Page-request Drain (PD): Remapping hardware implementations reporting
Page-request draining as not supported (PDS = 0 in ECAP_REG) treat this
field as reserved."
This implies that if the IOMMU doesn't support the PDS capability, software
can't drain page requests and group responses as expected.
Do not enable PCI/PRI if the IOMMU doesn't support PDS.
Reported-by: Joel Granados <joel.granados(a)kernel.org>
Closes: https://lore.kernel.org/r/20250909-jag-pds-v1-1-ad8cba0e494e@kernel.org
Fixes: 66ac4db36f4c ("iommu/vt-d: Add page request draining support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Link: https://lore.kernel.org/r/20250915062946.120196-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <joerg.roedel(a)amd.com>
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9c3ab9d9f69a..92759a8f8330 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3812,7 +3812,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_pri_supported(pdev))
+ ecap_pds(iommu->ecap) && pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 26e5c67deb2e1f42a951f022fdf5b9f7eb747b01
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101631-recount-smashing-e39c@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 26e5c67deb2e1f42a951f022fdf5b9f7eb747b01 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong(a)kernel.org>
Date: Mon, 15 Sep 2025 17:24:17 -0700
Subject: [PATCH] fuse: fix livelock in synchronous file put from fuseblk
workers
I observed a hang when running generic/323 against a fuseblk server.
This test opens a file, initiates a lot of AIO writes to that file
descriptor, and closes the file descriptor before the writes complete.
Unsurprisingly, the AIO exerciser threads are mostly stuck waiting for
responses from the fuseblk server:
# cat /proc/372265/task/372313/stack
[<0>] request_wait_answer+0x1fe/0x2a0 [fuse]
[<0>] __fuse_simple_request+0xd3/0x2b0 [fuse]
[<0>] fuse_do_getattr+0xfc/0x1f0 [fuse]
[<0>] fuse_file_read_iter+0xbe/0x1c0 [fuse]
[<0>] aio_read+0x130/0x1e0
[<0>] io_submit_one+0x542/0x860
[<0>] __x64_sys_io_submit+0x98/0x1a0
[<0>] do_syscall_64+0x37/0xf0
[<0>] entry_SYSCALL_64_after_hwframe+0x4b/0x53
But the /weird/ part is that the fuseblk server threads are waiting for
responses from itself:
# cat /proc/372210/task/372232/stack
[<0>] request_wait_answer+0x1fe/0x2a0 [fuse]
[<0>] __fuse_simple_request+0xd3/0x2b0 [fuse]
[<0>] fuse_file_put+0x9a/0xd0 [fuse]
[<0>] fuse_release+0x36/0x50 [fuse]
[<0>] __fput+0xec/0x2b0
[<0>] task_work_run+0x55/0x90
[<0>] syscall_exit_to_user_mode+0xe9/0x100
[<0>] do_syscall_64+0x43/0xf0
[<0>] entry_SYSCALL_64_after_hwframe+0x4b/0x53
The fuseblk server is fuse2fs so there's nothing all that exciting in
the server itself. So why is the fuse server calling fuse_file_put?
The commit message for the fstest sheds some light on that:
"By closing the file descriptor before calling io_destroy, you pretty
much guarantee that the last put on the ioctx will be done in interrupt
context (during I/O completion).
Aha. AIO fgets a new struct file from the fd when it queues the ioctx.
The completion of the FUSE_WRITE command from userspace causes the fuse
server to call the AIO completion function. The completion puts the
struct file, queuing a delayed fput to the fuse server task. When the
fuse server task returns to userspace, it has to run the delayed fput,
which in the case of a fuseblk server, it does synchronously.
Sending the FUSE_RELEASE command sychronously from fuse server threads
is a bad idea because a client program can initiate enough simultaneous
AIOs such that all the fuse server threads end up in delayed_fput, and
now there aren't any threads left to handle the queued fuse commands.
Fix this by only using asynchronous fputs when closing files, and leave
a comment explaining why.
Cc: stable(a)vger.kernel.org # v2.6.38
Fixes: 5a18ec176c934c ("fuse: fix hang of single threaded fuseblk filesystem")
Signed-off-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com>
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 54786f62a9d8..f1ef77a0be05 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -356,8 +356,14 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
* Make the release synchronous if this is a fuseblk mount,
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
+ *
+ * Always use the asynchronous file put because the current thread
+ * might be the fuse server. This can happen if a process starts some
+ * aio and closes the fd before the aio completes. Since aio takes its
+ * own ref to the file, the IO completion has to drop the ref, which is
+ * how the fuse server can end up closing its clients' files.
*/
- fuse_file_put(ff, ff->fm->fc->destroy);
+ fuse_file_put(ff, false);
}
void fuse_release_common(struct file *file, bool isdir)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 72b7ceca857f38a8ca7c5629feffc63769638974
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101653-unloved-prodigal-b5e0@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72b7ceca857f38a8ca7c5629feffc63769638974 Mon Sep 17 00:00:00 2001
From: Shashank A P <shashank.ap(a)samsung.com>
Date: Mon, 1 Sep 2025 14:59:00 +0530
Subject: [PATCH] fs: quota: create dedicated workqueue for quota_release_work
There is a kernel panic due to WARN_ONCE when panic_on_warn is set.
This issue occurs when writeback is triggered due to sync call for an
opened file(ie, writeback reason is WB_REASON_SYNC). When f2fs balance
is needed at sync path, flush for quota_release_work is triggered.
By default quota_release_work is queued to "events_unbound" queue which
does not have WQ_MEM_RECLAIM flag. During f2fs balance "writeback"
workqueue tries to flush quota_release_work causing kernel panic due to
MEM_RECLAIM flag mismatch errors.
This patch creates dedicated workqueue with WQ_MEM_RECLAIM flag
for work quota_release_work.
------------[ cut here ]------------
WARNING: CPU: 4 PID: 14867 at kernel/workqueue.c:3721 check_flush_dependency+0x13c/0x148
Call trace:
check_flush_dependency+0x13c/0x148
__flush_work+0xd0/0x398
flush_delayed_work+0x44/0x5c
dquot_writeback_dquots+0x54/0x318
f2fs_do_quota_sync+0xb8/0x1a8
f2fs_write_checkpoint+0x3cc/0x99c
f2fs_gc+0x190/0x750
f2fs_balance_fs+0x110/0x168
f2fs_write_single_data_page+0x474/0x7dc
f2fs_write_data_pages+0x7d0/0xd0c
do_writepages+0xe0/0x2f4
__writeback_single_inode+0x44/0x4ac
writeback_sb_inodes+0x30c/0x538
wb_writeback+0xf4/0x440
wb_workfn+0x128/0x5d4
process_scheduled_works+0x1c4/0x45c
worker_thread+0x32c/0x3e8
kthread+0x11c/0x1b0
ret_from_fork+0x10/0x20
Kernel panic - not syncing: kernel: panic_on_warn set ...
Fixes: ac6f420291b3 ("quota: flush quota_release_work upon quota writeback")
CC: stable(a)vger.kernel.org
Signed-off-by: Shashank A P <shashank.ap(a)samsung.com>
Link: https://patch.msgid.link/20250901092905.2115-1-shashank.ap@samsung.com
Signed-off-by: Jan Kara <jack(a)suse.cz>
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index df4a9b348769..6c4a6ee1fa2b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -162,6 +162,9 @@ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
/* SLAB cache for dquot structures */
static struct kmem_cache *dquot_cachep;
+/* workqueue for work quota_release_work*/
+static struct workqueue_struct *quota_unbound_wq;
+
void register_quota_format(struct quota_format_type *fmt)
{
spin_lock(&dq_list_lock);
@@ -881,7 +884,7 @@ void dqput(struct dquot *dquot)
put_releasing_dquots(dquot);
atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock);
- queue_delayed_work(system_unbound_wq, "a_release_work, 1);
+ queue_delayed_work(quota_unbound_wq, "a_release_work, 1);
}
EXPORT_SYMBOL(dqput);
@@ -3041,6 +3044,11 @@ static int __init dquot_init(void)
shrinker_register(dqcache_shrinker);
+ quota_unbound_wq = alloc_workqueue("quota_events_unbound",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE);
+ if (!quota_unbound_wq)
+ panic("Cannot create quota_unbound_wq\n");
+
return 0;
}
fs_initcall(dquot_init);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 72b7ceca857f38a8ca7c5629feffc63769638974
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101652-coziness-jailer-d369@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72b7ceca857f38a8ca7c5629feffc63769638974 Mon Sep 17 00:00:00 2001
From: Shashank A P <shashank.ap(a)samsung.com>
Date: Mon, 1 Sep 2025 14:59:00 +0530
Subject: [PATCH] fs: quota: create dedicated workqueue for quota_release_work
There is a kernel panic due to WARN_ONCE when panic_on_warn is set.
This issue occurs when writeback is triggered due to sync call for an
opened file(ie, writeback reason is WB_REASON_SYNC). When f2fs balance
is needed at sync path, flush for quota_release_work is triggered.
By default quota_release_work is queued to "events_unbound" queue which
does not have WQ_MEM_RECLAIM flag. During f2fs balance "writeback"
workqueue tries to flush quota_release_work causing kernel panic due to
MEM_RECLAIM flag mismatch errors.
This patch creates dedicated workqueue with WQ_MEM_RECLAIM flag
for work quota_release_work.
------------[ cut here ]------------
WARNING: CPU: 4 PID: 14867 at kernel/workqueue.c:3721 check_flush_dependency+0x13c/0x148
Call trace:
check_flush_dependency+0x13c/0x148
__flush_work+0xd0/0x398
flush_delayed_work+0x44/0x5c
dquot_writeback_dquots+0x54/0x318
f2fs_do_quota_sync+0xb8/0x1a8
f2fs_write_checkpoint+0x3cc/0x99c
f2fs_gc+0x190/0x750
f2fs_balance_fs+0x110/0x168
f2fs_write_single_data_page+0x474/0x7dc
f2fs_write_data_pages+0x7d0/0xd0c
do_writepages+0xe0/0x2f4
__writeback_single_inode+0x44/0x4ac
writeback_sb_inodes+0x30c/0x538
wb_writeback+0xf4/0x440
wb_workfn+0x128/0x5d4
process_scheduled_works+0x1c4/0x45c
worker_thread+0x32c/0x3e8
kthread+0x11c/0x1b0
ret_from_fork+0x10/0x20
Kernel panic - not syncing: kernel: panic_on_warn set ...
Fixes: ac6f420291b3 ("quota: flush quota_release_work upon quota writeback")
CC: stable(a)vger.kernel.org
Signed-off-by: Shashank A P <shashank.ap(a)samsung.com>
Link: https://patch.msgid.link/20250901092905.2115-1-shashank.ap@samsung.com
Signed-off-by: Jan Kara <jack(a)suse.cz>
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index df4a9b348769..6c4a6ee1fa2b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -162,6 +162,9 @@ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
/* SLAB cache for dquot structures */
static struct kmem_cache *dquot_cachep;
+/* workqueue for work quota_release_work*/
+static struct workqueue_struct *quota_unbound_wq;
+
void register_quota_format(struct quota_format_type *fmt)
{
spin_lock(&dq_list_lock);
@@ -881,7 +884,7 @@ void dqput(struct dquot *dquot)
put_releasing_dquots(dquot);
atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock);
- queue_delayed_work(system_unbound_wq, "a_release_work, 1);
+ queue_delayed_work(quota_unbound_wq, "a_release_work, 1);
}
EXPORT_SYMBOL(dqput);
@@ -3041,6 +3044,11 @@ static int __init dquot_init(void)
shrinker_register(dqcache_shrinker);
+ quota_unbound_wq = alloc_workqueue("quota_events_unbound",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE);
+ if (!quota_unbound_wq)
+ panic("Cannot create quota_unbound_wq\n");
+
return 0;
}
fs_initcall(dquot_init);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 72b7ceca857f38a8ca7c5629feffc63769638974
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101651-whiny-creme-9639@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72b7ceca857f38a8ca7c5629feffc63769638974 Mon Sep 17 00:00:00 2001
From: Shashank A P <shashank.ap(a)samsung.com>
Date: Mon, 1 Sep 2025 14:59:00 +0530
Subject: [PATCH] fs: quota: create dedicated workqueue for quota_release_work
There is a kernel panic due to WARN_ONCE when panic_on_warn is set.
This issue occurs when writeback is triggered due to sync call for an
opened file(ie, writeback reason is WB_REASON_SYNC). When f2fs balance
is needed at sync path, flush for quota_release_work is triggered.
By default quota_release_work is queued to "events_unbound" queue which
does not have WQ_MEM_RECLAIM flag. During f2fs balance "writeback"
workqueue tries to flush quota_release_work causing kernel panic due to
MEM_RECLAIM flag mismatch errors.
This patch creates dedicated workqueue with WQ_MEM_RECLAIM flag
for work quota_release_work.
------------[ cut here ]------------
WARNING: CPU: 4 PID: 14867 at kernel/workqueue.c:3721 check_flush_dependency+0x13c/0x148
Call trace:
check_flush_dependency+0x13c/0x148
__flush_work+0xd0/0x398
flush_delayed_work+0x44/0x5c
dquot_writeback_dquots+0x54/0x318
f2fs_do_quota_sync+0xb8/0x1a8
f2fs_write_checkpoint+0x3cc/0x99c
f2fs_gc+0x190/0x750
f2fs_balance_fs+0x110/0x168
f2fs_write_single_data_page+0x474/0x7dc
f2fs_write_data_pages+0x7d0/0xd0c
do_writepages+0xe0/0x2f4
__writeback_single_inode+0x44/0x4ac
writeback_sb_inodes+0x30c/0x538
wb_writeback+0xf4/0x440
wb_workfn+0x128/0x5d4
process_scheduled_works+0x1c4/0x45c
worker_thread+0x32c/0x3e8
kthread+0x11c/0x1b0
ret_from_fork+0x10/0x20
Kernel panic - not syncing: kernel: panic_on_warn set ...
Fixes: ac6f420291b3 ("quota: flush quota_release_work upon quota writeback")
CC: stable(a)vger.kernel.org
Signed-off-by: Shashank A P <shashank.ap(a)samsung.com>
Link: https://patch.msgid.link/20250901092905.2115-1-shashank.ap@samsung.com
Signed-off-by: Jan Kara <jack(a)suse.cz>
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index df4a9b348769..6c4a6ee1fa2b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -162,6 +162,9 @@ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
/* SLAB cache for dquot structures */
static struct kmem_cache *dquot_cachep;
+/* workqueue for work quota_release_work*/
+static struct workqueue_struct *quota_unbound_wq;
+
void register_quota_format(struct quota_format_type *fmt)
{
spin_lock(&dq_list_lock);
@@ -881,7 +884,7 @@ void dqput(struct dquot *dquot)
put_releasing_dquots(dquot);
atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock);
- queue_delayed_work(system_unbound_wq, "a_release_work, 1);
+ queue_delayed_work(quota_unbound_wq, "a_release_work, 1);
}
EXPORT_SYMBOL(dqput);
@@ -3041,6 +3044,11 @@ static int __init dquot_init(void)
shrinker_register(dqcache_shrinker);
+ quota_unbound_wq = alloc_workqueue("quota_events_unbound",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE);
+ if (!quota_unbound_wq)
+ panic("Cannot create quota_unbound_wq\n");
+
return 0;
}
fs_initcall(dquot_init);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 72b7ceca857f38a8ca7c5629feffc63769638974
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101651-arrive-frail-3c0f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72b7ceca857f38a8ca7c5629feffc63769638974 Mon Sep 17 00:00:00 2001
From: Shashank A P <shashank.ap(a)samsung.com>
Date: Mon, 1 Sep 2025 14:59:00 +0530
Subject: [PATCH] fs: quota: create dedicated workqueue for quota_release_work
There is a kernel panic due to WARN_ONCE when panic_on_warn is set.
This issue occurs when writeback is triggered due to sync call for an
opened file(ie, writeback reason is WB_REASON_SYNC). When f2fs balance
is needed at sync path, flush for quota_release_work is triggered.
By default quota_release_work is queued to "events_unbound" queue which
does not have WQ_MEM_RECLAIM flag. During f2fs balance "writeback"
workqueue tries to flush quota_release_work causing kernel panic due to
MEM_RECLAIM flag mismatch errors.
This patch creates dedicated workqueue with WQ_MEM_RECLAIM flag
for work quota_release_work.
------------[ cut here ]------------
WARNING: CPU: 4 PID: 14867 at kernel/workqueue.c:3721 check_flush_dependency+0x13c/0x148
Call trace:
check_flush_dependency+0x13c/0x148
__flush_work+0xd0/0x398
flush_delayed_work+0x44/0x5c
dquot_writeback_dquots+0x54/0x318
f2fs_do_quota_sync+0xb8/0x1a8
f2fs_write_checkpoint+0x3cc/0x99c
f2fs_gc+0x190/0x750
f2fs_balance_fs+0x110/0x168
f2fs_write_single_data_page+0x474/0x7dc
f2fs_write_data_pages+0x7d0/0xd0c
do_writepages+0xe0/0x2f4
__writeback_single_inode+0x44/0x4ac
writeback_sb_inodes+0x30c/0x538
wb_writeback+0xf4/0x440
wb_workfn+0x128/0x5d4
process_scheduled_works+0x1c4/0x45c
worker_thread+0x32c/0x3e8
kthread+0x11c/0x1b0
ret_from_fork+0x10/0x20
Kernel panic - not syncing: kernel: panic_on_warn set ...
Fixes: ac6f420291b3 ("quota: flush quota_release_work upon quota writeback")
CC: stable(a)vger.kernel.org
Signed-off-by: Shashank A P <shashank.ap(a)samsung.com>
Link: https://patch.msgid.link/20250901092905.2115-1-shashank.ap@samsung.com
Signed-off-by: Jan Kara <jack(a)suse.cz>
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index df4a9b348769..6c4a6ee1fa2b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -162,6 +162,9 @@ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
/* SLAB cache for dquot structures */
static struct kmem_cache *dquot_cachep;
+/* workqueue for work quota_release_work*/
+static struct workqueue_struct *quota_unbound_wq;
+
void register_quota_format(struct quota_format_type *fmt)
{
spin_lock(&dq_list_lock);
@@ -881,7 +884,7 @@ void dqput(struct dquot *dquot)
put_releasing_dquots(dquot);
atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock);
- queue_delayed_work(system_unbound_wq, "a_release_work, 1);
+ queue_delayed_work(quota_unbound_wq, "a_release_work, 1);
}
EXPORT_SYMBOL(dqput);
@@ -3041,6 +3044,11 @@ static int __init dquot_init(void)
shrinker_register(dqcache_shrinker);
+ quota_unbound_wq = alloc_workqueue("quota_events_unbound",
+ WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE);
+ if (!quota_unbound_wq)
+ panic("Cannot create quota_unbound_wq\n");
+
return 0;
}
fs_initcall(dquot_init);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101620-overcook-path-9b00@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao(a)linutronix.de>
Date: Tue, 15 Jul 2025 14:46:34 +0200
Subject: [PATCH] eventpoll: Replace rwlock with spinlock
The ready event list of an epoll object is protected by read-write
semaphore:
- The consumer (waiter) acquires the write lock and takes items.
- the producer (waker) takes the read lock and adds items.
The point of this design is enabling epoll to scale well with large number
of producers, as multiple producers can hold the read lock at the same
time.
Unfortunately, this implementation may cause scheduling priority inversion
problem. Suppose the consumer has higher scheduling priority than the
producer. The consumer needs to acquire the write lock, but may be blocked
by the producer holding the read lock. Since read-write semaphore does not
support priority-boosting for the readers (even with CONFIG_PREEMPT_RT=y),
we have a case of priority inversion: a higher priority consumer is blocked
by a lower priority producer. This problem was reported in [1].
Furthermore, this could also cause stall problem, as described in [2].
Fix this problem by replacing rwlock with spinlock.
This reduces the event bandwidth, as the producers now have to contend with
each other for the spinlock. According to the benchmark from
https://github.com/rouming/test-tools/blob/master/stress-epoll.c:
On 12 x86 CPUs:
Before After Diff
threads events/ms events/ms
8 7162 4956 -31%
16 8733 5383 -38%
32 7968 5572 -30%
64 10652 5739 -46%
128 11236 5931 -47%
On 4 riscv CPUs:
Before After Diff
threads events/ms events/ms
8 2958 2833 -4%
16 3323 3097 -7%
32 3451 3240 -6%
64 3554 3178 -11%
128 3601 3235 -10%
Although the numbers look bad, it should be noted that this benchmark
creates multiple threads who do nothing except constantly generating new
epoll events, thus contention on the spinlock is high. For real workload,
the event rate is likely much lower, and the performance drop is not as
bad.
Using another benchmark (perf bench epoll wait) where spinlock contention
is lower, improvement is even observed on x86:
On 12 x86 CPUs:
Before: Averaged 110279 operations/sec (+- 1.09%), total secs = 8
After: Averaged 114577 operations/sec (+- 2.25%), total secs = 8
On 4 riscv CPUs:
Before: Averaged 175767 operations/sec (+- 0.62%), total secs = 8
After: Averaged 167396 operations/sec (+- 0.23%), total secs = 8
In conclusion, no one is likely to be upset over this change. After all,
spinlock was used originally for years, and the commit which converted to
rwlock didn't mention a real workload, just that the benchmark numbers are
nice.
This patch is not exactly the revert of commit a218cc491420 ("epoll: use
rwlock in order to reduce ep_poll_callback() contention"), because git
revert conflicts in some places which are not obvious on the resolution.
This patch is intended to be backported, therefore go with the obvious
approach:
- Replace rwlock_t with spinlock_t one to one
- Delete list_add_tail_lockless() and chain_epi_lockless(). These were
introduced to allow producers to concurrently add items to the list.
But now that spinlock no longer allows producers to touch the event
list concurrently, these two functions are not necessary anymore.
Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention")
Signed-off-by: Nam Cao <namcao(a)linutronix.de>
Link: https://lore.kernel.org/ec92458ea357ec503c737ead0f10b2c6e4c37d47.1752581388…
Tested-by: K Prateek Nayak <kprateek.nayak(a)amd.com>
Cc: stable(a)vger.kernel.org
Reported-by: Frederic Weisbecker <frederic(a)kernel.org>
Closes: https://lore.kernel.org/linux-rt-users/20210825132754.GA895675@lothringen/ [1]
Reported-by: Valentin Schneider <vschneid(a)redhat.com>
Closes: https://lore.kernel.org/linux-rt-users/xhsmhttqvnall.mognet@vschneid.remote… [2]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b22d6f819f78..ee7c4b683ec3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -46,10 +46,10 @@
*
* 1) epnested_mutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (rwlock)
+ * 3) ep->lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a rwlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
@@ -195,7 +195,7 @@ struct eventpoll {
struct list_head rdllist;
/* Lock which protects rdllist and ovflist */
- rwlock_t lock;
+ spinlock_t lock;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
@@ -741,10 +741,10 @@ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
* in a lockless way.
*/
lockdep_assert_irqs_enabled();
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
list_splice_init(&ep->rdllist, txlist);
WRITE_ONCE(ep->ovflist, NULL);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_done_scan(struct eventpoll *ep,
@@ -752,7 +752,7 @@ static void ep_done_scan(struct eventpoll *ep,
{
struct epitem *epi, *nepi;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
@@ -793,7 +793,7 @@ static void ep_done_scan(struct eventpoll *ep,
wake_up(&ep->wq);
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_get(struct eventpoll *ep)
@@ -868,10 +868,10 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -1152,7 +1152,7 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
- rwlock_init(&ep->lock);
+ spin_lock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
@@ -1239,100 +1239,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
}
#endif /* CONFIG_KCMP */
-/*
- * Adds a new entry to the tail of the list in a lockless way, i.e.
- * multiple CPUs are allowed to call this function concurrently.
- *
- * Beware: it is necessary to prevent any other modifications of the
- * existing list until all changes are completed, in other words
- * concurrent list_add_tail_lockless() calls should be protected
- * with a read lock, where write lock acts as a barrier which
- * makes sure all list_add_tail_lockless() calls are fully
- * completed.
- *
- * Also an element can be locklessly added to the list only in one
- * direction i.e. either to the tail or to the head, otherwise
- * concurrent access will corrupt the list.
- *
- * Return: %false if element has been already added to the list, %true
- * otherwise.
- */
-static inline bool list_add_tail_lockless(struct list_head *new,
- struct list_head *head)
-{
- struct list_head *prev;
-
- /*
- * This is simple 'new->next = head' operation, but cmpxchg()
- * is used in order to detect that same element has been just
- * added to the list from another CPU: the winner observes
- * new->next == new.
- */
- if (!try_cmpxchg(&new->next, &new, head))
- return false;
-
- /*
- * Initially ->next of a new element must be updated with the head
- * (we are inserting to the tail) and only then pointers are atomically
- * exchanged. XCHG guarantees memory ordering, thus ->next should be
- * updated before pointers are actually swapped and pointers are
- * swapped before prev->next is updated.
- */
-
- prev = xchg(&head->prev, new);
-
- /*
- * It is safe to modify prev->next and new->prev, because a new element
- * is added only to the tail and new->next is updated before XCHG.
- */
-
- prev->next = new;
- new->prev = prev;
-
- return true;
-}
-
-/*
- * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
- * i.e. multiple CPUs are allowed to call this function concurrently.
- *
- * Return: %false if epi element has been already chained, %true otherwise.
- */
-static inline bool chain_epi_lockless(struct epitem *epi)
-{
- struct eventpoll *ep = epi->ep;
-
- /* Fast preliminary check */
- if (epi->next != EP_UNACTIVE_PTR)
- return false;
-
- /* Check that the same epi has not been just chained from another CPU */
- if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
- return false;
-
- /* Atomically exchange tail */
- epi->next = xchg(&ep->ovflist, epi);
-
- return true;
-}
-
/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
- *
- * This callback takes a read lock in order not to contend with concurrent
- * events from another file descriptor, thus all modifications to ->rdllist
- * or ->ovflist are lockless. Read lock is paired with the write lock from
- * ep_start/done_scan(), which stops all list modifications and guarantees
- * that lists state is seen correctly.
- *
- * Another thing worth to mention is that ep_poll_callback() can be called
- * concurrently for the same @epi from different CPUs if poll table was inited
- * with several wait queues entries. Plural wakeup from different CPUs of a
- * single wait queue is serialized by wq.lock, but the case when multiple wait
- * queues are used should be detected accordingly. This is detected using
- * cmpxchg() operation.
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
@@ -1343,7 +1253,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
unsigned long flags;
int ewake = 0;
- read_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1372,12 +1282,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* chained in ep->ovflist and requeued later on.
*/
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
- if (chain_epi_lockless(epi))
+ if (epi->next == EP_UNACTIVE_PTR) {
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
ep_pm_stay_awake_rcu(epi);
+ }
} else if (!ep_is_linked(epi)) {
/* In the usual case, add event to ready list. */
- if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
- ep_pm_stay_awake_rcu(epi);
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake_rcu(epi);
}
/*
@@ -1410,7 +1323,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
pwake++;
out_unlock:
- read_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
if (pwake)
@@ -1745,7 +1658,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* We have to drop the new item inside our item list to keep track of it */
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
@@ -1762,7 +1675,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
/* We have to call this outside the lock */
if (pwake)
@@ -1826,7 +1739,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1837,7 +1750,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
/* We have to call this outside the lock */
@@ -2089,7 +2002,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* Barrierless variant, waitqueue_active() is called under
* the same lock on wakeup ep_poll_callback() side, so it
@@ -2108,7 +2021,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (!eavail)
__add_wait_queue_exclusive(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
if (!eavail)
timed_out = !ep_schedule_timeout(to) ||
@@ -2124,7 +2037,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
eavail = 1;
if (!list_empty_careful(&wait.entry)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* If the thread timed out and is not on the wait queue,
* it means that the thread was woken up after its
@@ -2135,7 +2048,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (timed_out)
eavail = list_empty(&wait.entry);
__remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
}
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101619-cesarean-cresting-01cb@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao(a)linutronix.de>
Date: Tue, 15 Jul 2025 14:46:34 +0200
Subject: [PATCH] eventpoll: Replace rwlock with spinlock
The ready event list of an epoll object is protected by read-write
semaphore:
- The consumer (waiter) acquires the write lock and takes items.
- the producer (waker) takes the read lock and adds items.
The point of this design is enabling epoll to scale well with large number
of producers, as multiple producers can hold the read lock at the same
time.
Unfortunately, this implementation may cause scheduling priority inversion
problem. Suppose the consumer has higher scheduling priority than the
producer. The consumer needs to acquire the write lock, but may be blocked
by the producer holding the read lock. Since read-write semaphore does not
support priority-boosting for the readers (even with CONFIG_PREEMPT_RT=y),
we have a case of priority inversion: a higher priority consumer is blocked
by a lower priority producer. This problem was reported in [1].
Furthermore, this could also cause stall problem, as described in [2].
Fix this problem by replacing rwlock with spinlock.
This reduces the event bandwidth, as the producers now have to contend with
each other for the spinlock. According to the benchmark from
https://github.com/rouming/test-tools/blob/master/stress-epoll.c:
On 12 x86 CPUs:
Before After Diff
threads events/ms events/ms
8 7162 4956 -31%
16 8733 5383 -38%
32 7968 5572 -30%
64 10652 5739 -46%
128 11236 5931 -47%
On 4 riscv CPUs:
Before After Diff
threads events/ms events/ms
8 2958 2833 -4%
16 3323 3097 -7%
32 3451 3240 -6%
64 3554 3178 -11%
128 3601 3235 -10%
Although the numbers look bad, it should be noted that this benchmark
creates multiple threads who do nothing except constantly generating new
epoll events, thus contention on the spinlock is high. For real workload,
the event rate is likely much lower, and the performance drop is not as
bad.
Using another benchmark (perf bench epoll wait) where spinlock contention
is lower, improvement is even observed on x86:
On 12 x86 CPUs:
Before: Averaged 110279 operations/sec (+- 1.09%), total secs = 8
After: Averaged 114577 operations/sec (+- 2.25%), total secs = 8
On 4 riscv CPUs:
Before: Averaged 175767 operations/sec (+- 0.62%), total secs = 8
After: Averaged 167396 operations/sec (+- 0.23%), total secs = 8
In conclusion, no one is likely to be upset over this change. After all,
spinlock was used originally for years, and the commit which converted to
rwlock didn't mention a real workload, just that the benchmark numbers are
nice.
This patch is not exactly the revert of commit a218cc491420 ("epoll: use
rwlock in order to reduce ep_poll_callback() contention"), because git
revert conflicts in some places which are not obvious on the resolution.
This patch is intended to be backported, therefore go with the obvious
approach:
- Replace rwlock_t with spinlock_t one to one
- Delete list_add_tail_lockless() and chain_epi_lockless(). These were
introduced to allow producers to concurrently add items to the list.
But now that spinlock no longer allows producers to touch the event
list concurrently, these two functions are not necessary anymore.
Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention")
Signed-off-by: Nam Cao <namcao(a)linutronix.de>
Link: https://lore.kernel.org/ec92458ea357ec503c737ead0f10b2c6e4c37d47.1752581388…
Tested-by: K Prateek Nayak <kprateek.nayak(a)amd.com>
Cc: stable(a)vger.kernel.org
Reported-by: Frederic Weisbecker <frederic(a)kernel.org>
Closes: https://lore.kernel.org/linux-rt-users/20210825132754.GA895675@lothringen/ [1]
Reported-by: Valentin Schneider <vschneid(a)redhat.com>
Closes: https://lore.kernel.org/linux-rt-users/xhsmhttqvnall.mognet@vschneid.remote… [2]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b22d6f819f78..ee7c4b683ec3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -46,10 +46,10 @@
*
* 1) epnested_mutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (rwlock)
+ * 3) ep->lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a rwlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
@@ -195,7 +195,7 @@ struct eventpoll {
struct list_head rdllist;
/* Lock which protects rdllist and ovflist */
- rwlock_t lock;
+ spinlock_t lock;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
@@ -741,10 +741,10 @@ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
* in a lockless way.
*/
lockdep_assert_irqs_enabled();
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
list_splice_init(&ep->rdllist, txlist);
WRITE_ONCE(ep->ovflist, NULL);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_done_scan(struct eventpoll *ep,
@@ -752,7 +752,7 @@ static void ep_done_scan(struct eventpoll *ep,
{
struct epitem *epi, *nepi;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
@@ -793,7 +793,7 @@ static void ep_done_scan(struct eventpoll *ep,
wake_up(&ep->wq);
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_get(struct eventpoll *ep)
@@ -868,10 +868,10 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -1152,7 +1152,7 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
- rwlock_init(&ep->lock);
+ spin_lock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
@@ -1239,100 +1239,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
}
#endif /* CONFIG_KCMP */
-/*
- * Adds a new entry to the tail of the list in a lockless way, i.e.
- * multiple CPUs are allowed to call this function concurrently.
- *
- * Beware: it is necessary to prevent any other modifications of the
- * existing list until all changes are completed, in other words
- * concurrent list_add_tail_lockless() calls should be protected
- * with a read lock, where write lock acts as a barrier which
- * makes sure all list_add_tail_lockless() calls are fully
- * completed.
- *
- * Also an element can be locklessly added to the list only in one
- * direction i.e. either to the tail or to the head, otherwise
- * concurrent access will corrupt the list.
- *
- * Return: %false if element has been already added to the list, %true
- * otherwise.
- */
-static inline bool list_add_tail_lockless(struct list_head *new,
- struct list_head *head)
-{
- struct list_head *prev;
-
- /*
- * This is simple 'new->next = head' operation, but cmpxchg()
- * is used in order to detect that same element has been just
- * added to the list from another CPU: the winner observes
- * new->next == new.
- */
- if (!try_cmpxchg(&new->next, &new, head))
- return false;
-
- /*
- * Initially ->next of a new element must be updated with the head
- * (we are inserting to the tail) and only then pointers are atomically
- * exchanged. XCHG guarantees memory ordering, thus ->next should be
- * updated before pointers are actually swapped and pointers are
- * swapped before prev->next is updated.
- */
-
- prev = xchg(&head->prev, new);
-
- /*
- * It is safe to modify prev->next and new->prev, because a new element
- * is added only to the tail and new->next is updated before XCHG.
- */
-
- prev->next = new;
- new->prev = prev;
-
- return true;
-}
-
-/*
- * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
- * i.e. multiple CPUs are allowed to call this function concurrently.
- *
- * Return: %false if epi element has been already chained, %true otherwise.
- */
-static inline bool chain_epi_lockless(struct epitem *epi)
-{
- struct eventpoll *ep = epi->ep;
-
- /* Fast preliminary check */
- if (epi->next != EP_UNACTIVE_PTR)
- return false;
-
- /* Check that the same epi has not been just chained from another CPU */
- if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
- return false;
-
- /* Atomically exchange tail */
- epi->next = xchg(&ep->ovflist, epi);
-
- return true;
-}
-
/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
- *
- * This callback takes a read lock in order not to contend with concurrent
- * events from another file descriptor, thus all modifications to ->rdllist
- * or ->ovflist are lockless. Read lock is paired with the write lock from
- * ep_start/done_scan(), which stops all list modifications and guarantees
- * that lists state is seen correctly.
- *
- * Another thing worth to mention is that ep_poll_callback() can be called
- * concurrently for the same @epi from different CPUs if poll table was inited
- * with several wait queues entries. Plural wakeup from different CPUs of a
- * single wait queue is serialized by wq.lock, but the case when multiple wait
- * queues are used should be detected accordingly. This is detected using
- * cmpxchg() operation.
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
@@ -1343,7 +1253,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
unsigned long flags;
int ewake = 0;
- read_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1372,12 +1282,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* chained in ep->ovflist and requeued later on.
*/
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
- if (chain_epi_lockless(epi))
+ if (epi->next == EP_UNACTIVE_PTR) {
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
ep_pm_stay_awake_rcu(epi);
+ }
} else if (!ep_is_linked(epi)) {
/* In the usual case, add event to ready list. */
- if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
- ep_pm_stay_awake_rcu(epi);
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake_rcu(epi);
}
/*
@@ -1410,7 +1323,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
pwake++;
out_unlock:
- read_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
if (pwake)
@@ -1745,7 +1658,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* We have to drop the new item inside our item list to keep track of it */
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
@@ -1762,7 +1675,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
/* We have to call this outside the lock */
if (pwake)
@@ -1826,7 +1739,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1837,7 +1750,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
/* We have to call this outside the lock */
@@ -2089,7 +2002,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* Barrierless variant, waitqueue_active() is called under
* the same lock on wakeup ep_poll_callback() side, so it
@@ -2108,7 +2021,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (!eavail)
__add_wait_queue_exclusive(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
if (!eavail)
timed_out = !ep_schedule_timeout(to) ||
@@ -2124,7 +2037,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
eavail = 1;
if (!list_empty_careful(&wait.entry)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* If the thread timed out and is not on the wait queue,
* it means that the thread was woken up after its
@@ -2135,7 +2048,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (timed_out)
eavail = list_empty(&wait.entry);
__remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
}
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101617-gaming-preplan-1aaf@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao(a)linutronix.de>
Date: Tue, 15 Jul 2025 14:46:34 +0200
Subject: [PATCH] eventpoll: Replace rwlock with spinlock
The ready event list of an epoll object is protected by read-write
semaphore:
- The consumer (waiter) acquires the write lock and takes items.
- the producer (waker) takes the read lock and adds items.
The point of this design is enabling epoll to scale well with large number
of producers, as multiple producers can hold the read lock at the same
time.
Unfortunately, this implementation may cause scheduling priority inversion
problem. Suppose the consumer has higher scheduling priority than the
producer. The consumer needs to acquire the write lock, but may be blocked
by the producer holding the read lock. Since read-write semaphore does not
support priority-boosting for the readers (even with CONFIG_PREEMPT_RT=y),
we have a case of priority inversion: a higher priority consumer is blocked
by a lower priority producer. This problem was reported in [1].
Furthermore, this could also cause stall problem, as described in [2].
Fix this problem by replacing rwlock with spinlock.
This reduces the event bandwidth, as the producers now have to contend with
each other for the spinlock. According to the benchmark from
https://github.com/rouming/test-tools/blob/master/stress-epoll.c:
On 12 x86 CPUs:
Before After Diff
threads events/ms events/ms
8 7162 4956 -31%
16 8733 5383 -38%
32 7968 5572 -30%
64 10652 5739 -46%
128 11236 5931 -47%
On 4 riscv CPUs:
Before After Diff
threads events/ms events/ms
8 2958 2833 -4%
16 3323 3097 -7%
32 3451 3240 -6%
64 3554 3178 -11%
128 3601 3235 -10%
Although the numbers look bad, it should be noted that this benchmark
creates multiple threads who do nothing except constantly generating new
epoll events, thus contention on the spinlock is high. For real workload,
the event rate is likely much lower, and the performance drop is not as
bad.
Using another benchmark (perf bench epoll wait) where spinlock contention
is lower, improvement is even observed on x86:
On 12 x86 CPUs:
Before: Averaged 110279 operations/sec (+- 1.09%), total secs = 8
After: Averaged 114577 operations/sec (+- 2.25%), total secs = 8
On 4 riscv CPUs:
Before: Averaged 175767 operations/sec (+- 0.62%), total secs = 8
After: Averaged 167396 operations/sec (+- 0.23%), total secs = 8
In conclusion, no one is likely to be upset over this change. After all,
spinlock was used originally for years, and the commit which converted to
rwlock didn't mention a real workload, just that the benchmark numbers are
nice.
This patch is not exactly the revert of commit a218cc491420 ("epoll: use
rwlock in order to reduce ep_poll_callback() contention"), because git
revert conflicts in some places which are not obvious on the resolution.
This patch is intended to be backported, therefore go with the obvious
approach:
- Replace rwlock_t with spinlock_t one to one
- Delete list_add_tail_lockless() and chain_epi_lockless(). These were
introduced to allow producers to concurrently add items to the list.
But now that spinlock no longer allows producers to touch the event
list concurrently, these two functions are not necessary anymore.
Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention")
Signed-off-by: Nam Cao <namcao(a)linutronix.de>
Link: https://lore.kernel.org/ec92458ea357ec503c737ead0f10b2c6e4c37d47.1752581388…
Tested-by: K Prateek Nayak <kprateek.nayak(a)amd.com>
Cc: stable(a)vger.kernel.org
Reported-by: Frederic Weisbecker <frederic(a)kernel.org>
Closes: https://lore.kernel.org/linux-rt-users/20210825132754.GA895675@lothringen/ [1]
Reported-by: Valentin Schneider <vschneid(a)redhat.com>
Closes: https://lore.kernel.org/linux-rt-users/xhsmhttqvnall.mognet@vschneid.remote… [2]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b22d6f819f78..ee7c4b683ec3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -46,10 +46,10 @@
*
* 1) epnested_mutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (rwlock)
+ * 3) ep->lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a rwlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
@@ -195,7 +195,7 @@ struct eventpoll {
struct list_head rdllist;
/* Lock which protects rdllist and ovflist */
- rwlock_t lock;
+ spinlock_t lock;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
@@ -741,10 +741,10 @@ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
* in a lockless way.
*/
lockdep_assert_irqs_enabled();
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
list_splice_init(&ep->rdllist, txlist);
WRITE_ONCE(ep->ovflist, NULL);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_done_scan(struct eventpoll *ep,
@@ -752,7 +752,7 @@ static void ep_done_scan(struct eventpoll *ep,
{
struct epitem *epi, *nepi;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
@@ -793,7 +793,7 @@ static void ep_done_scan(struct eventpoll *ep,
wake_up(&ep->wq);
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_get(struct eventpoll *ep)
@@ -868,10 +868,10 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -1152,7 +1152,7 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
- rwlock_init(&ep->lock);
+ spin_lock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
@@ -1239,100 +1239,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
}
#endif /* CONFIG_KCMP */
-/*
- * Adds a new entry to the tail of the list in a lockless way, i.e.
- * multiple CPUs are allowed to call this function concurrently.
- *
- * Beware: it is necessary to prevent any other modifications of the
- * existing list until all changes are completed, in other words
- * concurrent list_add_tail_lockless() calls should be protected
- * with a read lock, where write lock acts as a barrier which
- * makes sure all list_add_tail_lockless() calls are fully
- * completed.
- *
- * Also an element can be locklessly added to the list only in one
- * direction i.e. either to the tail or to the head, otherwise
- * concurrent access will corrupt the list.
- *
- * Return: %false if element has been already added to the list, %true
- * otherwise.
- */
-static inline bool list_add_tail_lockless(struct list_head *new,
- struct list_head *head)
-{
- struct list_head *prev;
-
- /*
- * This is simple 'new->next = head' operation, but cmpxchg()
- * is used in order to detect that same element has been just
- * added to the list from another CPU: the winner observes
- * new->next == new.
- */
- if (!try_cmpxchg(&new->next, &new, head))
- return false;
-
- /*
- * Initially ->next of a new element must be updated with the head
- * (we are inserting to the tail) and only then pointers are atomically
- * exchanged. XCHG guarantees memory ordering, thus ->next should be
- * updated before pointers are actually swapped and pointers are
- * swapped before prev->next is updated.
- */
-
- prev = xchg(&head->prev, new);
-
- /*
- * It is safe to modify prev->next and new->prev, because a new element
- * is added only to the tail and new->next is updated before XCHG.
- */
-
- prev->next = new;
- new->prev = prev;
-
- return true;
-}
-
-/*
- * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
- * i.e. multiple CPUs are allowed to call this function concurrently.
- *
- * Return: %false if epi element has been already chained, %true otherwise.
- */
-static inline bool chain_epi_lockless(struct epitem *epi)
-{
- struct eventpoll *ep = epi->ep;
-
- /* Fast preliminary check */
- if (epi->next != EP_UNACTIVE_PTR)
- return false;
-
- /* Check that the same epi has not been just chained from another CPU */
- if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
- return false;
-
- /* Atomically exchange tail */
- epi->next = xchg(&ep->ovflist, epi);
-
- return true;
-}
-
/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
- *
- * This callback takes a read lock in order not to contend with concurrent
- * events from another file descriptor, thus all modifications to ->rdllist
- * or ->ovflist are lockless. Read lock is paired with the write lock from
- * ep_start/done_scan(), which stops all list modifications and guarantees
- * that lists state is seen correctly.
- *
- * Another thing worth to mention is that ep_poll_callback() can be called
- * concurrently for the same @epi from different CPUs if poll table was inited
- * with several wait queues entries. Plural wakeup from different CPUs of a
- * single wait queue is serialized by wq.lock, but the case when multiple wait
- * queues are used should be detected accordingly. This is detected using
- * cmpxchg() operation.
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
@@ -1343,7 +1253,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
unsigned long flags;
int ewake = 0;
- read_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1372,12 +1282,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* chained in ep->ovflist and requeued later on.
*/
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
- if (chain_epi_lockless(epi))
+ if (epi->next == EP_UNACTIVE_PTR) {
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
ep_pm_stay_awake_rcu(epi);
+ }
} else if (!ep_is_linked(epi)) {
/* In the usual case, add event to ready list. */
- if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
- ep_pm_stay_awake_rcu(epi);
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake_rcu(epi);
}
/*
@@ -1410,7 +1323,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
pwake++;
out_unlock:
- read_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
if (pwake)
@@ -1745,7 +1658,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* We have to drop the new item inside our item list to keep track of it */
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
@@ -1762,7 +1675,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
/* We have to call this outside the lock */
if (pwake)
@@ -1826,7 +1739,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1837,7 +1750,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
/* We have to call this outside the lock */
@@ -2089,7 +2002,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* Barrierless variant, waitqueue_active() is called under
* the same lock on wakeup ep_poll_callback() side, so it
@@ -2108,7 +2021,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (!eavail)
__add_wait_queue_exclusive(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
if (!eavail)
timed_out = !ep_schedule_timeout(to) ||
@@ -2124,7 +2037,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
eavail = 1;
if (!list_empty_careful(&wait.entry)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* If the thread timed out and is not on the wait queue,
* it means that the thread was woken up after its
@@ -2135,7 +2048,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (timed_out)
eavail = list_empty(&wait.entry);
__remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
}
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101616-causal-numerator-0c1e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0c43094f8cc9d3d99d835c0ac9c4fe1ccc62babd Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao(a)linutronix.de>
Date: Tue, 15 Jul 2025 14:46:34 +0200
Subject: [PATCH] eventpoll: Replace rwlock with spinlock
The ready event list of an epoll object is protected by read-write
semaphore:
- The consumer (waiter) acquires the write lock and takes items.
- the producer (waker) takes the read lock and adds items.
The point of this design is enabling epoll to scale well with large number
of producers, as multiple producers can hold the read lock at the same
time.
Unfortunately, this implementation may cause scheduling priority inversion
problem. Suppose the consumer has higher scheduling priority than the
producer. The consumer needs to acquire the write lock, but may be blocked
by the producer holding the read lock. Since read-write semaphore does not
support priority-boosting for the readers (even with CONFIG_PREEMPT_RT=y),
we have a case of priority inversion: a higher priority consumer is blocked
by a lower priority producer. This problem was reported in [1].
Furthermore, this could also cause stall problem, as described in [2].
Fix this problem by replacing rwlock with spinlock.
This reduces the event bandwidth, as the producers now have to contend with
each other for the spinlock. According to the benchmark from
https://github.com/rouming/test-tools/blob/master/stress-epoll.c:
On 12 x86 CPUs:
Before After Diff
threads events/ms events/ms
8 7162 4956 -31%
16 8733 5383 -38%
32 7968 5572 -30%
64 10652 5739 -46%
128 11236 5931 -47%
On 4 riscv CPUs:
Before After Diff
threads events/ms events/ms
8 2958 2833 -4%
16 3323 3097 -7%
32 3451 3240 -6%
64 3554 3178 -11%
128 3601 3235 -10%
Although the numbers look bad, it should be noted that this benchmark
creates multiple threads who do nothing except constantly generating new
epoll events, thus contention on the spinlock is high. For real workload,
the event rate is likely much lower, and the performance drop is not as
bad.
Using another benchmark (perf bench epoll wait) where spinlock contention
is lower, improvement is even observed on x86:
On 12 x86 CPUs:
Before: Averaged 110279 operations/sec (+- 1.09%), total secs = 8
After: Averaged 114577 operations/sec (+- 2.25%), total secs = 8
On 4 riscv CPUs:
Before: Averaged 175767 operations/sec (+- 0.62%), total secs = 8
After: Averaged 167396 operations/sec (+- 0.23%), total secs = 8
In conclusion, no one is likely to be upset over this change. After all,
spinlock was used originally for years, and the commit which converted to
rwlock didn't mention a real workload, just that the benchmark numbers are
nice.
This patch is not exactly the revert of commit a218cc491420 ("epoll: use
rwlock in order to reduce ep_poll_callback() contention"), because git
revert conflicts in some places which are not obvious on the resolution.
This patch is intended to be backported, therefore go with the obvious
approach:
- Replace rwlock_t with spinlock_t one to one
- Delete list_add_tail_lockless() and chain_epi_lockless(). These were
introduced to allow producers to concurrently add items to the list.
But now that spinlock no longer allows producers to touch the event
list concurrently, these two functions are not necessary anymore.
Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention")
Signed-off-by: Nam Cao <namcao(a)linutronix.de>
Link: https://lore.kernel.org/ec92458ea357ec503c737ead0f10b2c6e4c37d47.1752581388…
Tested-by: K Prateek Nayak <kprateek.nayak(a)amd.com>
Cc: stable(a)vger.kernel.org
Reported-by: Frederic Weisbecker <frederic(a)kernel.org>
Closes: https://lore.kernel.org/linux-rt-users/20210825132754.GA895675@lothringen/ [1]
Reported-by: Valentin Schneider <vschneid(a)redhat.com>
Closes: https://lore.kernel.org/linux-rt-users/xhsmhttqvnall.mognet@vschneid.remote… [2]
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b22d6f819f78..ee7c4b683ec3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -46,10 +46,10 @@
*
* 1) epnested_mutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (rwlock)
+ * 3) ep->lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a rwlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
@@ -195,7 +195,7 @@ struct eventpoll {
struct list_head rdllist;
/* Lock which protects rdllist and ovflist */
- rwlock_t lock;
+ spinlock_t lock;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
@@ -741,10 +741,10 @@ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
* in a lockless way.
*/
lockdep_assert_irqs_enabled();
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
list_splice_init(&ep->rdllist, txlist);
WRITE_ONCE(ep->ovflist, NULL);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_done_scan(struct eventpoll *ep,
@@ -752,7 +752,7 @@ static void ep_done_scan(struct eventpoll *ep,
{
struct epitem *epi, *nepi;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
@@ -793,7 +793,7 @@ static void ep_done_scan(struct eventpoll *ep,
wake_up(&ep->wq);
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
static void ep_get(struct eventpoll *ep)
@@ -868,10 +868,10 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -1152,7 +1152,7 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
- rwlock_init(&ep->lock);
+ spin_lock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
@@ -1239,100 +1239,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
}
#endif /* CONFIG_KCMP */
-/*
- * Adds a new entry to the tail of the list in a lockless way, i.e.
- * multiple CPUs are allowed to call this function concurrently.
- *
- * Beware: it is necessary to prevent any other modifications of the
- * existing list until all changes are completed, in other words
- * concurrent list_add_tail_lockless() calls should be protected
- * with a read lock, where write lock acts as a barrier which
- * makes sure all list_add_tail_lockless() calls are fully
- * completed.
- *
- * Also an element can be locklessly added to the list only in one
- * direction i.e. either to the tail or to the head, otherwise
- * concurrent access will corrupt the list.
- *
- * Return: %false if element has been already added to the list, %true
- * otherwise.
- */
-static inline bool list_add_tail_lockless(struct list_head *new,
- struct list_head *head)
-{
- struct list_head *prev;
-
- /*
- * This is simple 'new->next = head' operation, but cmpxchg()
- * is used in order to detect that same element has been just
- * added to the list from another CPU: the winner observes
- * new->next == new.
- */
- if (!try_cmpxchg(&new->next, &new, head))
- return false;
-
- /*
- * Initially ->next of a new element must be updated with the head
- * (we are inserting to the tail) and only then pointers are atomically
- * exchanged. XCHG guarantees memory ordering, thus ->next should be
- * updated before pointers are actually swapped and pointers are
- * swapped before prev->next is updated.
- */
-
- prev = xchg(&head->prev, new);
-
- /*
- * It is safe to modify prev->next and new->prev, because a new element
- * is added only to the tail and new->next is updated before XCHG.
- */
-
- prev->next = new;
- new->prev = prev;
-
- return true;
-}
-
-/*
- * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
- * i.e. multiple CPUs are allowed to call this function concurrently.
- *
- * Return: %false if epi element has been already chained, %true otherwise.
- */
-static inline bool chain_epi_lockless(struct epitem *epi)
-{
- struct eventpoll *ep = epi->ep;
-
- /* Fast preliminary check */
- if (epi->next != EP_UNACTIVE_PTR)
- return false;
-
- /* Check that the same epi has not been just chained from another CPU */
- if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
- return false;
-
- /* Atomically exchange tail */
- epi->next = xchg(&ep->ovflist, epi);
-
- return true;
-}
-
/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
- *
- * This callback takes a read lock in order not to contend with concurrent
- * events from another file descriptor, thus all modifications to ->rdllist
- * or ->ovflist are lockless. Read lock is paired with the write lock from
- * ep_start/done_scan(), which stops all list modifications and guarantees
- * that lists state is seen correctly.
- *
- * Another thing worth to mention is that ep_poll_callback() can be called
- * concurrently for the same @epi from different CPUs if poll table was inited
- * with several wait queues entries. Plural wakeup from different CPUs of a
- * single wait queue is serialized by wq.lock, but the case when multiple wait
- * queues are used should be detected accordingly. This is detected using
- * cmpxchg() operation.
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
@@ -1343,7 +1253,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
unsigned long flags;
int ewake = 0;
- read_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1372,12 +1282,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* chained in ep->ovflist and requeued later on.
*/
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
- if (chain_epi_lockless(epi))
+ if (epi->next == EP_UNACTIVE_PTR) {
+ epi->next = READ_ONCE(ep->ovflist);
+ WRITE_ONCE(ep->ovflist, epi);
ep_pm_stay_awake_rcu(epi);
+ }
} else if (!ep_is_linked(epi)) {
/* In the usual case, add event to ready list. */
- if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
- ep_pm_stay_awake_rcu(epi);
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake_rcu(epi);
}
/*
@@ -1410,7 +1323,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
pwake++;
out_unlock:
- read_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
if (pwake)
@@ -1745,7 +1658,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* We have to drop the new item inside our item list to keep track of it */
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
@@ -1762,7 +1675,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
/* We have to call this outside the lock */
if (pwake)
@@ -1826,7 +1739,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1837,7 +1750,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
/* We have to call this outside the lock */
@@ -2089,7 +2002,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* Barrierless variant, waitqueue_active() is called under
* the same lock on wakeup ep_poll_callback() side, so it
@@ -2108,7 +2021,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (!eavail)
__add_wait_queue_exclusive(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
if (!eavail)
timed_out = !ep_schedule_timeout(to) ||
@@ -2124,7 +2037,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
eavail = 1;
if (!list_empty_careful(&wait.entry)) {
- write_lock_irq(&ep->lock);
+ spin_lock_irq(&ep->lock);
/*
* If the thread timed out and is not on the wait queue,
* it means that the thread was woken up after its
@@ -2135,7 +2048,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (timed_out)
eavail = list_empty(&wait.entry);
__remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->lock);
}
}
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f965d111e68f4a993cc44d487d416e3d954eea11
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101656-peso-landfill-d499@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f965d111e68f4a993cc44d487d416e3d954eea11 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Fri, 26 Sep 2025 12:19:41 +0200
Subject: [PATCH] cpufreq: CPPC: Avoid using CPUFREQ_ETERNAL as transition
delay
If cppc_get_transition_latency() returns CPUFREQ_ETERNAL to indicate a
failure to retrieve the transition latency value from the platform
firmware, the CPPC cpufreq driver will use that value (converted to
microseconds) as the policy transition delay, but it is way too large
for any practical use.
Address this by making the driver use the cpufreq's default
transition latency value (in microseconds) as the transition delay
if CPUFREQ_ETERNAL is returned by cppc_get_transition_latency().
Fixes: d4f3388afd48 ("cpufreq / CPPC: Set platform specific transition_delay_us")
Cc: 5.19+ <stable(a)vger.kernel.org> # 5.19
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Mario Limonciello (AMD) <superm1(a)kernel.org>
Reviewed-by: Jie Zhan <zhanjie9(a)hisilicon.com>
Acked-by: Viresh Kumar <viresh.kumar(a)linaro.org>
Reviewed-by: Qais Yousef <qyousef(a)layalina.io>
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 12de0ac7bbaf..b71946937c52 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -308,6 +308,16 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy)
return 0;
}
+static unsigned int __cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
+{
+ unsigned int transition_latency_ns = cppc_get_transition_latency(cpu);
+
+ if (transition_latency_ns == CPUFREQ_ETERNAL)
+ return CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS / NSEC_PER_USEC;
+
+ return transition_latency_ns / NSEC_PER_USEC;
+}
+
/*
* The PCC subspace describes the rate at which platform can accept commands
* on the shared PCC channel (including READs which do not count towards freq
@@ -330,12 +340,12 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
return 10000;
}
}
- return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ return __cppc_cpufreq_get_transition_delay_us(cpu);
}
#else
static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
{
- return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ return __cppc_cpufreq_get_transition_delay_us(cpu);
}
#endif
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f965d111e68f4a993cc44d487d416e3d954eea11
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101655-hardhead-variably-b7de@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f965d111e68f4a993cc44d487d416e3d954eea11 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
Date: Fri, 26 Sep 2025 12:19:41 +0200
Subject: [PATCH] cpufreq: CPPC: Avoid using CPUFREQ_ETERNAL as transition
delay
If cppc_get_transition_latency() returns CPUFREQ_ETERNAL to indicate a
failure to retrieve the transition latency value from the platform
firmware, the CPPC cpufreq driver will use that value (converted to
microseconds) as the policy transition delay, but it is way too large
for any practical use.
Address this by making the driver use the cpufreq's default
transition latency value (in microseconds) as the transition delay
if CPUFREQ_ETERNAL is returned by cppc_get_transition_latency().
Fixes: d4f3388afd48 ("cpufreq / CPPC: Set platform specific transition_delay_us")
Cc: 5.19+ <stable(a)vger.kernel.org> # 5.19
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Reviewed-by: Mario Limonciello (AMD) <superm1(a)kernel.org>
Reviewed-by: Jie Zhan <zhanjie9(a)hisilicon.com>
Acked-by: Viresh Kumar <viresh.kumar(a)linaro.org>
Reviewed-by: Qais Yousef <qyousef(a)layalina.io>
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 12de0ac7bbaf..b71946937c52 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -308,6 +308,16 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy)
return 0;
}
+static unsigned int __cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
+{
+ unsigned int transition_latency_ns = cppc_get_transition_latency(cpu);
+
+ if (transition_latency_ns == CPUFREQ_ETERNAL)
+ return CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS / NSEC_PER_USEC;
+
+ return transition_latency_ns / NSEC_PER_USEC;
+}
+
/*
* The PCC subspace describes the rate at which platform can accept commands
* on the shared PCC channel (including READs which do not count towards freq
@@ -330,12 +340,12 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
return 10000;
}
}
- return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ return __cppc_cpufreq_get_transition_delay_us(cpu);
}
#else
static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
{
- return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ return __cppc_cpufreq_get_transition_delay_us(cpu);
}
#endif
When adding dependencies with drm_sched_job_add_dependency(), that
function consumes the fence reference both on success and failure, so in
the latter case the dma_fence_put() on the error path (xarray failed to
expand) is a double free.
Interestingly this bug appears to have been present ever since
commit ebd5f74255b9 ("drm/sched: Add dependency tracking"), since the code
back then looked like this:
drm_sched_job_add_implicit_dependencies():
...
for (i = 0; i < fence_count; i++) {
ret = drm_sched_job_add_dependency(job, fences[i]);
if (ret)
break;
}
for (; i < fence_count; i++)
dma_fence_put(fences[i]);
Which means for the failing 'i' the dma_fence_put was already a double
free. Possibly there were no users at that time, or the test cases were
insufficient to hit it.
The bug was then only noticed and fixed after
commit 9c2ba265352a ("drm/scheduler: use new iterator in drm_sched_job_add_implicit_dependencies v2")
landed, with its fixup of
commit 4eaf02d6076c ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies").
At that point it was a slightly different flavour of a double free, which
commit 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder")
noticed and attempted to fix.
But it only moved the double free from happening inside the
drm_sched_job_add_dependency(), when releasing the reference not yet
obtained, to the caller, when releasing the reference already released by
the former in the failure case.
As such it is not easy to identify the right target for the fixes tag so
lets keep it simple and just continue the chain.
While fixing we also improve the comment and explain the reason for taking
the reference and not dropping it.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
Fixes: 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder")
Reported-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Closes: https://lore.kernel.org/dri-devel/aNFbXq8OeYl3QSdm@stanley.mountain/
Cc: Christian König <christian.koenig(a)amd.com>
Cc: Rob Clark <robdclark(a)chromium.org>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Danilo Krummrich <dakr(a)kernel.org>
Cc: Philipp Stanner <phasta(a)kernel.org>
Cc: "Christian König" <ckoenig.leichtzumerken(a)gmail.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v5.16+
---
v2:
* Re-arrange commit text so discussion around sentences starting with
capital letters in all cases can be avoided.
* Keep double return for now.
* Improved comment instead of dropping it.
v3:
* Commit SHA formatting in the commit message.
---
drivers/gpu/drm/scheduler/sched_main.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 46119aacb809..c39f0245e3a9 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -965,13 +965,14 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
dma_resv_assert_held(resv);
dma_resv_for_each_fence(&cursor, resv, usage, fence) {
- /* Make sure to grab an additional ref on the added fence */
- dma_fence_get(fence);
- ret = drm_sched_job_add_dependency(job, fence);
- if (ret) {
- dma_fence_put(fence);
+ /*
+ * As drm_sched_job_add_dependency always consumes the fence
+ * reference (even when it fails), and dma_resv_for_each_fence
+ * is not obtaining one, we need to grab one before calling.
+ */
+ ret = drm_sched_job_add_dependency(job, dma_fence_get(fence));
+ if (ret)
return ret;
- }
}
return 0;
}
--
2.48.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 195a1b7d8388c0ec2969a39324feb8bebf9bb907
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101634-riveter-crust-f6b3@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 195a1b7d8388c0ec2969a39324feb8bebf9bb907 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang(a)os.amperecomputing.com>
Date: Thu, 18 Sep 2025 09:23:49 -0700
Subject: [PATCH] arm64: kprobes: call set_memory_rox() for kprobe page
The kprobe page is allocated by execmem allocator with ROX permission.
It needs to call set_memory_rox() to set proper permission for the
direct map too. It was missed.
Fixes: 10d5e97c1bf8 ("arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com>
Reviewed-by: Catalin Marinas <catalin.marinas(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 0c5d408afd95..8ab6104a4883 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "kprobes: " fmt
+#include <linux/execmem.h>
#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -41,6 +42,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+void *alloc_insn_page(void)
+{
+ void *addr;
+
+ addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+ set_memory_rox((unsigned long)addr, 1);
+ return addr;
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
kprobe_opcode_t *addr = p->ainsn.xol_insn;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 195a1b7d8388c0ec2969a39324feb8bebf9bb907
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101608-fastball-thaw-dc90@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 195a1b7d8388c0ec2969a39324feb8bebf9bb907 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang(a)os.amperecomputing.com>
Date: Thu, 18 Sep 2025 09:23:49 -0700
Subject: [PATCH] arm64: kprobes: call set_memory_rox() for kprobe page
The kprobe page is allocated by execmem allocator with ROX permission.
It needs to call set_memory_rox() to set proper permission for the
direct map too. It was missed.
Fixes: 10d5e97c1bf8 ("arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com>
Reviewed-by: Catalin Marinas <catalin.marinas(a)arm.com>
Signed-off-by: Will Deacon <will(a)kernel.org>
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 0c5d408afd95..8ab6104a4883 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "kprobes: " fmt
+#include <linux/execmem.h>
#include <linux/extable.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -41,6 +42,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
+void *alloc_insn_page(void)
+{
+ void *addr;
+
+ addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
+ if (!addr)
+ return NULL;
+ set_memory_rox((unsigned long)addr, 1);
+ return addr;
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
kprobe_opcode_t *addr = p->ainsn.xol_insn;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f5225a34bd8f9f64eec37f6ae1461289aaa3eb86
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101623-error-attain-05f5@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f5225a34bd8f9f64eec37f6ae1461289aaa3eb86 Mon Sep 17 00:00:00 2001
From: Sumit Kumar <sumit.kumar(a)oss.qualcomm.com>
Date: Wed, 10 Sep 2025 18:11:09 +0530
Subject: [PATCH] bus: mhi: ep: Fix chained transfer handling in read path
The mhi_ep_read_channel function incorrectly assumes the End of Transfer
(EOT) bit is present for each packet in a chained transactions, causing
it to advance mhi_chan->rd_offset beyond wr_offset during host-to-device
transfers when EOT has not yet arrived. This leads to access of unmapped
host memory, causing IOMMU faults and processing of stale TREs.
Modify the loop condition to ensure mhi_queue is not empty, allowing the
function to process only valid TREs up to the current write pointer to
prevent premature reads and ensure safe traversal of chained TREs.
Due to this change, buf_left needs to be removed from the while loop
condition to avoid exiting prematurely before reading the ring completely,
and also remove write_offset since it will always be zero because the new
cache buffer is allocated every time.
Fixes: 5301258899773 ("bus: mhi: ep: Add support for reading from the host")
Co-developed-by: Akhil Vinod <akhil.vinod(a)oss.qualcomm.com>
Signed-off-by: Akhil Vinod <akhil.vinod(a)oss.qualcomm.com>
Signed-off-by: Sumit Kumar <sumit.kumar(a)oss.qualcomm.com>
[mani: reworded description slightly]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
Reviewed-by: Krishna Chaitanya Chundru <krishna.chundru(a)oss.qualcomm.com>
Cc: stable(a)vger.kernel.org
Link: https://patch.msgid.link/20250910-final_chained-v3-1-ec77c9d88ace@oss.qualc…
diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c
index b3eafcf2a2c5..cdea24e92919 100644
--- a/drivers/bus/mhi/ep/main.c
+++ b/drivers/bus/mhi/ep/main.c
@@ -403,17 +403,13 @@ static int mhi_ep_read_channel(struct mhi_ep_cntrl *mhi_cntrl,
{
struct mhi_ep_chan *mhi_chan = &mhi_cntrl->mhi_chan[ring->ch_id];
struct device *dev = &mhi_cntrl->mhi_dev->dev;
- size_t tr_len, read_offset, write_offset;
+ size_t tr_len, read_offset;
struct mhi_ep_buf_info buf_info = {};
u32 len = MHI_EP_DEFAULT_MTU;
struct mhi_ring_element *el;
- bool tr_done = false;
void *buf_addr;
- u32 buf_left;
int ret;
- buf_left = len;
-
do {
/* Don't process the transfer ring if the channel is not in RUNNING state */
if (mhi_chan->state != MHI_CH_STATE_RUNNING) {
@@ -426,24 +422,23 @@ static int mhi_ep_read_channel(struct mhi_ep_cntrl *mhi_cntrl,
/* Check if there is data pending to be read from previous read operation */
if (mhi_chan->tre_bytes_left) {
dev_dbg(dev, "TRE bytes remaining: %u\n", mhi_chan->tre_bytes_left);
- tr_len = min(buf_left, mhi_chan->tre_bytes_left);
+ tr_len = min(len, mhi_chan->tre_bytes_left);
} else {
mhi_chan->tre_loc = MHI_TRE_DATA_GET_PTR(el);
mhi_chan->tre_size = MHI_TRE_DATA_GET_LEN(el);
mhi_chan->tre_bytes_left = mhi_chan->tre_size;
- tr_len = min(buf_left, mhi_chan->tre_size);
+ tr_len = min(len, mhi_chan->tre_size);
}
read_offset = mhi_chan->tre_size - mhi_chan->tre_bytes_left;
- write_offset = len - buf_left;
buf_addr = kmem_cache_zalloc(mhi_cntrl->tre_buf_cache, GFP_KERNEL);
if (!buf_addr)
return -ENOMEM;
buf_info.host_addr = mhi_chan->tre_loc + read_offset;
- buf_info.dev_addr = buf_addr + write_offset;
+ buf_info.dev_addr = buf_addr;
buf_info.size = tr_len;
buf_info.cb = mhi_ep_read_completion;
buf_info.cb_buf = buf_addr;
@@ -459,16 +454,12 @@ static int mhi_ep_read_channel(struct mhi_ep_cntrl *mhi_cntrl,
goto err_free_buf_addr;
}
- buf_left -= tr_len;
mhi_chan->tre_bytes_left -= tr_len;
- if (!mhi_chan->tre_bytes_left) {
- if (MHI_TRE_DATA_GET_IEOT(el))
- tr_done = true;
-
+ if (!mhi_chan->tre_bytes_left)
mhi_chan->rd_offset = (mhi_chan->rd_offset + 1) % ring->ring_size;
- }
- } while (buf_left && !tr_done);
+ /* Read until the some buffer is left or the ring becomes not empty */
+ } while (!mhi_ep_queue_is_empty(mhi_chan->mhi_dev, DMA_TO_DEVICE));
return 0;
@@ -502,15 +493,11 @@ static int mhi_ep_process_ch_ring(struct mhi_ep_ring *ring)
mhi_chan->xfer_cb(mhi_chan->mhi_dev, &result);
} else {
/* UL channel */
- do {
- ret = mhi_ep_read_channel(mhi_cntrl, ring);
- if (ret < 0) {
- dev_err(&mhi_chan->mhi_dev->dev, "Failed to read channel\n");
- return ret;
- }
-
- /* Read until the ring becomes empty */
- } while (!mhi_ep_queue_is_empty(mhi_chan->mhi_dev, DMA_TO_DEVICE));
+ ret = mhi_ep_read_channel(mhi_cntrl, ring);
+ if (ret < 0) {
+ dev_err(&mhi_chan->mhi_dev->dev, "Failed to read channel\n");
+ return ret;
+ }
}
return 0;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6eb350a2233100a283f882c023e5ad426d0ed63b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101555-untitled-sighing-27f5@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6eb350a2233100a283f882c023e5ad426d0ed63b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Wed, 13 Aug 2025 17:02:30 +0200
Subject: [PATCH] rseq: Protect event mask against membarrier IPI
rseq_need_restart() reads and clears task::rseq_event_mask with preemption
disabled to guard against the scheduler.
But membarrier() uses an IPI and sets the PREEMPT bit in the event mask
from the IPI, which leaves that RMW operation unprotected.
Use guard(irq) if CONFIG_MEMBARRIER is enabled to fix that.
Fixes: 2a36ab717e8f ("rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ")
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Boqun Feng <boqun.feng(a)gmail.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: stable(a)vger.kernel.org
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index bc8af3eb5598..1fbeb61babeb 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,6 +7,12 @@
#include <linux/preempt.h>
#include <linux/sched.h>
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD irq
+#else
+# define RSEQ_EVENT_GUARD preempt
+#endif
+
/*
* Map the event mask on the user-space ABI enum rseq_cs_flags
* for direct mask checks.
@@ -41,9 +47,8 @@ static inline void rseq_handle_notify_resume(struct ksignal *ksig,
static inline void rseq_signal_deliver(struct ksignal *ksig,
struct pt_regs *regs)
{
- preempt_disable();
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD)
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
rseq_handle_notify_resume(ksig, regs);
}
diff --git a/kernel/rseq.c b/kernel/rseq.c
index b7a1ec327e81..2452b7366b00 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -342,12 +342,12 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
/*
* Load and clear event mask atomically with respect to
- * scheduler preemption.
+ * scheduler preemption and membarrier IPIs.
*/
- preempt_disable();
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
- preempt_enable();
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ event_mask = t->rseq_event_mask;
+ t->rseq_event_mask = 0;
+ }
return !!event_mask;
}
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x e8c84e2082e69335f66c8ade4895e80ec270d7c4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101533-mutt-routing-a332@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e8c84e2082e69335f66c8ade4895e80ec270d7c4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner(a)kernel.org>
Date: Fri, 19 Sep 2025 17:03:51 +0200
Subject: [PATCH] statmount: don't call path_put() under namespace semaphore
Massage statmount() and make sure we don't call path_put() under the
namespace semaphore. If we put the last reference we're fscked.
Fixes: 46eae99ef733 ("add statmount(2) syscall")
Cc: stable(a)vger.kernel.org # v6.8+
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/namespace.c b/fs/namespace.c
index ba3e5215b636..993983a76853 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5708,7 +5708,6 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
struct mnt_namespace *ns)
{
- struct path root __free(path_put) = {};
struct mount *m;
int err;
@@ -5720,7 +5719,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
if (!s->mnt)
return -ENOENT;
- err = grab_requested_root(ns, &root);
+ err = grab_requested_root(ns, &s->root);
if (err)
return err;
@@ -5729,7 +5728,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
* mounts to show users.
*/
m = real_mount(s->mnt);
- if (!is_path_reachable(m, m->mnt.mnt_root, &root) &&
+ if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
@@ -5737,8 +5736,6 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
if (err)
return err;
- s->root = root;
-
/*
* Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap
* can change concurrently as we only hold the read-side of the
@@ -5960,6 +5957,7 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
if (!ret)
ret = copy_statmount_to_user(ks);
kvfree(ks->seq.buf);
+ path_put(&ks->root);
if (retry_statmount(ret, &seq_size))
goto retry;
return ret;
Hello,
We are observing performance regressions (cpu usage, power
consumption, dropped frames in video playback test, etc.)
after updating to recent stable kernels. We tracked it down
to commit 3cd2aa93674e in linux-6.1.y and commit 3cd2aa93674
in linux-6.6.y ("cpuidle: menu: Avoid discarding useful information",
upstream commit 85975daeaa4).
Upstream fixup fa3fa55de0d ("cpuidle: governors: menu: Avoid using
invalid recent intervals data") doesn't address the problems we are
observing. Revert seems to be bringing performance metrics back to
pre-regression levels.
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d83f1d19c898ac1b54ae64d1c950f5beff801982
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101648-foyer-lapping-1bf6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d83f1d19c898ac1b54ae64d1c950f5beff801982 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut+renesas(a)mailbox.org>
Date: Wed, 13 Aug 2025 23:08:13 +0200
Subject: [PATCH] drm/rcar-du: dsi: Fix 1/2/3 lane support
Remove fixed PPI lane count setup. The R-Car DSI host is capable
of operating in 1..4 DSI lane mode. Remove the hard-coded 4-lane
configuration from PPI register settings and instead configure
the PPI lane count according to lane count information already
obtained by this driver instance.
Configure TXSETR register to match PPI lane count. The R-Car V4H
Reference Manual R19UH0186EJ0121 Rev.1.21 section 67.2.2.3 Tx Set
Register (TXSETR), field LANECNT description indicates that the
TXSETR register LANECNT bitfield lane count must be configured
such, that it matches lane count configuration in PPISETR register
DLEN bitfield. Make sure the LANECNT and DLEN bitfields are
configured to match.
Fixes: 155358310f01 ("drm: rcar-du: Add R-Car DSI driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Marek Vasut <marek.vasut+renesas(a)mailbox.org>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen+renesas(a)ideasonboard.com>
Link: https://lore.kernel.org/r/20250813210840.97621-1-marek.vasut+renesas@mailbo…
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
index 1af4c73f7a88..952c3efb74da 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
@@ -576,7 +576,10 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi,
udelay(10);
rcar_mipi_dsi_clr(dsi, CLOCKSET1, CLOCKSET1_UPDATEPLL);
- ppisetr = PPISETR_DLEN_3 | PPISETR_CLEN;
+ rcar_mipi_dsi_clr(dsi, TXSETR, TXSETR_LANECNT_MASK);
+ rcar_mipi_dsi_set(dsi, TXSETR, dsi->lanes - 1);
+
+ ppisetr = ((BIT(dsi->lanes) - 1) & PPISETR_DLEN_MASK) | PPISETR_CLEN;
rcar_mipi_dsi_write(dsi, PPISETR, ppisetr);
rcar_mipi_dsi_set(dsi, PHYSETUP, PHYSETUP_SHUTDOWNZ);
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
index a6b276f1d6ee..a54c7eb4113b 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
@@ -12,6 +12,9 @@
#define LINKSR_LPBUSY (1 << 1)
#define LINKSR_HSBUSY (1 << 0)
+#define TXSETR 0x100
+#define TXSETR_LANECNT_MASK (0x3 << 0)
+
/*
* Video Mode Register
*/
@@ -80,10 +83,7 @@
* PHY-Protocol Interface (PPI) Registers
*/
#define PPISETR 0x700
-#define PPISETR_DLEN_0 (0x1 << 0)
-#define PPISETR_DLEN_1 (0x3 << 0)
-#define PPISETR_DLEN_2 (0x7 << 0)
-#define PPISETR_DLEN_3 (0xf << 0)
+#define PPISETR_DLEN_MASK (0xf << 0)
#define PPISETR_CLEN (1 << 8)
#define PPICLCR 0x710
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f248d5d5159a88ded55329f0b1b463d0f4094228
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101633-evil-concierge-79d4@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f248d5d5159a88ded55329f0b1b463d0f4094228 Mon Sep 17 00:00:00 2001
From: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Date: Mon, 8 Sep 2025 13:56:57 +0530
Subject: [PATCH] drm/msm/a6xx: Fix PDC sleep sequence
Since the PDC resides out of the GPU subsystem and cannot be reset in
case it enters bad state, utmost care must be taken to trigger the PDC
wake/sleep routines in the correct order.
The PDC wake sequence can be exercised only after a PDC sleep sequence.
Additionally, GMU firmware should initialize a few registers before the
KMD can trigger a PDC sleep sequence. So PDC sleep can't be done if the
GMU firmware has not initialized. Track these dependencies using a new
status variable and trigger PDC sleep/wake sequences appropriately.
Cc: stable(a)vger.kernel.org
Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
Signed-off-by: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/673362/
Signed-off-by: Rob Clark <robin.clark(a)oss.qualcomm.com>
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e74651d5bd7a..5594499ad2d1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -279,6 +279,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu)
if (ret)
DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n");
+ set_bit(GMU_STATUS_FW_START, &gmu->status);
+
return ret;
}
@@ -525,6 +527,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status))
+ return 0;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val,
@@ -552,6 +557,9 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status))
+ return;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1);
ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0,
@@ -560,6 +568,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n");
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+ set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status);
}
static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value)
@@ -688,8 +698,6 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
/* ensure no writes happen before the uCode is fully written */
wmb();
- a6xx_rpmh_stop(gmu);
-
err:
if (!IS_ERR_OR_NULL(pdcptr))
iounmap(pdcptr);
@@ -849,19 +857,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
else
gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
- if (state == GMU_WARM_BOOT) {
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
- } else {
+ ret = a6xx_rpmh_start(gmu);
+ if (ret)
+ return ret;
+
+ if (state == GMU_COLD_BOOT) {
if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU],
"GMU firmware is not loaded\n"))
return -ENOENT;
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
-
ret = a6xx_gmu_fw_load(gmu);
if (ret)
return ret;
@@ -1046,6 +1050,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
/* Reset GPU core blocks */
a6xx_gpu_sw_reset(gpu, true);
+
+ a6xx_rpmh_stop(gmu);
}
static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index d1ce11131ba6..069a8c9474e8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -117,6 +117,12 @@ struct a6xx_gmu {
struct qmp *qmp;
struct a6xx_hfi_msg_bw_table *bw_table;
+
+/* To check if we can trigger sleep seq at PDC. Cleared in a6xx_rpmh_stop() */
+#define GMU_STATUS_FW_START 0
+/* To track if PDC sleep seq was done */
+#define GMU_STATUS_PDC_SLEEP 1
+ unsigned long status;
};
static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f248d5d5159a88ded55329f0b1b463d0f4094228
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101633-sympathy-sheep-6874@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f248d5d5159a88ded55329f0b1b463d0f4094228 Mon Sep 17 00:00:00 2001
From: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Date: Mon, 8 Sep 2025 13:56:57 +0530
Subject: [PATCH] drm/msm/a6xx: Fix PDC sleep sequence
Since the PDC resides out of the GPU subsystem and cannot be reset in
case it enters bad state, utmost care must be taken to trigger the PDC
wake/sleep routines in the correct order.
The PDC wake sequence can be exercised only after a PDC sleep sequence.
Additionally, GMU firmware should initialize a few registers before the
KMD can trigger a PDC sleep sequence. So PDC sleep can't be done if the
GMU firmware has not initialized. Track these dependencies using a new
status variable and trigger PDC sleep/wake sequences appropriately.
Cc: stable(a)vger.kernel.org
Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
Signed-off-by: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/673362/
Signed-off-by: Rob Clark <robin.clark(a)oss.qualcomm.com>
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e74651d5bd7a..5594499ad2d1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -279,6 +279,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu)
if (ret)
DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n");
+ set_bit(GMU_STATUS_FW_START, &gmu->status);
+
return ret;
}
@@ -525,6 +527,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status))
+ return 0;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val,
@@ -552,6 +557,9 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status))
+ return;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1);
ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0,
@@ -560,6 +568,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n");
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+ set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status);
}
static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value)
@@ -688,8 +698,6 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
/* ensure no writes happen before the uCode is fully written */
wmb();
- a6xx_rpmh_stop(gmu);
-
err:
if (!IS_ERR_OR_NULL(pdcptr))
iounmap(pdcptr);
@@ -849,19 +857,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
else
gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
- if (state == GMU_WARM_BOOT) {
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
- } else {
+ ret = a6xx_rpmh_start(gmu);
+ if (ret)
+ return ret;
+
+ if (state == GMU_COLD_BOOT) {
if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU],
"GMU firmware is not loaded\n"))
return -ENOENT;
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
-
ret = a6xx_gmu_fw_load(gmu);
if (ret)
return ret;
@@ -1046,6 +1050,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
/* Reset GPU core blocks */
a6xx_gpu_sw_reset(gpu, true);
+
+ a6xx_rpmh_stop(gmu);
}
static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index d1ce11131ba6..069a8c9474e8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -117,6 +117,12 @@ struct a6xx_gmu {
struct qmp *qmp;
struct a6xx_hfi_msg_bw_table *bw_table;
+
+/* To check if we can trigger sleep seq at PDC. Cleared in a6xx_rpmh_stop() */
+#define GMU_STATUS_FW_START 0
+/* To track if PDC sleep seq was done */
+#define GMU_STATUS_PDC_SLEEP 1
+ unsigned long status;
};
static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f248d5d5159a88ded55329f0b1b463d0f4094228
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101632-almost-grappling-1698@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f248d5d5159a88ded55329f0b1b463d0f4094228 Mon Sep 17 00:00:00 2001
From: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Date: Mon, 8 Sep 2025 13:56:57 +0530
Subject: [PATCH] drm/msm/a6xx: Fix PDC sleep sequence
Since the PDC resides out of the GPU subsystem and cannot be reset in
case it enters bad state, utmost care must be taken to trigger the PDC
wake/sleep routines in the correct order.
The PDC wake sequence can be exercised only after a PDC sleep sequence.
Additionally, GMU firmware should initialize a few registers before the
KMD can trigger a PDC sleep sequence. So PDC sleep can't be done if the
GMU firmware has not initialized. Track these dependencies using a new
status variable and trigger PDC sleep/wake sequences appropriately.
Cc: stable(a)vger.kernel.org
Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
Signed-off-by: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/673362/
Signed-off-by: Rob Clark <robin.clark(a)oss.qualcomm.com>
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e74651d5bd7a..5594499ad2d1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -279,6 +279,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu)
if (ret)
DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n");
+ set_bit(GMU_STATUS_FW_START, &gmu->status);
+
return ret;
}
@@ -525,6 +527,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status))
+ return 0;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val,
@@ -552,6 +557,9 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status))
+ return;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1);
ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0,
@@ -560,6 +568,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n");
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+ set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status);
}
static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value)
@@ -688,8 +698,6 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
/* ensure no writes happen before the uCode is fully written */
wmb();
- a6xx_rpmh_stop(gmu);
-
err:
if (!IS_ERR_OR_NULL(pdcptr))
iounmap(pdcptr);
@@ -849,19 +857,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
else
gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
- if (state == GMU_WARM_BOOT) {
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
- } else {
+ ret = a6xx_rpmh_start(gmu);
+ if (ret)
+ return ret;
+
+ if (state == GMU_COLD_BOOT) {
if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU],
"GMU firmware is not loaded\n"))
return -ENOENT;
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
-
ret = a6xx_gmu_fw_load(gmu);
if (ret)
return ret;
@@ -1046,6 +1050,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
/* Reset GPU core blocks */
a6xx_gpu_sw_reset(gpu, true);
+
+ a6xx_rpmh_stop(gmu);
}
static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index d1ce11131ba6..069a8c9474e8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -117,6 +117,12 @@ struct a6xx_gmu {
struct qmp *qmp;
struct a6xx_hfi_msg_bw_table *bw_table;
+
+/* To check if we can trigger sleep seq at PDC. Cleared in a6xx_rpmh_stop() */
+#define GMU_STATUS_FW_START 0
+/* To track if PDC sleep seq was done */
+#define GMU_STATUS_PDC_SLEEP 1
+ unsigned long status;
};
static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f248d5d5159a88ded55329f0b1b463d0f4094228
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101632-faucet-product-a032@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f248d5d5159a88ded55329f0b1b463d0f4094228 Mon Sep 17 00:00:00 2001
From: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Date: Mon, 8 Sep 2025 13:56:57 +0530
Subject: [PATCH] drm/msm/a6xx: Fix PDC sleep sequence
Since the PDC resides out of the GPU subsystem and cannot be reset in
case it enters bad state, utmost care must be taken to trigger the PDC
wake/sleep routines in the correct order.
The PDC wake sequence can be exercised only after a PDC sleep sequence.
Additionally, GMU firmware should initialize a few registers before the
KMD can trigger a PDC sleep sequence. So PDC sleep can't be done if the
GMU firmware has not initialized. Track these dependencies using a new
status variable and trigger PDC sleep/wake sequences appropriately.
Cc: stable(a)vger.kernel.org
Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
Signed-off-by: Akhil P Oommen <akhilpo(a)oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/673362/
Signed-off-by: Rob Clark <robin.clark(a)oss.qualcomm.com>
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e74651d5bd7a..5594499ad2d1 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -279,6 +279,8 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu)
if (ret)
DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n");
+ set_bit(GMU_STATUS_FW_START, &gmu->status);
+
return ret;
}
@@ -525,6 +527,9 @@ static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status))
+ return 0;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val,
@@ -552,6 +557,9 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
int ret;
u32 val;
+ if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status))
+ return;
+
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1);
ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0,
@@ -560,6 +568,8 @@ static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n");
gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+ set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status);
}
static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value)
@@ -688,8 +698,6 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
/* ensure no writes happen before the uCode is fully written */
wmb();
- a6xx_rpmh_stop(gmu);
-
err:
if (!IS_ERR_OR_NULL(pdcptr))
iounmap(pdcptr);
@@ -849,19 +857,15 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
else
gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
- if (state == GMU_WARM_BOOT) {
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
- } else {
+ ret = a6xx_rpmh_start(gmu);
+ if (ret)
+ return ret;
+
+ if (state == GMU_COLD_BOOT) {
if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU],
"GMU firmware is not loaded\n"))
return -ENOENT;
- ret = a6xx_rpmh_start(gmu);
- if (ret)
- return ret;
-
ret = a6xx_gmu_fw_load(gmu);
if (ret)
return ret;
@@ -1046,6 +1050,8 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
/* Reset GPU core blocks */
a6xx_gpu_sw_reset(gpu, true);
+
+ a6xx_rpmh_stop(gmu);
}
static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
index d1ce11131ba6..069a8c9474e8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -117,6 +117,12 @@ struct a6xx_gmu {
struct qmp *qmp;
struct a6xx_hfi_msg_bw_table *bw_table;
+
+/* To check if we can trigger sleep seq at PDC. Cleared in a6xx_rpmh_stop() */
+#define GMU_STATUS_FW_START 0
+/* To track if PDC sleep seq was done */
+#define GMU_STATUS_PDC_SLEEP 1
+ unsigned long status;
};
static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x 6fc957185e1691bb6dfa4193698a229db537c2a2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101656-edgy-outbreak-15f9@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6fc957185e1691bb6dfa4193698a229db537c2a2 Mon Sep 17 00:00:00 2001
From: Julia Filipchuk <julia.filipchuk(a)intel.com>
Date: Wed, 3 Sep 2025 12:00:38 -0700
Subject: [PATCH] drm/xe: Extend Wa_13011645652 to PTL-H, WCL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Expand workaround to additional graphics architectures.
Cc: Vinay Belgaumkar <vinay.belgaumkar(a)intel.com>
Cc: Stuart Summers <stuart.summers(a)intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio(a)intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: intel-xe(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v6.17+
Signed-off-by: Julia Filipchuk <julia.filipchuk(a)intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
Link: https://lore.kernel.org/r/20250903190122.1028373-2-julia.filipchuk@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 0cf2b3c03532..338c344dcd7d 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -32,7 +32,8 @@
16022287689 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
13011645652 GRAPHICS_VERSION(2004)
- GRAPHICS_VERSION(3001)
+ GRAPHICS_VERSION_RANGE(3000, 3001)
+ GRAPHICS_VERSION(3003)
14022293748 GRAPHICS_VERSION_RANGE(2001, 2002)
GRAPHICS_VERSION(2004)
GRAPHICS_VERSION_RANGE(3000, 3001)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 201c53c687f2b55a7cc6d9f4000af4797860174b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101645-fever-premiere-e9fb@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001
From: Kuen-Han Tsai <khtsai(a)google.com>
Date: Tue, 16 Sep 2025 16:21:33 +0800
Subject: [PATCH] usb: gadget: Introduce free_usb_request helper
Introduce the free_usb_request() function that frees both the request's
buffer and the request itself.
This function serves as the cleanup callback for DEFINE_FREE() to enable
automatic, scope-based cleanup for usb_request pointers.
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0f2079476088..3aaf19e77558 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -15,6 +15,7 @@
#ifndef __LINUX_USB_GADGET_H
#define __LINUX_USB_GADGET_H
+#include <linux/cleanup.h>
#include <linux/configfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep)
/*-------------------------------------------------------------------------*/
+/**
+ * free_usb_request - frees a usb_request object and its buffer
+ * @req: the request being freed
+ *
+ * This helper function frees both the request's buffer and the request object
+ * itself by calling usb_ep_free_request(). Its signature is designed to be used
+ * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request
+ * pointers.
+ */
+static inline void free_usb_request(struct usb_request *req)
+{
+ if (!req)
+ return;
+
+ kfree(req->buf);
+ usb_ep_free_request(req->ep, req);
+}
+
+DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T))
+
+/*-------------------------------------------------------------------------*/
+
struct usb_dcd_config_params {
__u8 bU1devExitLat; /* U1 Device exit Latency */
#define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 201c53c687f2b55a7cc6d9f4000af4797860174b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101644-evoke-acutely-da83@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001
From: Kuen-Han Tsai <khtsai(a)google.com>
Date: Tue, 16 Sep 2025 16:21:33 +0800
Subject: [PATCH] usb: gadget: Introduce free_usb_request helper
Introduce the free_usb_request() function that frees both the request's
buffer and the request itself.
This function serves as the cleanup callback for DEFINE_FREE() to enable
automatic, scope-based cleanup for usb_request pointers.
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0f2079476088..3aaf19e77558 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -15,6 +15,7 @@
#ifndef __LINUX_USB_GADGET_H
#define __LINUX_USB_GADGET_H
+#include <linux/cleanup.h>
#include <linux/configfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep)
/*-------------------------------------------------------------------------*/
+/**
+ * free_usb_request - frees a usb_request object and its buffer
+ * @req: the request being freed
+ *
+ * This helper function frees both the request's buffer and the request object
+ * itself by calling usb_ep_free_request(). Its signature is designed to be used
+ * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request
+ * pointers.
+ */
+static inline void free_usb_request(struct usb_request *req)
+{
+ if (!req)
+ return;
+
+ kfree(req->buf);
+ usb_ep_free_request(req->ep, req);
+}
+
+DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T))
+
+/*-------------------------------------------------------------------------*/
+
struct usb_dcd_config_params {
__u8 bU1devExitLat; /* U1 Device exit Latency */
#define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 201c53c687f2b55a7cc6d9f4000af4797860174b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101644-scoured-unarmored-e328@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001
From: Kuen-Han Tsai <khtsai(a)google.com>
Date: Tue, 16 Sep 2025 16:21:33 +0800
Subject: [PATCH] usb: gadget: Introduce free_usb_request helper
Introduce the free_usb_request() function that frees both the request's
buffer and the request itself.
This function serves as the cleanup callback for DEFINE_FREE() to enable
automatic, scope-based cleanup for usb_request pointers.
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0f2079476088..3aaf19e77558 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -15,6 +15,7 @@
#ifndef __LINUX_USB_GADGET_H
#define __LINUX_USB_GADGET_H
+#include <linux/cleanup.h>
#include <linux/configfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep)
/*-------------------------------------------------------------------------*/
+/**
+ * free_usb_request - frees a usb_request object and its buffer
+ * @req: the request being freed
+ *
+ * This helper function frees both the request's buffer and the request object
+ * itself by calling usb_ep_free_request(). Its signature is designed to be used
+ * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request
+ * pointers.
+ */
+static inline void free_usb_request(struct usb_request *req)
+{
+ if (!req)
+ return;
+
+ kfree(req->buf);
+ usb_ep_free_request(req->ep, req);
+}
+
+DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T))
+
+/*-------------------------------------------------------------------------*/
+
struct usb_dcd_config_params {
__u8 bU1devExitLat; /* U1 Device exit Latency */
#define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 201c53c687f2b55a7cc6d9f4000af4797860174b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101644-chrome-refurbish-cad6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001
From: Kuen-Han Tsai <khtsai(a)google.com>
Date: Tue, 16 Sep 2025 16:21:33 +0800
Subject: [PATCH] usb: gadget: Introduce free_usb_request helper
Introduce the free_usb_request() function that frees both the request's
buffer and the request itself.
This function serves as the cleanup callback for DEFINE_FREE() to enable
automatic, scope-based cleanup for usb_request pointers.
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0f2079476088..3aaf19e77558 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -15,6 +15,7 @@
#ifndef __LINUX_USB_GADGET_H
#define __LINUX_USB_GADGET_H
+#include <linux/cleanup.h>
#include <linux/configfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep)
/*-------------------------------------------------------------------------*/
+/**
+ * free_usb_request - frees a usb_request object and its buffer
+ * @req: the request being freed
+ *
+ * This helper function frees both the request's buffer and the request object
+ * itself by calling usb_ep_free_request(). Its signature is designed to be used
+ * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request
+ * pointers.
+ */
+static inline void free_usb_request(struct usb_request *req)
+{
+ if (!req)
+ return;
+
+ kfree(req->buf);
+ usb_ep_free_request(req->ep, req);
+}
+
+DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T))
+
+/*-------------------------------------------------------------------------*/
+
struct usb_dcd_config_params {
__u8 bU1devExitLat; /* U1 Device exit Latency */
#define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 201c53c687f2b55a7cc6d9f4000af4797860174b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101643-coveting-prewashed-cd25@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001
From: Kuen-Han Tsai <khtsai(a)google.com>
Date: Tue, 16 Sep 2025 16:21:33 +0800
Subject: [PATCH] usb: gadget: Introduce free_usb_request helper
Introduce the free_usb_request() function that frees both the request's
buffer and the request itself.
This function serves as the cleanup callback for DEFINE_FREE() to enable
automatic, scope-based cleanup for usb_request pointers.
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0f2079476088..3aaf19e77558 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -15,6 +15,7 @@
#ifndef __LINUX_USB_GADGET_H
#define __LINUX_USB_GADGET_H
+#include <linux/cleanup.h>
#include <linux/configfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep)
/*-------------------------------------------------------------------------*/
+/**
+ * free_usb_request - frees a usb_request object and its buffer
+ * @req: the request being freed
+ *
+ * This helper function frees both the request's buffer and the request object
+ * itself by calling usb_ep_free_request(). Its signature is designed to be used
+ * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request
+ * pointers.
+ */
+static inline void free_usb_request(struct usb_request *req)
+{
+ if (!req)
+ return;
+
+ kfree(req->buf);
+ usb_ep_free_request(req->ep, req);
+}
+
+DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T))
+
+/*-------------------------------------------------------------------------*/
+
struct usb_dcd_config_params {
__u8 bU1devExitLat; /* U1 Device exit Latency */
#define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 201c53c687f2b55a7cc6d9f4000af4797860174b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101643-efficient-commotion-51f2@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001
From: Kuen-Han Tsai <khtsai(a)google.com>
Date: Tue, 16 Sep 2025 16:21:33 +0800
Subject: [PATCH] usb: gadget: Introduce free_usb_request helper
Introduce the free_usb_request() function that frees both the request's
buffer and the request itself.
This function serves as the cleanup callback for DEFINE_FREE() to enable
automatic, scope-based cleanup for usb_request pointers.
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0f2079476088..3aaf19e77558 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -15,6 +15,7 @@
#ifndef __LINUX_USB_GADGET_H
#define __LINUX_USB_GADGET_H
+#include <linux/cleanup.h>
#include <linux/configfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep)
/*-------------------------------------------------------------------------*/
+/**
+ * free_usb_request - frees a usb_request object and its buffer
+ * @req: the request being freed
+ *
+ * This helper function frees both the request's buffer and the request object
+ * itself by calling usb_ep_free_request(). Its signature is designed to be used
+ * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request
+ * pointers.
+ */
+static inline void free_usb_request(struct usb_request *req)
+{
+ if (!req)
+ return;
+
+ kfree(req->buf);
+ usb_ep_free_request(req->ep, req);
+}
+
+DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T))
+
+/*-------------------------------------------------------------------------*/
+
struct usb_dcd_config_params {
__u8 bU1devExitLat; /* U1 Device exit Latency */
#define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */
The patch below does not apply to the 6.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.17.y
git checkout FETCH_HEAD
git cherry-pick -x 201c53c687f2b55a7cc6d9f4000af4797860174b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101642-jumble-hatchback-ff0f@gregkh' --subject-prefix 'PATCH 6.17.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 201c53c687f2b55a7cc6d9f4000af4797860174b Mon Sep 17 00:00:00 2001
From: Kuen-Han Tsai <khtsai(a)google.com>
Date: Tue, 16 Sep 2025 16:21:33 +0800
Subject: [PATCH] usb: gadget: Introduce free_usb_request helper
Introduce the free_usb_request() function that frees both the request's
buffer and the request itself.
This function serves as the cleanup callback for DEFINE_FREE() to enable
automatic, scope-based cleanup for usb_request pointers.
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Link: https://lore.kernel.org/r/20250916-ready-v1-2-4997bf277548@google.com
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0f2079476088..3aaf19e77558 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -15,6 +15,7 @@
#ifndef __LINUX_USB_GADGET_H
#define __LINUX_USB_GADGET_H
+#include <linux/cleanup.h>
#include <linux/configfs.h>
#include <linux/device.h>
#include <linux/errno.h>
@@ -293,6 +294,28 @@ static inline void usb_ep_fifo_flush(struct usb_ep *ep)
/*-------------------------------------------------------------------------*/
+/**
+ * free_usb_request - frees a usb_request object and its buffer
+ * @req: the request being freed
+ *
+ * This helper function frees both the request's buffer and the request object
+ * itself by calling usb_ep_free_request(). Its signature is designed to be used
+ * with DEFINE_FREE() to enable automatic, scope-based cleanup for usb_request
+ * pointers.
+ */
+static inline void free_usb_request(struct usb_request *req)
+{
+ if (!req)
+ return;
+
+ kfree(req->buf);
+ usb_ep_free_request(req->ep, req);
+}
+
+DEFINE_FREE(free_usb_request, struct usb_request *, free_usb_request(_T))
+
+/*-------------------------------------------------------------------------*/
+
struct usb_dcd_config_params {
__u8 bU1devExitLat; /* U1 Device exit Latency */
#define USB_DEFAULT_U1_DEV_EXIT_LAT 0x01 /* Less then 1 microsec */
From: "Rafael J. Wysocki" <rafael.j.wysocki(a)intel.com>
[ Upstream commit fa3fa55de0d6177fdcaf6fc254f13cc8f33c3eed ]
Marc has reported that commit 85975daeaa4d ("cpuidle: menu: Avoid
discarding useful information") caused the number of wakeup interrupts
to increase on an idle system [1], which was not expected to happen
after merely allowing shallower idle states to be selected by the
governor in some cases.
However, on the system in question, all of the idle states deeper than
WFI are rejected by the driver due to a firmware issue [2]. This causes
the governor to only consider the recent interval duriation data
corresponding to attempts to enter WFI that are successful and the
recent invervals table is filled with values lower than the scheduler
tick period. Consequently, the governor predicts an idle duration
below the scheduler tick period length and avoids stopping the tick
more often which leads to the observed symptom.
Address it by modifying the governor to update the recent intervals
table also when entering the previously selected idle state fails, so
it knows that the short idle intervals might have been the minority
had the selected idle states been actually entered every time.
Fixes: 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information")
Link: https://lore.kernel.org/linux-pm/86o6sv6n94.wl-maz@kernel.org/ [1]
Link: https://lore.kernel.org/linux-pm/7ffcb716-9a1b-48c2-aaa4-469d0df7c792@arm.c… [2]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Tested-by: Christian Loehle <christian.loehle(a)arm.com>
Tested-by: Marc Zyngier <maz(a)kernel.org>
Reviewed-by: Christian Loehle <christian.loehle(a)arm.com>
Link: https://patch.msgid.link/2793874.mvXUDI8C0e@rafael.j.wysocki
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
(cherry picked from commit 7337a6356dffc93194af24ee31023b3578661a5b)
---
drivers/cpuidle/governors/menu.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 4edac724983a..0b3c917d505d 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -158,6 +158,14 @@ static inline int performance_multiplier(unsigned int nr_iowaiters)
static DEFINE_PER_CPU(struct menu_device, menu_devices);
+static void menu_update_intervals(struct menu_device *data, unsigned int interval_us)
+{
+ /* Update the repeating-pattern data. */
+ data->intervals[data->interval_ptr++] = interval_us;
+ if (data->interval_ptr >= INTERVALS)
+ data->interval_ptr = 0;
+}
+
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
/*
@@ -288,6 +296,14 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (data->needs_update) {
menu_update(drv, dev);
data->needs_update = 0;
+ } else if (!dev->last_residency_ns) {
+ /*
+ * This happens when the driver rejects the previously selected
+ * idle state and returns an error, so update the recent
+ * intervals table to prevent invalid information from being
+ * used going forward.
+ */
+ menu_update_intervals(data, UINT_MAX);
}
/* determine the expected residency time, round up */
@@ -542,10 +558,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->correction_factor[data->bucket] = new_factor;
- /* update the repeating-pattern data */
- data->intervals[data->interval_ptr++] = ktime_to_us(measured_ns);
- if (data->interval_ptr >= INTERVALS)
- data->interval_ptr = 0;
+ menu_update_intervals(data, ktime_to_us(measured_ns));
}
/**
--
2.51.0.760.g7b8bcc2412-goog
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 07ce121d93a5e5fb2440a24da3dbf408fcee978e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101625-rebalance-humbling-87a2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 07ce121d93a5e5fb2440a24da3dbf408fcee978e Mon Sep 17 00:00:00 2001
From: Jason Andryuk <jason.andryuk(a)amd.com>
Date: Wed, 27 Aug 2025 20:36:02 -0400
Subject: [PATCH] xen/events: Return -EEXIST for bound VIRQs
Change find_virq() to return -EEXIST when a VIRQ is bound to a
different CPU than the one passed in. With that, remove the BUG_ON()
from bind_virq_to_irq() to propogate the error upwards.
Some VIRQs are per-cpu, but others are per-domain or global. Those must
be bound to CPU0 and can then migrate elsewhere. The lookup for
per-domain and global will probably fail when migrated off CPU 0,
especially when the current CPU is tracked. This now returns -EEXIST
instead of BUG_ON().
A second call to bind a per-domain or global VIRQ is not expected, but
make it non-fatal to avoid trying to look up the irq, since we don't
know which per_cpu(virq_to_irq) it will be in.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jason Andryuk <jason.andryuk(a)amd.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Signed-off-by: Juergen Gross <jgross(a)suse.com>
Message-ID: <20250828003604.8949-3-jason.andryuk(a)amd.com>
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 374231d84e4f..b060b5a95f45 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
-static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
+static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn,
+ bool percpu)
{
struct evtchn_status status;
evtchn_port_t port;
+ bool exists = false;
memset(&status, 0, sizeof(status));
for (port = 0; port < xen_evtchn_max_channels(); port++) {
@@ -1330,12 +1332,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
continue;
if (status.status != EVTCHNSTAT_virq)
continue;
- if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
+ if (status.u.virq != virq)
+ continue;
+ if (status.vcpu == xen_vcpu_nr(cpu)) {
*evtchn = port;
return 0;
+ } else if (!percpu) {
+ exists = true;
}
}
- return -ENOENT;
+ return exists ? -EEXIST : -ENOENT;
}
/**
@@ -1382,8 +1388,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
evtchn = bind_virq.port;
else {
if (ret == -EEXIST)
- ret = find_virq(virq, cpu, &evtchn);
- BUG_ON(ret < 0);
+ ret = find_virq(virq, cpu, &evtchn, percpu);
+ if (ret) {
+ __unbind_from_irq(info, info->irq);
+ goto out;
+ }
}
ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 07ce121d93a5e5fb2440a24da3dbf408fcee978e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101625-gully-disaster-8717@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 07ce121d93a5e5fb2440a24da3dbf408fcee978e Mon Sep 17 00:00:00 2001
From: Jason Andryuk <jason.andryuk(a)amd.com>
Date: Wed, 27 Aug 2025 20:36:02 -0400
Subject: [PATCH] xen/events: Return -EEXIST for bound VIRQs
Change find_virq() to return -EEXIST when a VIRQ is bound to a
different CPU than the one passed in. With that, remove the BUG_ON()
from bind_virq_to_irq() to propogate the error upwards.
Some VIRQs are per-cpu, but others are per-domain or global. Those must
be bound to CPU0 and can then migrate elsewhere. The lookup for
per-domain and global will probably fail when migrated off CPU 0,
especially when the current CPU is tracked. This now returns -EEXIST
instead of BUG_ON().
A second call to bind a per-domain or global VIRQ is not expected, but
make it non-fatal to avoid trying to look up the irq, since we don't
know which per_cpu(virq_to_irq) it will be in.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jason Andryuk <jason.andryuk(a)amd.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Signed-off-by: Juergen Gross <jgross(a)suse.com>
Message-ID: <20250828003604.8949-3-jason.andryuk(a)amd.com>
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 374231d84e4f..b060b5a95f45 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
-static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
+static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn,
+ bool percpu)
{
struct evtchn_status status;
evtchn_port_t port;
+ bool exists = false;
memset(&status, 0, sizeof(status));
for (port = 0; port < xen_evtchn_max_channels(); port++) {
@@ -1330,12 +1332,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
continue;
if (status.status != EVTCHNSTAT_virq)
continue;
- if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
+ if (status.u.virq != virq)
+ continue;
+ if (status.vcpu == xen_vcpu_nr(cpu)) {
*evtchn = port;
return 0;
+ } else if (!percpu) {
+ exists = true;
}
}
- return -ENOENT;
+ return exists ? -EEXIST : -ENOENT;
}
/**
@@ -1382,8 +1388,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
evtchn = bind_virq.port;
else {
if (ret == -EEXIST)
- ret = find_virq(virq, cpu, &evtchn);
- BUG_ON(ret < 0);
+ ret = find_virq(virq, cpu, &evtchn, percpu);
+ if (ret) {
+ __unbind_from_irq(info, info->irq);
+ goto out;
+ }
}
ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 07ce121d93a5e5fb2440a24da3dbf408fcee978e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025101624-yield-grime-5287@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 07ce121d93a5e5fb2440a24da3dbf408fcee978e Mon Sep 17 00:00:00 2001
From: Jason Andryuk <jason.andryuk(a)amd.com>
Date: Wed, 27 Aug 2025 20:36:02 -0400
Subject: [PATCH] xen/events: Return -EEXIST for bound VIRQs
Change find_virq() to return -EEXIST when a VIRQ is bound to a
different CPU than the one passed in. With that, remove the BUG_ON()
from bind_virq_to_irq() to propogate the error upwards.
Some VIRQs are per-cpu, but others are per-domain or global. Those must
be bound to CPU0 and can then migrate elsewhere. The lookup for
per-domain and global will probably fail when migrated off CPU 0,
especially when the current CPU is tracked. This now returns -EEXIST
instead of BUG_ON().
A second call to bind a per-domain or global VIRQ is not expected, but
make it non-fatal to avoid trying to look up the irq, since we don't
know which per_cpu(virq_to_irq) it will be in.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jason Andryuk <jason.andryuk(a)amd.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Signed-off-by: Juergen Gross <jgross(a)suse.com>
Message-ID: <20250828003604.8949-3-jason.andryuk(a)amd.com>
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 374231d84e4f..b060b5a95f45 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1314,10 +1314,12 @@ int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
-static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
+static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn,
+ bool percpu)
{
struct evtchn_status status;
evtchn_port_t port;
+ bool exists = false;
memset(&status, 0, sizeof(status));
for (port = 0; port < xen_evtchn_max_channels(); port++) {
@@ -1330,12 +1332,16 @@ static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
continue;
if (status.status != EVTCHNSTAT_virq)
continue;
- if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
+ if (status.u.virq != virq)
+ continue;
+ if (status.vcpu == xen_vcpu_nr(cpu)) {
*evtchn = port;
return 0;
+ } else if (!percpu) {
+ exists = true;
}
}
- return -ENOENT;
+ return exists ? -EEXIST : -ENOENT;
}
/**
@@ -1382,8 +1388,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
evtchn = bind_virq.port;
else {
if (ret == -EEXIST)
- ret = find_virq(virq, cpu, &evtchn);
- BUG_ON(ret < 0);
+ ret = find_virq(virq, cpu, &evtchn, percpu);
+ if (ret) {
+ __unbind_from_irq(info, info->irq);
+ goto out;
+ }
}
ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);