The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6d849ff573722afcf5508d2800017bdd40f27eb9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025120156-spirits-payroll-7475@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6d849ff573722afcf5508d2800017bdd40f27eb9 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz(a)bp.renesas.com>
Date: Tue, 18 Nov 2025 12:39:25 +0000
Subject: [PATCH] can: rcar_canfd: Fix CAN-FD mode as default
The commit 5cff263606a1 ("can: rcar_canfd: Fix controller mode setting")
has aligned with the flow mentioned in the hardware manual for all SoCs
except R-Car Gen3 and RZ/G2L SoCs. On R-Car Gen4 and RZ/G3E SoCs, due to
the wrong logic in the commit[1] sets the default mode to FD-Only mode
instead of CAN-FD mode.
This patch sets the CAN-FD mode as the default for all SoCs by dropping
the rcar_canfd_set_mode() as some SoC requires mode setting in global
reset mode, and the rest of the SoCs in channel reset mode and update the
rcar_canfd_reset_controller() to take care of these constraints. Moreover,
the RZ/G3E and R-Car Gen4 SoCs support 3 modes compared to 2 modes on the
R-Car Gen3. Use inverted logic in rcar_canfd_reset_controller() to
simplify the code later to support FD-only mode.
[1]
commit 45721c406dcf ("can: rcar_canfd: Add support for r8a779a0 SoC")
Fixes: 5cff263606a1 ("can: rcar_canfd: Fix controller mode setting")
Cc: stable(a)vger.kernel.org
Signed-off-by: Biju Das <biju.das.jz(a)bp.renesas.com>
Link: https://patch.msgid.link/20251118123926.193445-1-biju.das.jz@bp.renesas.com
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index 45d36adb51b7..4c0d7d26df9f 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -709,6 +709,11 @@ static void rcar_canfd_set_bit_reg(void __iomem *addr, u32 val)
rcar_canfd_update(val, val, addr);
}
+static void rcar_canfd_clear_bit_reg(void __iomem *addr, u32 val)
+{
+ rcar_canfd_update(val, 0, addr);
+}
+
static void rcar_canfd_update_bit_reg(void __iomem *addr, u32 mask, u32 val)
{
rcar_canfd_update(mask, val, addr);
@@ -755,25 +760,6 @@ static void rcar_canfd_set_rnc(struct rcar_canfd_global *gpriv, unsigned int ch,
rcar_canfd_set_bit(gpriv->base, RCANFD_GAFLCFG(w), rnc);
}
-static void rcar_canfd_set_mode(struct rcar_canfd_global *gpriv)
-{
- if (gpriv->info->ch_interface_mode) {
- u32 ch, val = gpriv->fdmode ? RCANFD_GEN4_FDCFG_FDOE
- : RCANFD_GEN4_FDCFG_CLOE;
-
- for_each_set_bit(ch, &gpriv->channels_mask,
- gpriv->info->max_channels)
- rcar_canfd_set_bit_reg(&gpriv->fcbase[ch].cfdcfg, val);
- } else {
- if (gpriv->fdmode)
- rcar_canfd_set_bit(gpriv->base, RCANFD_GRMCFG,
- RCANFD_GRMCFG_RCMC);
- else
- rcar_canfd_clear_bit(gpriv->base, RCANFD_GRMCFG,
- RCANFD_GRMCFG_RCMC);
- }
-}
-
static int rcar_canfd_reset_controller(struct rcar_canfd_global *gpriv)
{
struct device *dev = &gpriv->pdev->dev;
@@ -806,6 +792,16 @@ static int rcar_canfd_reset_controller(struct rcar_canfd_global *gpriv)
/* Reset Global error flags */
rcar_canfd_write(gpriv->base, RCANFD_GERFL, 0x0);
+ /* Set the controller into appropriate mode */
+ if (!gpriv->info->ch_interface_mode) {
+ if (gpriv->fdmode)
+ rcar_canfd_set_bit(gpriv->base, RCANFD_GRMCFG,
+ RCANFD_GRMCFG_RCMC);
+ else
+ rcar_canfd_clear_bit(gpriv->base, RCANFD_GRMCFG,
+ RCANFD_GRMCFG_RCMC);
+ }
+
/* Transition all Channels to reset mode */
for_each_set_bit(ch, &gpriv->channels_mask, gpriv->info->max_channels) {
rcar_canfd_clear_bit(gpriv->base,
@@ -823,10 +819,23 @@ static int rcar_canfd_reset_controller(struct rcar_canfd_global *gpriv)
dev_dbg(dev, "channel %u reset failed\n", ch);
return err;
}
- }
- /* Set the controller into appropriate mode */
- rcar_canfd_set_mode(gpriv);
+ /* Set the controller into appropriate mode */
+ if (gpriv->info->ch_interface_mode) {
+ /* Do not set CLOE and FDOE simultaneously */
+ if (!gpriv->fdmode) {
+ rcar_canfd_clear_bit_reg(&gpriv->fcbase[ch].cfdcfg,
+ RCANFD_GEN4_FDCFG_FDOE);
+ rcar_canfd_set_bit_reg(&gpriv->fcbase[ch].cfdcfg,
+ RCANFD_GEN4_FDCFG_CLOE);
+ } else {
+ rcar_canfd_clear_bit_reg(&gpriv->fcbase[ch].cfdcfg,
+ RCANFD_GEN4_FDCFG_FDOE);
+ rcar_canfd_clear_bit_reg(&gpriv->fcbase[ch].cfdcfg,
+ RCANFD_GEN4_FDCFG_CLOE);
+ }
+ }
+ }
return 0;
}
Hi,
I see that a backport of commit [1] has been merged into the 6.12 LTS
tree as part of v6.12.59.
I suggest commit [2] to be backported to linux-6.12.y too since it's a
follow up fix for [1].
[1] 43b27d1bd88a ("net/mlx5e: Fix wraparound in rate limiting for values above 255 Gbps")
[2] d2099d9f16db ("net/mlx5e: Fix validation logic in rate limiting")
I tested the change and everything looks good.
It's a clean cherry-pick.
Thanks
Simon
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Christof Hellmis
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
From: Jan H. Schönherr <jschoenh(a)amazon.de>
It is possible to degrade host performance by manipulating performance
counters from a VM and tricking the host hypervisor to enable branch
tracing. When the guest programs a CPU to track branch instructions and
deliver an interrupt after exactly one branch instruction, the value one
is handled by the host KVM/perf subsystems and treated incorrectly as a
special value to enable the branch trace store (BTS) subsystem. It
should not be possible to enable BTS from a guest. When BTS is enabled,
it leads to general host performance degradation to both VMs and host.
Perf considers the combination of PERF_COUNT_HW_BRANCH_INSTRUCTIONS with
a sample_period of 1 a special case and handles this as a BTS event (see
intel_pmu_has_bts_period()) -- a deviation from the usual semantic,
where the sample_period represents the amount of branch instructions to
encounter before the overflow handler is invoked.
Nothing prevents a guest from programming its vPMU with the above
settings (count branch, interrupt after one branch), which causes KVM to
erroneously instruct perf to create a BTS event within
pmc_reprogram_counter(), which does not have the desired semantics.
The guest could also do more benign actions and request an interrupt
after a more reasonable number of branch instructions via its vPMU. In
that case counting works initially. However, KVM occasionally pauses and
resumes the created performance counters. If the remaining amount of
branch instructions until interrupt has reached 1 exactly,
pmc_resume_counter() fails to resume the counter and a BTS event is
created instead with its incorrect semantics.
Fix this behavior by not passing the special value "1" as sample_period
to perf. Instead, perform the same quirk that happens later in
x86_perf_event_set_period() anyway, when the performance counter is
transferred to the actual PMU: bump the sample_period to 2.
Testing:
From guest:
`./wrmsr -p 12 0x186 0x1100c4`
`./wrmsr -p 12 0xc1 0xffffffffffff`
`./wrmsr -p 12 0x186 0x5100c4`
This sequence sets up branch instruction counting, initializes the counter
to overflow after one event (0xffffffffffff), and then enables edge
detection (bit 18) for branch events.
./wrmsr -p 12 0x186 0x1100c4
Writes to IA32_PERFEVTSEL0 (0x186)
Value 0x1100c4 breaks down as:
Event = 0xC4 (Branch instructions)
Bits 16-17: 0x1 (User mode only)
Bit 22: 1 (Enable counter)
./wrmsr -p 12 0xc1 0xffffffffffff
Writes to IA32_PMC0 (0xC1)
Sets counter to maximum value (0xffffffffffff)
This effectively sets up the counter to overflow on the next branch
./wrmsr -p 12 0x186 0x5100c4
Updates IA32_PERFEVTSEL0 again
Similar to first command but adds bit 18 (0x4 to 0x5)
Enables edge detection (bit 18)
These MSR writes are trapped by the hypervisor in KVM and forwarded to
the perf subsystem to create corresponding monitoring events.
It is possible to repro this problem in a more realistic guest scenario:
`perf record -e branches:u -c 2 -a &`
`perf record -e branches:u -c 2 -a &`
This presumably triggers the issue by KVM pausing and resuming the
performance counter at the wrong moment, when its value is about to
overflow.
Signed-off-by: Jan H. Schönherr <jschoenh(a)amazon.de>
Signed-off-by: Fernand Sieber <sieberf(a)amazon.com>
Reviewed-by: David Woodhouse <dwmw(a)amazon.co.uk>
Reviewed-by: Hendrik Borghorst <hborghor(a)amazon.de>
Link: https://lore.kernel.org/r/20251124100220.238177-1-sieberf@amazon.com
---
arch/x86/kvm/pmu.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 487ad19a236e..547512028e24 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -225,6 +225,19 @@ static u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
{
u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
+ /*
+ * A sample_period of 1 might get mistaken by perf for a BTS event, see
+ * intel_pmu_has_bts_period(). This would prevent re-arming the counter
+ * via pmc_resume_counter(), followed by the accidental creation of an
+ * actual BTS event, which we do not want.
+ *
+ * Avoid this by bumping the sampling period. Note, that we do not lose
+ * any precision, because the same quirk happens later anyway (for
+ * different reasons) in x86_perf_event_set_period().
+ */
+ if (sample_period == 1)
+ sample_period = 2;
+
if (!sample_period)
sample_period = pmc_bitmask(pmc) + 1;
return sample_period;
--
2.43.0
Amazon Development Centre (South Africa) (Proprietary) Limited
29 Gogosoa Street, Observatory, Cape Town, Western Cape, 7925, South Africa
Registration Number: 2004 / 034463 / 07
From: Sarika Sharma <quic_sarishar(a)quicinc.com>
[ Upstream commit 4541b0c8c3c1b85564971d497224e57cf8076a02 ]
Currently, RX is_mcbc bit is set for packets sent from client as
destination address (DA) is multicast/broadcast address, but packets
are actually unicast as receiver address (RA) is not multicast address.
Hence, packets are not handled properly due to this is_mcbc bit.
Therefore, reset the is_mcbc bit if interface type is AP.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1
Signed-off-by: Sarika Sharma <quic_sarishar(a)quicinc.com>
Reviewed-by: Vasanthakumar Thiagarajan <vasanthakumar.thiagarajan(a)oss.qualcomm.com>
Link: https://patch.msgid.link/20250411061523.859387-3-quic_sarishar@quicinc.com
Signed-off-by: Jeff Johnson <jeff.johnson(a)oss.qualcomm.com>
[ Adjust context ]
Signed-off-by: Oliver Sedlbauer <os(a)dev.tdt.de>
---
Context:
The issue was introduced in 6.12.y by the backport of commit f66971c608c4
("wifi: ath12k: using msdu end descriptor to check for rx multicast packets"),
which was part of a patchset:
https://lore.kernel.org/all/20250411061523.859387-1-quic_sarishar@quicinc.c…
That commit, without this follow-up patch, causes mac80211 to drop encrypted
ARP request frames.
As a result, ARP resolution fails, and connectivity from a station to an AP does
not work reliably until traffic is initiated by the AP.
This follow-up commit is necessary to restore correct network functionality for
ath12k clients on 6.12.y.
Note:
The patch has been modified to apply cleanly, including adjustments for
API changes.
The copyright header change has been omitted.
drivers/net/wireless/ath/ath12k/dp_rx.c | 5 +++++
drivers/net/wireless/ath/ath12k/peer.c | 3 +++
drivers/net/wireless/ath/ath12k/peer.h | 2 ++
3 files changed, 10 insertions(+)
diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c
index eebdcc16e8fc..1c0d796ffc7a 100644
--- a/drivers/net/wireless/ath/ath12k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath12k/dp_rx.c
@@ -2214,6 +2214,11 @@ static void ath12k_dp_rx_h_mpdu(struct ath12k *ar,
spin_lock_bh(&ar->ab->base_lock);
peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu);
if (peer) {
+ /* resetting mcbc bit because mcbc packets are unicast
+ * packets only for AP as STA sends unicast packets.
+ */
+ rxcb->is_mcbc = rxcb->is_mcbc && !peer->ucast_ra_only;
+
if (rxcb->is_mcbc)
enctype = peer->sec_type_grp;
else
diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c
index 19c0626fbff1..461749b0f732 100644
--- a/drivers/net/wireless/ath/ath12k/peer.c
+++ b/drivers/net/wireless/ath/ath12k/peer.c
@@ -331,6 +331,9 @@ int ath12k_peer_create(struct ath12k *ar, struct ath12k_vif *arvif,
arvif->ast_idx = peer->hw_peer_id;
}
+ if (arvif->vif->type == NL80211_IFTYPE_AP)
+ peer->ucast_ra_only = true;
+
peer->sec_type = HAL_ENCRYPT_TYPE_OPEN;
peer->sec_type_grp = HAL_ENCRYPT_TYPE_OPEN;
diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h
index 7b3500b5c8c2..05d4fdd3f82d 100644
--- a/drivers/net/wireless/ath/ath12k/peer.h
+++ b/drivers/net/wireless/ath/ath12k/peer.h
@@ -47,6 +47,8 @@ struct ath12k_peer {
/* protected by ab->data_lock */
bool dp_setup_done;
+
+ bool ucast_ra_only;
};
void ath12k_peer_unmap_event(struct ath12k_base *ab, u16 peer_id);
--
2.39.5
From: luoguangfei <15388634752(a)163.com>
[ Upstream commit 01b9128c5db1b470575d07b05b67ffa3cb02ebf1 ]
When removing a macb device, the driver calls phy_exit() before
unregister_netdev(). This leads to a WARN from kernfs:
------------[ cut here ]------------
kernfs: can not remove 'attached_dev', no directory
WARNING: CPU: 1 PID: 27146 at fs/kernfs/dir.c:1683
Call trace:
kernfs_remove_by_name_ns+0xd8/0xf0
sysfs_remove_link+0x24/0x58
phy_detach+0x5c/0x168
phy_disconnect+0x4c/0x70
phylink_disconnect_phy+0x6c/0xc0 [phylink]
macb_close+0x6c/0x170 [macb]
...
macb_remove+0x60/0x168 [macb]
platform_remove+0x5c/0x80
...
The warning happens because the PHY is being exited while the netdev
is still registered. The correct order is to unregister the netdev
before shutting down the PHY and cleaning up the MDIO bus.
Fix this by moving unregister_netdev() ahead of phy_exit() in
macb_remove().
Fixes: 8b73fa3ae02b ("net: macb: Added ZynqMP-specific initialization")
Signed-off-by: luoguangfei <15388634752(a)163.com>
Link: https://patch.msgid.link/20250818232527.1316-1-15388634752@163.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[ Minor context change fixed. ]
Signed-off-by: Alva Lan <alvalan9(a)foxmail.com>
---
drivers/net/ethernet/cadence/macb_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 1ea7c86f7501..9da142efe9d4 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -5068,11 +5068,11 @@ static int macb_remove(struct platform_device *pdev)
if (dev) {
bp = netdev_priv(dev);
+ unregister_netdev(dev);
phy_exit(bp->sgmii_phy);
mdiobus_unregister(bp->mii_bus);
mdiobus_free(bp->mii_bus);
- unregister_netdev(dev);
tasklet_kill(&bp->hresp_err_tasklet);
pm_runtime_disable(&pdev->dev);
pm_runtime_dont_use_autosuspend(&pdev->dev);
--
2.43.0
syzbot reported a kernel BUG in ocfs2_find_victim_chain() because the
`cl_next_free_rec` field of the allocation chain list (next free slot in
the chain list) is 0, triggring the BUG_ON(!cl->cl_next_free_rec)
condition in ocfs2_find_victim_chain() and panicking the kernel.
To fix this, an if condition is introduced in ocfs2_claim_suballoc_bits(),
just before calling ocfs2_find_victim_chain(), the code block in it being
executed when either of the following conditions is true:
1. `cl_next_free_rec` is equal to 0, indicating that there are no free
chains in the allocation chain list
2. `cl_next_free_rec` is greater than `cl_count` (the total number of
chains in the allocation chain list)
Either of them being true is indicative of the fact that there are no
chains left for usage.
This is addressed using ocfs2_error(), which prints
the error log for debugging purposes, rather than panicking the kernel.
Reported-by: syzbot+96d38c6e1655c1420a72(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=96d38c6e1655c1420a72
Tested-by: syzbot+96d38c6e1655c1420a72(a)syzkaller.appspotmail.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Prithvi Tambewagh <activprithvi(a)gmail.com>
---
v2->v3
- Revise log message for reflecting changes from v1->v2
- Format code style as suggested in v2
v1->v2:
- Remove extra line before the if statement in patch
- Add upper limit check for cl->cl_next_free_rec in the if condition
fs/ocfs2/suballoc.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 6ac4dcd54588..e93fc842bb20 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1992,6 +1992,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
}
cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
+ if (!le16_to_cpu(cl->cl_next_free_rec) ||
+ le16_to_cpu(cl->cl_next_free_rec) > le16_to_cpu(cl->cl_count)) {
+ status = ocfs2_error(ac->ac_inode->i_sb,
+ "Chain allocator dinode %llu has invalid next "
+ "free chain record %u, but only %u total\n",
+ (unsigned long long)le64_to_cpu(fe->i_blkno),
+ le16_to_cpu(cl->cl_next_free_rec),
+ le16_to_cpu(cl->cl_count));
+ goto bail;
+ }
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
base-commit: 939f15e640f193616691d3bcde0089760e75b0d3
--
2.34.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x fbade4bd08ba52cbc74a71c4e86e736f059f99f7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112455-daughter-unsealed-699a@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fbade4bd08ba52cbc74a71c4e86e736f059f99f7 Mon Sep 17 00:00:00 2001
From: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Date: Tue, 11 Nov 2025 14:02:50 +0800
Subject: [PATCH] mptcp: Disallow MPTCP subflows from sockmap
The sockmap feature allows bpf syscall from userspace, or based on bpf
sockops, replacing the sk_prot of sockets during protocol stack processing
with sockmap's custom read/write interfaces.
'''
tcp_rcv_state_process()
subflow_syn_recv_sock()
tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
bpf_skops_established <== sockops
bpf_sock_map_update(sk) <== call bpf helper
tcp_bpf_update_proto() <== update sk_prot
'''
Consider two scenarios:
1. When the server has MPTCP enabled and the client also requests MPTCP,
the sk passed to the BPF program is a subflow sk. Since subflows only
handle partial data, replacing their sk_prot is meaningless and will
cause traffic disruption.
2. When the server has MPTCP enabled but the client sends a TCP SYN
without MPTCP, subflow_syn_recv_sock() performs a fallback on the
subflow, replacing the subflow sk's sk_prot with the native sk_prot.
'''
subflow_ulp_fallback()
subflow_drop_ctx()
mptcp_subflow_ops_undo_override()
'''
Subsequently, accept::mptcp_stream_accept::mptcp_fallback_tcp_ops()
converts the subflow to plain TCP.
For the first case, we should prevent it from being combined with sockmap
by setting sk_prot->psock_update_sk_prot to NULL, which will be blocked by
sockmap's own flow.
For the second case, since subflow_syn_recv_sock() has already restored
sk_prot to native tcp_prot/tcpv6_prot, no further action is needed.
Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections")
Signed-off-by: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau(a)kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20251111060307.194196-2-jiayuan.chen@linux.dev
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e8325890a322..af707ce0f624 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -2144,6 +2144,10 @@ void __init mptcp_subflow_init(void)
tcp_prot_override = tcp_prot;
tcp_prot_override.release_cb = tcp_release_cb_override;
tcp_prot_override.diag_destroy = tcp_abort_override;
+#ifdef CONFIG_BPF_SYSCALL
+ /* Disable sockmap processing for subflows */
+ tcp_prot_override.psock_update_sk_prot = NULL;
+#endif
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* In struct mptcp_subflow_request_sock, we assume the TCP request sock
@@ -2180,6 +2184,10 @@ void __init mptcp_subflow_init(void)
tcpv6_prot_override = tcpv6_prot;
tcpv6_prot_override.release_cb = tcp_release_cb_override;
tcpv6_prot_override.diag_destroy = tcp_abort_override;
+#ifdef CONFIG_BPF_SYSCALL
+ /* Disable sockmap processing for subflows */
+ tcpv6_prot_override.psock_update_sk_prot = NULL;
+#endif
#endif
mptcp_diag_subflow_init(&subflow_ulp_ops);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x c77b3b79a92e3345aa1ee296180d1af4e7031f8f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025112449-untaxed-cola-39b4@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c77b3b79a92e3345aa1ee296180d1af4e7031f8f Mon Sep 17 00:00:00 2001
From: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Date: Tue, 11 Nov 2025 14:02:51 +0800
Subject: [PATCH] mptcp: Fix proto fallback detection with BPF
The sockmap feature allows bpf syscall from userspace, or based
on bpf sockops, replacing the sk_prot of sockets during protocol stack
processing with sockmap's custom read/write interfaces.
'''
tcp_rcv_state_process()
syn_recv_sock()/subflow_syn_recv_sock()
tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
bpf_skops_established <== sockops
bpf_sock_map_update(sk) <== call bpf helper
tcp_bpf_update_proto() <== update sk_prot
'''
When the server has MPTCP enabled but the client sends a TCP SYN
without MPTCP, subflow_syn_recv_sock() performs a fallback on the
subflow, replacing the subflow sk's sk_prot with the native sk_prot.
'''
subflow_syn_recv_sock()
subflow_ulp_fallback()
subflow_drop_ctx()
mptcp_subflow_ops_undo_override()
'''
Then, this subflow can be normally used by sockmap, which replaces the
native sk_prot with sockmap's custom sk_prot. The issue occurs when the
user executes accept::mptcp_stream_accept::mptcp_fallback_tcp_ops().
Here, it uses sk->sk_prot to compare with the native sk_prot, but this
is incorrect when sockmap is used, as we may incorrectly set
sk->sk_socket->ops.
This fix uses the more generic sk_family for the comparison instead.
Additionally, this also prevents a WARNING from occurring:
result from ./scripts/decode_stacktrace.sh:
------------[ cut here ]------------
WARNING: CPU: 0 PID: 337 at net/mptcp/protocol.c:68 mptcp_stream_accept \
(net/mptcp/protocol.c:4005)
Modules linked in:
...
PKRU: 55555554
Call Trace:
<TASK>
do_accept (net/socket.c:1989)
__sys_accept4 (net/socket.c:2028 net/socket.c:2057)
__x64_sys_accept (net/socket.c:2067)
x64_sys_call (arch/x86/entry/syscall_64.c:41)
do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
RIP: 0033:0x7f87ac92b83d
---[ end trace 0000000000000000 ]---
Fixes: 0b4f33def7bb ("mptcp: fix tcp fallback crash")
Signed-off-by: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau(a)kernel.org>
Reviewed-by: Jakub Sitnicki <jakub(a)cloudflare.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20251111060307.194196-3-jiayuan.chen@linux.dev
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2d6b8de35c44..90b4aeca2596 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -61,11 +61,13 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
+ unsigned short family = READ_ONCE(sk->sk_family);
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (sk->sk_prot == &tcpv6_prot)
+ if (family == AF_INET6)
return &inet6_stream_ops;
#endif
- WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ WARN_ON_ONCE(family != AF_INET);
return &inet_stream_ops;
}