The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 56b3c85e153b84f27e6cff39623ba40a1ad299d3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112016-arrogance-recast-439f@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 56b3c85e153b84f27e6cff39623ba40a1ad299d3 Mon Sep 17 00:00:00 2001
From: Song Liu <song(a)kernel.org>
Date: Mon, 27 Oct 2025 10:50:21 -0700
Subject: [PATCH] ftrace: Fix BPF fexit with livepatch
When livepatch is attached to the same function as bpf trampoline with
a fexit program, bpf trampoline code calls register_ftrace_direct()
twice. The first time will fail with -EAGAIN, and the second time it
will succeed. This requires register_ftrace_direct() to unregister
the address on the first attempt. Otherwise, the bpf trampoline cannot
attach. Here is an easy way to reproduce this issue:
insmod samples/livepatch/livepatch-sample.ko
bpftrace -e 'fexit:cmdline_proc_show {}'
ERROR: Unable to attach probe: fexit:vmlinux:cmdline_proc_show...
Fix this by cleaning up the hash when register_ftrace_function_nolock hits
errors.
Also, move the code that resets ops->func and ops->trampoline to the error
path of register_ftrace_direct(), and add a helper function reset_direct()
that is shared by register_ftrace_direct() and unregister_ftrace_direct().
Fixes: d05cb470663a ("ftrace: Fix modification of direct_function hash while in use")
Cc: stable(a)vger.kernel.org # v6.6+
Reported-by: Andrey Grodzovsky <andrey.grodzovsky(a)crowdstrike.com>
Closes: https://lore.kernel.org/live-patching/c5058315a39d4615b333e485893345be@crowdstrike.com/
Cc: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Cc: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky(a)crowdstrike.com>
Signed-off-by: Song Liu <song(a)kernel.org>
Reviewed-by: Jiri Olsa <jolsa(a)kernel.org>
Link: https://lore.kernel.org/r/20251027175023.1521602-2-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5949095e51c3..f2cb0b097093 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -479,11 +479,6 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
* BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
* trampoline again, and retry register.
*/
- /* reset fops->func and fops->trampoline for re-register */
- tr->fops->func = NULL;
- tr->fops->trampoline = 0;
-
- /* free im memory and reallocate later */
bpf_tramp_image_free(im);
goto again;
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 42bd2ba68a82..cbeb7e833131 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5953,6 +5953,17 @@ static void register_ftrace_direct_cb(struct rcu_head *rhp)
free_ftrace_hash(fhp);
}
+static void reset_direct(struct ftrace_ops *ops, unsigned long addr)
+{
+ struct ftrace_hash *hash = ops->func_hash->filter_hash;
+
+ remove_direct_functions_hash(hash, addr);
+
+ /* cleanup for possible another register call */
+ ops->func = NULL;
+ ops->trampoline = 0;
+}
+
/**
* register_ftrace_direct - Call a custom trampoline directly
* for multiple functions registered in @ops
@@ -6048,6 +6059,8 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
ops->direct_call = addr;
err = register_ftrace_function_nolock(ops);
+ if (err)
+ reset_direct(ops, addr);
out_unlock:
mutex_unlock(&direct_mutex);
@@ -6080,7 +6093,6 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct);
int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
bool free_filters)
{
- struct ftrace_hash *hash = ops->func_hash->filter_hash;
int err;
if (check_direct_multi(ops))
@@ -6090,13 +6102,9 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
mutex_lock(&direct_mutex);
err = unregister_ftrace_function(ops);
- remove_direct_functions_hash(hash, addr);
+ reset_direct(ops, addr);
mutex_unlock(&direct_mutex);
- /* cleanup for possible another register call */
- ops->func = NULL;
- ops->trampoline = 0;
-
if (free_filters)
ftrace_free_filter(ops);
return err;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 00fbff75c5acb4755f06f08bd1071879c63940c5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112029-arrogance-bondless-6a5b@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 00fbff75c5acb4755f06f08bd1071879c63940c5 Mon Sep 17 00:00:00 2001
From: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Date: Sun, 2 Nov 2025 01:07:41 +0530
Subject: [PATCH] crash: fix crashkernel resource shrink
When crashkernel is configured with a high reservation, shrinking its
value below the low crashkernel reservation causes two issues:
1. Invalid crashkernel resource objects
2. Kernel crash if crashkernel shrinking is done twice
For example, with crashkernel=200M,high, the kernel reserves 200MB of high
memory and some default low memory (say 256MB). The reservation appears
as:
cat /proc/iomem | grep -i crash
af000000-beffffff : Crash kernel
433000000-43f7fffff : Crash kernel
If crashkernel is then shrunk to 50MB (echo 52428800 >
/sys/kernel/kexec_crash_size), /proc/iomem still shows 256MB reserved:
af000000-beffffff : Crash kernel
Instead, it should show 50MB:
af000000-b21fffff : Crash kernel
Further shrinking crashkernel to 40MB causes a kernel crash with the
following trace (x86):
BUG: kernel NULL pointer dereference, address: 0000000000000038
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
<snip...>
Call Trace: <TASK>
? __die_body.cold+0x19/0x27
? page_fault_oops+0x15a/0x2f0
? search_module_extables+0x19/0x60
? search_bpf_extables+0x5f/0x80
? exc_page_fault+0x7e/0x180
? asm_exc_page_fault+0x26/0x30
? __release_resource+0xd/0xb0
release_resource+0x26/0x40
__crash_shrink_memory+0xe5/0x110
crash_shrink_memory+0x12a/0x190
kexec_crash_size_store+0x41/0x80
kernfs_fop_write_iter+0x141/0x1f0
vfs_write+0x294/0x460
ksys_write+0x6d/0xf0
<snip...>
This happens because __crash_shrink_memory() in kernel/crash_core.c
incorrectly updates the crashk_res resource object even when
crashk_low_res should be updated.
Fix this by ensuring the correct crashkernel resource object is updated
when shrinking crashkernel memory.
Link: https://lkml.kernel.org/r/20251101193741.289252-1-sourabhjain@linux.ibm.com
Fixes: 16c6006af4d4 ("kexec: enable kexec_crash_size to support two crash kernel regions")
Signed-off-by: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Acked-by: Baoquan He <bhe(a)redhat.com>
Cc: Zhen Lei <thunder.leizhen(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 3b1c43382eec..99dac1aa972a 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -373,7 +373,7 @@ static int __crash_shrink_memory(struct resource *old_res,
old_res->start = 0;
old_res->end = 0;
} else {
- crashk_res.end = ram_res->start - 1;
+ old_res->end = ram_res->start - 1;
}
crash_free_reserved_phys_range(ram_res->start, ram_res->end);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 05a1fc5efdd8560f34a3af39c9cf1e1526cc3ddf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112036-clever-sponsor-bfdf@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 05a1fc5efdd8560f34a3af39c9cf1e1526cc3ddf Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai(a)suse.de>
Date: Sun, 9 Nov 2025 10:12:07 +0100
Subject: [PATCH] ALSA: usb-audio: Fix potential overflow of PCM transfer
buffer
The PCM stream data in USB-audio driver is transferred over USB URB
packet buffers, and each packet size is determined dynamically. The
packet sizes are limited by some factors such as wMaxPacketSize USB
descriptor. OTOH, in the current code, the actually used packet sizes
are determined only by the rate and the PPS, which may be bigger than
the size limit above. This results in a buffer overflow, as reported
by syzbot.
Basically when the limit is smaller than the calculated packet size,
it implies that something is wrong, most likely a weird USB
descriptor. So the best option would be just to return an error at
the parameter setup time before doing any further operations.
This patch introduces such a sanity check, and returns -EINVAL when
the packet size is greater than maxpacksize. The comparison with
ep->packsize[1] alone should suffice since it's always equal to or
greater than ep->packsize[0].
Reported-by: syzbot+bfd77469c8966de076f7(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=bfd77469c8966de076f7
Link: https://lore.kernel.org/690b6b46.050a0220.3d0d33.0054.GAE@google.com
Cc: Lizhi Xu <lizhi.xu(a)windriver.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20251109091211.12739-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 880f5afcce60..cc15624ecaff 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -1362,6 +1362,11 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip,
ep->sample_rem = ep->cur_rate % ep->pps;
ep->packsize[0] = ep->cur_rate / ep->pps;
ep->packsize[1] = (ep->cur_rate + (ep->pps - 1)) / ep->pps;
+ if (ep->packsize[1] > ep->maxpacksize) {
+ usb_audio_dbg(chip, "Too small maxpacksize %u for rate %u / pps %u\n",
+ ep->maxpacksize, ep->cur_rate, ep->pps);
+ return -EINVAL;
+ }
/* calculate the frequency in 16.16 format */
ep->freqm = ep->freqn;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 05a1fc5efdd8560f34a3af39c9cf1e1526cc3ddf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112037-brick-dreadful-388a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 05a1fc5efdd8560f34a3af39c9cf1e1526cc3ddf Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai(a)suse.de>
Date: Sun, 9 Nov 2025 10:12:07 +0100
Subject: [PATCH] ALSA: usb-audio: Fix potential overflow of PCM transfer
buffer
The PCM stream data in USB-audio driver is transferred over USB URB
packet buffers, and each packet size is determined dynamically. The
packet sizes are limited by some factors such as wMaxPacketSize USB
descriptor. OTOH, in the current code, the actually used packet sizes
are determined only by the rate and the PPS, which may be bigger than
the size limit above. This results in a buffer overflow, as reported
by syzbot.
Basically when the limit is smaller than the calculated packet size,
it implies that something is wrong, most likely a weird USB
descriptor. So the best option would be just to return an error at
the parameter setup time before doing any further operations.
This patch introduces such a sanity check, and returns -EINVAL when
the packet size is greater than maxpacksize. The comparison with
ep->packsize[1] alone should suffice since it's always equal to or
greater than ep->packsize[0].
Reported-by: syzbot+bfd77469c8966de076f7(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=bfd77469c8966de076f7
Link: https://lore.kernel.org/690b6b46.050a0220.3d0d33.0054.GAE@google.com
Cc: Lizhi Xu <lizhi.xu(a)windriver.com>
Cc: <stable(a)vger.kernel.org>
Link: https://patch.msgid.link/20251109091211.12739-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 880f5afcce60..cc15624ecaff 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -1362,6 +1362,11 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip,
ep->sample_rem = ep->cur_rate % ep->pps;
ep->packsize[0] = ep->cur_rate / ep->pps;
ep->packsize[1] = (ep->cur_rate + (ep->pps - 1)) / ep->pps;
+ if (ep->packsize[1] > ep->maxpacksize) {
+ usb_audio_dbg(chip, "Too small maxpacksize %u for rate %u / pps %u\n",
+ ep->maxpacksize, ep->cur_rate, ep->pps);
+ return -EINVAL;
+ }
/* calculate the frequency in 16.16 format */
ep->freqm = ep->freqn;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x fa04f5b60fda62c98a53a60de3a1e763f11feb41
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112040-stucco-landed-445c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa04f5b60fda62c98a53a60de3a1e763f11feb41 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas(a)kernel.org>
Date: Mon, 27 Oct 2025 11:56:36 +0000
Subject: [PATCH] mm/truncate: unmap large folio on split failure
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
This behavior might not be respected on truncation.
During truncation, the kernel splits a large folio in order to reclaim
memory. As a side effect, it unmaps the folio and destroys PMD mappings
of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.
However, if the split fails, PMD mappings are preserved and the user will
not receive SIGBUS on any accesses within the PMD.
Unmap the folio on split failure. It will lead to refault as PTEs and
preserve SIGBUS semantics.
Make an exception for shmem/tmpfs, which has long been intentionally mapped
with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/truncate.c b/mm/truncate.c
index 9210cf808f5c..3c5a50ae3274 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
return 0;
}
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at,
+ unsigned long min_order)
+{
+ enum ttu_flags ttu_flags =
+ TTU_SYNC |
+ TTU_SPLIT_HUGE_PMD |
+ TTU_IGNORE_MLOCK;
+ int ret;
+
+ ret = try_folio_split_to_order(folio, split_at, min_order);
+
+ /*
+ * If the split fails, unmap the folio, so it will be refaulted
+ * with PTEs to respect SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ if (ret && !shmem_mapping(folio->mapping)) {
+ try_to_unmap(folio, ttu_flags);
+ WARN_ON(folio_mapped(folio));
+ }
+
+ return ret;
+}
+
/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
@@ -226,7 +252,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
min_order = mapping_min_folio_order(folio->mapping);
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- if (!try_folio_split_to_order(folio, split_at, min_order)) {
+ if (!try_folio_split_or_unmap(folio, split_at, min_order)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
@@ -250,13 +276,10 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
if (!folio_trylock(folio2))
goto out;
- /*
- * make sure folio2 is large and does not change its mapping.
- * Its split result does not matter here.
- */
+ /* make sure folio2 is large and does not change its mapping */
if (folio_test_large(folio2) &&
folio2->mapping == folio->mapping)
- try_folio_split_to_order(folio2, split_at2, min_order);
+ try_folio_split_or_unmap(folio2, split_at2, min_order);
folio_unlock(folio2);
out:
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x fa04f5b60fda62c98a53a60de3a1e763f11feb41
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112039-theatrics-moodiness-1de3@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa04f5b60fda62c98a53a60de3a1e763f11feb41 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas(a)kernel.org>
Date: Mon, 27 Oct 2025 11:56:36 +0000
Subject: [PATCH] mm/truncate: unmap large folio on split failure
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
This behavior might not be respected on truncation.
During truncation, the kernel splits a large folio in order to reclaim
memory. As a side effect, it unmaps the folio and destroys PMD mappings
of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.
However, if the split fails, PMD mappings are preserved and the user will
not receive SIGBUS on any accesses within the PMD.
Unmap the folio on split failure. It will lead to refault as PTEs and
preserve SIGBUS semantics.
Make an exception for shmem/tmpfs, which has long been intentionally mapped
with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/truncate.c b/mm/truncate.c
index 9210cf808f5c..3c5a50ae3274 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
return 0;
}
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at,
+ unsigned long min_order)
+{
+ enum ttu_flags ttu_flags =
+ TTU_SYNC |
+ TTU_SPLIT_HUGE_PMD |
+ TTU_IGNORE_MLOCK;
+ int ret;
+
+ ret = try_folio_split_to_order(folio, split_at, min_order);
+
+ /*
+ * If the split fails, unmap the folio, so it will be refaulted
+ * with PTEs to respect SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ if (ret && !shmem_mapping(folio->mapping)) {
+ try_to_unmap(folio, ttu_flags);
+ WARN_ON(folio_mapped(folio));
+ }
+
+ return ret;
+}
+
/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
@@ -226,7 +252,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
min_order = mapping_min_folio_order(folio->mapping);
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- if (!try_folio_split_to_order(folio, split_at, min_order)) {
+ if (!try_folio_split_or_unmap(folio, split_at, min_order)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
@@ -250,13 +276,10 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
if (!folio_trylock(folio2))
goto out;
- /*
- * make sure folio2 is large and does not change its mapping.
- * Its split result does not matter here.
- */
+ /* make sure folio2 is large and does not change its mapping */
if (folio_test_large(folio2) &&
folio2->mapping == folio->mapping)
- try_folio_split_to_order(folio2, split_at2, min_order);
+ try_folio_split_or_unmap(folio2, split_at2, min_order);
folio_unlock(folio2);
out:
From: Andrey Vatoropin <a.vatoropin(a)crpt.ru>
be_insert_vlan_in_pkt() is called with the wrb_params argument being NULL
at the be_send_pkt_to_bmc() call site. This may lead to dereferencing a NULL
pointer when processing a workaround for a specific packet, as commit
bc0c3405abbb ("be2net: fix a Tx stall bug caused by a specific ipv6
packet") states.
The correct way would be to pass the wrb_params from be_xmit().
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 760c295e0e8d ("be2net: Support for OS2BMC.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrey Vatoropin <a.vatoropin(a)crpt.ru>
---
v2: - pass wrb_params from inside be_xmit() (Jakub Kicinski)
v1: https://lore.kernel.org/netdev/20251112092051.851163-1-a.vatoropin@crpt.ru/
drivers/net/ethernet/emulex/benet/be_main.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index cb004fd16252..5bb31c8fab39 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1296,7 +1296,8 @@ static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
- struct sk_buff **skb)
+ struct sk_buff **skb,
+ struct be_wrb_params *wrb_params)
{
struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
bool os2bmc = false;
@@ -1360,7 +1361,7 @@ static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
* to BMC, asic expects the vlan to be inline in the packet.
*/
if (os2bmc)
- *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
+ *skb = be_insert_vlan_in_pkt(adapter, *skb, wrb_params);
return os2bmc;
}
@@ -1387,7 +1388,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
/* if os2bmc is enabled and if the pkt is destined to bmc,
* enqueue the pkt a 2nd time with mgmt bit set.
*/
- if (be_send_pkt_to_bmc(adapter, &skb)) {
+ if (be_send_pkt_to_bmc(adapter, &skb, &wrb_params)) {
BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
if (unlikely(!wrb_cnt))
--
2.43.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 74207de2ba10c2973334906822dc94d2e859ffc5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025112026-substance-senator-8409@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74207de2ba10c2973334906822dc94d2e859ffc5 Mon Sep 17 00:00:00 2001
From: Kiryl Shutsemau <kas(a)kernel.org>
Date: Mon, 27 Oct 2025 11:56:35 +0000
Subject: [PATCH] mm/memory: do not populate page table entries beyond i_size
Patch series "Fix SIGBUS semantics with large folios", v3.
Accessing memory within a VMA, but beyond i_size rounded up to the next
page size, is supposed to generate SIGBUS.
Darrick reported[1] an xfstests regression in v6.18-rc1. generic/749
failed due to missing SIGBUS. This was caused by my recent changes that
try to fault in the whole folio where possible:
19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
357b92761d94 ("mm/filemap: map entire large folio faultaround")
These changes did not consider i_size when setting up PTEs, leading to
xfstest breakage.
However, the problem has been present in the kernel for a long time -
since huge tmpfs was introduced in 2016. The kernel happily maps
PMD-sized folios as PMD without checking i_size. And huge=always tmpfs
allocates PMD-size folios on any writes.
I considered this corner case when I implemented a large tmpfs, and my
conclusion was that no one in their right mind should rely on receiving a
SIGBUS signal when accessing beyond i_size. I cannot imagine how it could
be useful for the workload.
But apparently filesystem folks care a lot about preserving strict SIGBUS
semantics.
Generic/749 was introduced last year with reference to POSIX, but no real
workloads were mentioned. It also acknowledged the tmpfs deviation from
the test case.
POSIX indeed says[3]:
References within the address range starting at pa and
continuing for len bytes to whole pages following the end of an
object shall result in delivery of a SIGBUS signal.
The patchset fixes the regression introduced by recent changes as well as
more subtle SIGBUS breakage due to split failure on truncation.
This patch (of 2):
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.
Recent changes attempted to fault in the full folio where possible. They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.
Darrick reported generic/749 breakage because of this.
However, the problem existed before the recent changes. With huge=always
tmpfs, any write to a file leads to a PMD-size allocation. The subsequent
fault-in of the folio will install a PMD mapping regardless of i_size.
Fix filemap_map_pages() and finish_fault() to not install:
- PTEs beyond i_size;
- PMD mappings across i_size;
Make an exception for shmem/tmpfs, which has long been intentionally
mapped with PMDs across i_size.
Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Fixes: 6795801366da ("xfs: Support large folios")
Reported-by: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/filemap.c b/mm/filemap.c
index 13f0259d993c..2f1e7e283a51 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3681,7 +3681,8 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
- unsigned long *rss, unsigned short *mmap_miss)
+ unsigned long *rss, unsigned short *mmap_miss,
+ bool can_map_large)
{
unsigned int ref_from_caller = 1;
vm_fault_t ret = 0;
@@ -3696,7 +3697,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
* The folio must not cross VMA or page table boundary.
*/
addr0 = addr - start * PAGE_SIZE;
- if (folio_within_vma(folio, vmf->vma) &&
+ if (can_map_large && folio_within_vma(folio, vmf->vma) &&
(addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
vmf->pte -= start;
page -= start;
@@ -3811,13 +3812,27 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
unsigned long rss = 0;
unsigned int nr_pages = 0, folio_type;
unsigned short mmap_miss = 0, mmap_miss_saved;
+ bool can_map_large;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
if (!folio)
goto out;
- if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+ end_pgoff = min(end_pgoff, file_end);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ can_map_large = shmem_mapping(mapping) ||
+ file_end >= folio_next_index(folio);
+
+ if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3830,10 +3845,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
goto out;
}
- file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
- if (end_pgoff > file_end)
- end_pgoff = file_end;
-
folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3850,7 +3861,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &rss, &mmap_miss);
+ nr_pages, &rss, &mmap_miss,
+ can_map_large);
folio_unlock(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
diff --git a/mm/memory.c b/mm/memory.c
index 74b45e258323..b59ae7ce42eb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -65,6 +65,7 @@
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/string.h>
+#include <linux/shmem_fs.h>
#include <linux/memory-tiers.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
@@ -5501,8 +5502,25 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return ret;
}
+ if (!needs_fallback && vma->vm_file) {
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t file_end;
+
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ needs_fallback = !shmem_mapping(mapping) &&
+ file_end < folio_next_index(folio);
+ }
+
if (pmd_none(*vmf->pmd)) {
- if (folio_test_pmd_mappable(folio)) {
+ if (!needs_fallback && folio_test_pmd_mappable(folio)) {
ret = do_set_pmd(vmf, folio, page);
if (ret != VM_FAULT_FALLBACK)
return ret;