I'm announcing the release of the 4.19.315 kernel.
All users of the 4.19 kernel series must upgrade.
The updated 4.19.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.19.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/sphinx/kernel_include.py | 1
Makefile | 2
drivers/md/dm-core.h | 2
drivers/md/dm-ioctl.c | 3
drivers/md/dm-table.c | 9
drivers/tty/serial/kgdboc.c | 30
fs/btrfs/volumes.c | 1
include/linux/string.h | 20
include/linux/trace_events.h | 2
kernel/trace/Kconfig | 4
kernel/trace/Makefile | 1
kernel/trace/trace.c | 26
kernel/trace/trace_dynevent.c | 210 ++++++
kernel/trace/trace_dynevent.h | 119 +++
kernel/trace/trace_events.c | 32
kernel/trace/trace_events_hist.c | 1048 ++++++++++++++++++-------------
kernel/trace/trace_probe.c | 2
kernel/trace/trace_stack.c | 2
tools/testing/selftests/vm/map_hugetlb.c | 7
19 files changed, 1050 insertions(+), 471 deletions(-)
Akira Yokosawa (1):
docs: kernel_include.py: Cope with docutils 0.21
Daniel Thompson (1):
serial: kgdboc: Fix NMI-safety problems from keyboard reset code
Dominique Martinet (1):
btrfs: add missing mutex_unlock in btrfs_relocate_sys_chunks()
Greg Kroah-Hartman (1):
Linux 4.19.315
Harshit Mogalapalli (1):
Revert "selftests: mm: fix map_hugetlb failure on 64K page size systems"
Masami Hiramatsu (4):
tracing: Simplify creation and deletion of synthetic events
tracing: Add unified dynamic event framework
tracing: Use dyn_event framework for synthetic events
tracing: Remove unneeded synth_event_mutex
Mikulas Patocka (1):
dm: limit the number of targets and parameter size area
Steven Rostedt (VMware) (5):
tracing: Consolidate trace_add/remove_event_call back to the nolock functions
string.h: Add str_has_prefix() helper function
tracing: Use str_has_prefix() helper for histogram code
tracing: Use str_has_prefix() instead of using fixed sizes
tracing: Have the historgram use the result of str_has_prefix() for len of prefix
Tom Zanussi (4):
tracing: Refactor hist trigger action code
tracing: Split up onmatch action data
tracing: Generalize hist trigger onmax and save action
tracing: Remove unnecessary var_ref destroy in track_data_destroy()
Hey,
I got encouraged to send another email here from
https://github.com/tpwrules/nixos-apple-silicon/issues/200.
"arm64/fpsimd: Avoid erroneous elide of user state reload" /
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
fixes a data corruption issue with dm-crypt on aarch64, reproducible on
the mainline Linux kernel (not just asahi specific!).
This list has been included as Cc on this commit, but it'd be very nice
to make sure this already lands in 6.9.2, due to its data corruption
nature.
Thanks,
Florian
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: b84a8aba806261d2f759ccedf4a2a6a80a5e55ba
Gitweb: https://git.kernel.org/tip/b84a8aba806261d2f759ccedf4a2a6a80a5e55ba
Author: dicken.ding <dicken.ding(a)mediatek.com>
AuthorDate: Fri, 24 May 2024 17:17:39 +08:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Fri, 24 May 2024 12:49:35 +02:00
genirq/irqdesc: Prevent use-after-free in irq_find_at_or_after()
irq_find_at_or_after() dereferences the interrupt descriptor which is
returned by mt_find() while neither holding sparse_irq_lock nor RCU read
lock, which means the descriptor can be freed between mt_find() and the
dereference:
CPU0 CPU1
desc = mt_find()
delayed_free_desc(desc)
irq_desc_get_irq(desc)
The use-after-free is reported by KASAN:
Call trace:
irq_get_next_irq+0x58/0x84
show_stat+0x638/0x824
seq_read_iter+0x158/0x4ec
proc_reg_read_iter+0x94/0x12c
vfs_read+0x1e0/0x2c8
Freed by task 4471:
slab_free_freelist_hook+0x174/0x1e0
__kmem_cache_free+0xa4/0x1dc
kfree+0x64/0x128
irq_kobj_release+0x28/0x3c
kobject_put+0xcc/0x1e0
delayed_free_desc+0x14/0x2c
rcu_do_batch+0x214/0x720
Guard the access with a RCU read lock section.
Fixes: 721255b9826b ("genirq: Use a maple tree for interrupt descriptor management")
Signed-off-by: dicken.ding <dicken.ding(a)mediatek.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240524091739.31611-1-dicken.ding@mediatek.com
---
kernel/irq/irqdesc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 88ac365..07e99c9 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -160,7 +160,10 @@ static int irq_find_free_area(unsigned int from, unsigned int cnt)
static unsigned int irq_find_at_or_after(unsigned int offset)
{
unsigned long index = offset;
- struct irq_desc *desc = mt_find(&sparse_irqs, &index, nr_irqs);
+ struct irq_desc *desc;
+
+ guard(rcu)();
+ desc = mt_find(&sparse_irqs, &index, nr_irqs);
return desc ? irq_desc_get_irq(desc) : nr_irqs;
}
It appears that we don't allowed a vcpu to be restored in AArch32
System mode, as we *never* included it in the list of valid modes.
Just add it to the list of allowed modes.
Fixes: 0d854a60b1d7 ("arm64: KVM: enable initialization of a 32bit vcpu")
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/kvm/guest.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index d9617b11f7a8..11098eb7eb44 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -251,6 +251,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case PSR_AA32_MODE_SVC:
case PSR_AA32_MODE_ABT:
case PSR_AA32_MODE_UND:
+ case PSR_AA32_MODE_SYS:
if (!vcpu_el1_is_32bit(vcpu))
return -EINVAL;
break;
--
2.39.2
When userspace writes to once of the core registers, we make
sure to narrow the corresponding GPRs if PSTATE indicates
an AArch32 context.
The code tries to check whether the context is EL0 or EL1 so
that it narrows the correct registers. But it does so by checking
the full PSTATE instead of PSTATE.M.
As a consequence, and if we are restoring an AArch32 EL0 context
in a 64bit guest, and that PSTATE has *any* bit set outside of
PSTATE.M, we narrow *all* registers instead of only the first 15,
destroying the 64bit state.
Obviously, this is not something the guest is likely to enjoy.
Correctly masking PSTATE to only evaluate PSTATE.M fixes it.
Fixes: 90c1f934ed71 ("KVM: arm64: Get rid of the AArch32 register mapping code")
Reported-by: Nina Schoetterl-Glausch <nsg(a)linux.ibm.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/kvm/guest.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index e2f762d959bb..d9617b11f7a8 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -276,7 +276,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
int i, nr_reg;
- switch (*vcpu_cpsr(vcpu)) {
+ switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
/*
* Either we are dealing with user mode, and only the
* first 15 registers (+ PC) must be narrowed to 32bit.
--
2.39.2
The quilt patch titled
Subject: mm/memory-failure: fix handling of dissolved but not taken off from buddy pages
has been removed from the -mm tree. Its filename was
mm-memory-failure-fix-handling-of-dissolved-but-not-taken-off-from-buddy-pages.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm/memory-failure: fix handling of dissolved but not taken off from buddy pages
Date: Thu, 23 May 2024 15:12:17 +0800
When I did memory failure tests recently, below panic occurs:
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x8cee00
flags: 0x6fffe0000000000(node=1|zone=2|lastcpupid=0x7fff)
raw: 06fffe0000000000 dead000000000100 dead000000000122 0000000000000000
raw: 0000000000000000 0000000000000009 00000000ffffffff 0000000000000000
page dumped because: VM_BUG_ON_PAGE(!PageBuddy(page))
------------[ cut here ]------------
kernel BUG at include/linux/page-flags.h:1009!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
RIP: 0010:__del_page_from_free_list+0x151/0x180
RSP: 0018:ffffa49c90437998 EFLAGS: 00000046
RAX: 0000000000000035 RBX: 0000000000000009 RCX: ffff8dd8dfd1c9c8
RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff8dd8dfd1c9c0
RBP: ffffd901233b8000 R08: ffffffffab5511f8 R09: 0000000000008c69
R10: 0000000000003c15 R11: ffffffffab5511f8 R12: ffff8dd8fffc0c80
R13: 0000000000000001 R14: ffff8dd8fffc0c80 R15: 0000000000000009
FS: 00007ff916304740(0000) GS:ffff8dd8dfd00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055eae50124c8 CR3: 00000008479e0000 CR4: 00000000000006f0
Call Trace:
<TASK>
__rmqueue_pcplist+0x23b/0x520
get_page_from_freelist+0x26b/0xe40
__alloc_pages_noprof+0x113/0x1120
__folio_alloc_noprof+0x11/0xb0
alloc_buddy_hugetlb_folio.isra.0+0x5a/0x130
__alloc_fresh_hugetlb_folio+0xe7/0x140
alloc_pool_huge_folio+0x68/0x100
set_max_huge_pages+0x13d/0x340
hugetlb_sysctl_handler_common+0xe8/0x110
proc_sys_call_handler+0x194/0x280
vfs_write+0x387/0x550
ksys_write+0x64/0xe0
do_syscall_64+0xc2/0x1d0
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7ff916114887
RSP: 002b:00007ffec8a2fd78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 000055eae500e350 RCX: 00007ff916114887
RDX: 0000000000000004 RSI: 000055eae500e390 RDI: 0000000000000003
RBP: 000055eae50104c0 R08: 0000000000000000 R09: 000055eae50104c0
R10: 0000000000000077 R11: 0000000000000246 R12: 0000000000000004
R13: 0000000000000004 R14: 00007ff916216b80 R15: 00007ff916216a00
</TASK>
Modules linked in: mce_inject hwpoison_inject
---[ end trace 0000000000000000 ]---
And before the panic, there had an warning about bad page state:
BUG: Bad page state in process page-types pfn:8cee00
page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x8cee00
flags: 0x6fffe0000000000(node=1|zone=2|lastcpupid=0x7fff)
page_type: 0xffffff7f(buddy)
raw: 06fffe0000000000 ffffd901241c0008 ffffd901240f8008 0000000000000000
raw: 0000000000000000 0000000000000009 00000000ffffff7f 0000000000000000
page dumped because: nonzero mapcount
Modules linked in: mce_inject hwpoison_inject
CPU: 8 PID: 154211 Comm: page-types Not tainted 6.9.0-rc4-00499-g5544ec3178e2-dirty #22
Call Trace:
<TASK>
dump_stack_lvl+0x83/0xa0
bad_page+0x63/0xf0
free_unref_page+0x36e/0x5c0
unpoison_memory+0x50b/0x630
simple_attr_write_xsigned.constprop.0.isra.0+0xb3/0x110
debugfs_attr_write+0x42/0x60
full_proxy_write+0x5b/0x80
vfs_write+0xcd/0x550
ksys_write+0x64/0xe0
do_syscall_64+0xc2/0x1d0
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f189a514887
RSP: 002b:00007ffdcd899718 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f189a514887
RDX: 0000000000000009 RSI: 00007ffdcd899730 RDI: 0000000000000003
RBP: 00007ffdcd8997a0 R08: 0000000000000000 R09: 00007ffdcd8994b2
R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffdcda199a8
R13: 0000000000404af1 R14: 000000000040ad78 R15: 00007f189a7a5040
</TASK>
The root cause should be the below race:
memory_failure
try_memory_failure_hugetlb
me_huge_page
__page_handle_poison
dissolve_free_hugetlb_folio
drain_all_pages -- Buddy page can be isolated e.g. for compaction.
take_page_off_buddy -- Failed as page is not in the buddy list.
-- Page can be putback into buddy after compaction.
page_ref_inc -- Leads to buddy page with refcnt = 1.
Then unpoison_memory() can unpoison the page and send the buddy page back
into buddy list again leading to the above bad page state warning. And
bad_page() will call page_mapcount_reset() to remove PageBuddy from buddy
page leading to later VM_BUG_ON_PAGE(!PageBuddy(page)) when trying to
allocate this page.
Fix this issue by only treating __page_handle_poison() as successful when
it returns 1.
Link: https://lkml.kernel.org/r/20240523071217.1696196-1-linmiaohe@huawei.com
Fixes: ceaf8fbea79a ("mm, hwpoison: skip raw hwpoison page in freeing 1GB hugepage")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-fix-handling-of-dissolved-but-not-taken-off-from-buddy-pages
+++ a/mm/memory-failure.c
@@ -1221,7 +1221,7 @@ static int me_huge_page(struct page_stat
* subpages.
*/
folio_put(folio);
- if (__page_handle_poison(p) >= 0) {
+ if (__page_handle_poison(p) > 0) {
page_ref_inc(p);
res = MF_RECOVERED;
} else {
@@ -2091,7 +2091,7 @@ retry:
*/
if (res == 0) {
folio_unlock(folio);
- if (__page_handle_poison(p) >= 0) {
+ if (__page_handle_poison(p) > 0) {
page_ref_inc(p);
res = MF_RECOVERED;
} else {
_
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
The quilt patch titled
Subject: mm: /proc/pid/smaps_rollup: avoid skipping vma after getting mmap_lock again
has been removed from the -mm tree. Its filename was
mm-proc-pid-smaps_rollup-avoid-skipping-vma-after-getting-mmap_lock-again.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Yuanyuan Zhong <yzhong(a)purestorage.com>
Subject: mm: /proc/pid/smaps_rollup: avoid skipping vma after getting mmap_lock again
Date: Thu, 23 May 2024 12:35:31 -0600
After switching smaps_rollup to use VMA iterator, searching for next entry
is part of the condition expression of the do-while loop. So the current
VMA needs to be addressed before the continue statement.
Otherwise, with some VMAs skipped, userspace observed memory
consumption from /proc/pid/smaps_rollup will be smaller than the sum of
the corresponding fields from /proc/pid/smaps.
Link: https://lkml.kernel.org/r/20240523183531.2535436-1-yzhong@purestorage.com
Fixes: c4c84f06285e ("fs/proc/task_mmu: stop using linked list and highest_vm_end")
Signed-off-by: Yuanyuan Zhong <yzhong(a)purestorage.com>
Reviewed-by: Mohamed Khalfella <mkhalfella(a)purestorage.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/fs/proc/task_mmu.c~mm-proc-pid-smaps_rollup-avoid-skipping-vma-after-getting-mmap_lock-again
+++ a/fs/proc/task_mmu.c
@@ -970,12 +970,17 @@ static int show_smaps_rollup(struct seq_
break;
/* Case 1 and 2 above */
- if (vma->vm_start >= last_vma_end)
+ if (vma->vm_start >= last_vma_end) {
+ smap_gather_stats(vma, &mss, 0);
+ last_vma_end = vma->vm_end;
continue;
+ }
/* Case 4 above */
- if (vma->vm_end > last_vma_end)
+ if (vma->vm_end > last_vma_end) {
smap_gather_stats(vma, &mss, last_vma_end);
+ last_vma_end = vma->vm_end;
+ }
}
} for_each_vma(vmi, vma);
_
Patches currently in -mm which might be from yzhong(a)purestorage.com are
The quilt patch titled
Subject: nilfs2: fix potential hang in nilfs_detach_log_writer()
has been removed from the -mm tree. Its filename was
nilfs2-fix-potential-hang-in-nilfs_detach_log_writer.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix potential hang in nilfs_detach_log_writer()
Date: Mon, 20 May 2024 22:26:21 +0900
Syzbot has reported a potential hang in nilfs_detach_log_writer() called
during nilfs2 unmount.
Analysis revealed that this is because nilfs_segctor_sync(), which
synchronizes with the log writer thread, can be called after
nilfs_segctor_destroy() terminates that thread, as shown in the call trace
below:
nilfs_detach_log_writer
nilfs_segctor_destroy
nilfs_segctor_kill_thread --> Shut down log writer thread
flush_work
nilfs_iput_work_func
nilfs_dispose_list
iput
nilfs_evict_inode
nilfs_transaction_commit
nilfs_construct_segment (if inode needs sync)
nilfs_segctor_sync --> Attempt to synchronize with
log writer thread
*** DEADLOCK ***
Fix this issue by changing nilfs_segctor_sync() so that the log writer
thread returns normally without synchronizing after it terminates, and by
forcing tasks that are already waiting to complete once after the thread
terminates.
The skipped inode metadata flushout will then be processed together in the
subsequent cleanup work in nilfs_segctor_destroy().
Link: https://lkml.kernel.org/r/20240520132621.4054-4-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+e3973c409251e136fdd0(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=e3973c409251e136fdd0
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Cc: "Bai, Shuangpeng" <sjb7183(a)psu.edu>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
--- a/fs/nilfs2/segment.c~nilfs2-fix-potential-hang-in-nilfs_detach_log_writer
+++ a/fs/nilfs2/segment.c
@@ -2190,6 +2190,14 @@ static int nilfs_segctor_sync(struct nil
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * Synchronize only while the log writer thread is alive.
+ * Leave flushing out after the log writer thread exits to
+ * the cleanup work in nilfs_segctor_destroy().
+ */
+ if (!sci->sc_task)
+ break;
+
if (atomic_read(&wait_req.done)) {
err = wait_req.err;
break;
@@ -2205,7 +2213,7 @@ static int nilfs_segctor_sync(struct nil
return err;
}
-static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
+static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force)
{
struct nilfs_segctor_wait_request *wrq, *n;
unsigned long flags;
@@ -2213,7 +2221,7 @@ static void nilfs_segctor_wakeup(struct
spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
if (!atomic_read(&wrq->done) &&
- nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
+ (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) {
wrq->err = err;
atomic_set(&wrq->done, 1);
}
@@ -2362,7 +2370,7 @@ static void nilfs_segctor_notify(struct
if (mode == SC_LSEG_SR) {
sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
sci->sc_seq_done = sci->sc_seq_accepted;
- nilfs_segctor_wakeup(sci, err);
+ nilfs_segctor_wakeup(sci, err, false);
sci->sc_flush_request = 0;
} else {
if (mode == SC_FLUSH_FILE)
@@ -2746,6 +2754,13 @@ static void nilfs_segctor_destroy(struct
|| sci->sc_seq_request != sci->sc_seq_done);
spin_unlock(&sci->sc_state_lock);
+ /*
+ * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can
+ * be called from delayed iput() via nilfs_evict_inode() and can race
+ * with the above log writer thread termination.
+ */
+ nilfs_segctor_wakeup(sci, 0, true);
+
if (flush_work(&sci->sc_iput_work))
flag = true;
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are