May 2024 - Linux-stable-mirror

FAILED: patch "[PATCH] mm,swapops: update check in is_pfn_swap_entry for hwpoison" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 07a57a338adb6ec9e766d6a6790f76527f45ceb5 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042309-rural-overlying-190b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: 07a57a338adb ("mm,swapops: update check in is_pfn_swap_entry for hwpoison entries") d027122d8363 ("mm/hwpoison: move definitions of num_poisoned_pages_* to memory-failure.c") e591ef7d96d6 ("mm,hwpoison,hugetlb,memory_hotplug: hotremove memory section with hwpoisoned hugepage") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 07a57a338adb6ec9e766d6a6790f76527f45ceb5 Mon Sep 17 00:00:00 2001 From: Oscar Salvador <osalvador(a)suse.de> Date: Sun, 7 Apr 2024 15:05:37 +0200 Subject: [PATCH] mm,swapops: update check in is_pfn_swap_entry for hwpoison entries Tony reported that the Machine check recovery was broken in v6.9-rc1, as he was hitting a VM_BUG_ON when injecting uncorrectable memory errors to DRAM. After some more digging and debugging on his side, he realized that this went back to v6.1, with the introduction of 'commit 0d206b5d2e0d ("mm/swap: add swp_offset_pfn() to fetch PFN from swap entry")'. That commit, among other things, introduced swp_offset_pfn(), replacing hwpoison_entry_to_pfn() in its favour. The patch also introduced a VM_BUG_ON() check for is_pfn_swap_entry(), but is_pfn_swap_entry() never got updated to cover hwpoison entries, which means that we would hit the VM_BUG_ON whenever we would call swp_offset_pfn() for such entries on environments with CONFIG_DEBUG_VM set. Fix this by updating the check to cover hwpoison entries as well, and update the comment while we are it. Link: https://lkml.kernel.org/r/20240407130537.16977-1-osalvador@suse.de Fixes: 0d206b5d2e0d ("mm/swap: add swp_offset_pfn() to fetch PFN from swap entry") Signed-off-by: Oscar Salvador <osalvador(a)suse.de> Reported-by: Tony Luck <tony.luck(a)intel.com> Closes: https://lore.kernel.org/all/Zg8kLSl2yAlA3o5D@agluck-desk3/ Tested-by: Tony Luck <tony.luck(a)intel.com> Reviewed-by: Peter Xu <peterx(a)redhat.com> Reviewed-by: David Hildenbrand <david(a)redhat.com> Acked-by: Miaohe Lin <linmiaohe(a)huawei.com> Cc: <stable(a)vger.kernel.org> [6.1.x] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 48b700ba1d18..a5c560a2f8c2 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) } #endif /* CONFIG_MIGRATION */ +#ifdef CONFIG_MEMORY_FAILURE + +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} + +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) @@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) /* * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They are used to represent unaddressable device memory - * and to restrict access to a page undergoing migration. + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. */ static inline bool is_pfn_swap_entry(swp_entry_t entry) { @@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry) BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry); + is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); } struct page_vma_mapped_walk; @@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#ifdef CONFIG_MEMORY_FAILURE - -/* - * Support for hardware poisoned pages - */ -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - BUG_ON(!PageLocked(page)); - return swp_entry(SWP_HWPOISON, page_to_pfn(page)); -} - -static inline int is_hwpoison_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_HWPOISON; -} - -#else - -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - return swp_entry(0, 0); -} - -static inline int is_hwpoison_entry(swp_entry_t swp) -{ - return 0; -} -#endif - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES;

1 year, 2 months

3
2
0 0

[PATCH 6.6.y] kselftest: Add a ksft_perror() helper

by Edward Liaw

From: Mark Brown <broonie(a)kernel.org> [ Upstream commit 907f33028871fa7c9a3db1efd467b78ef82cce20 ] The standard library perror() function provides a convenient way to print an error message based on the current errno but this doesn't play nicely with KTAP output. Provide a helper which does an equivalent thing in a KTAP compatible format. nolibc doesn't have a strerror() and adding the table of strings required doesn't seem like a good fit for what it's trying to do so when we're using that only print the errno. Signed-off-by: Mark Brown <broonie(a)kernel.org> Reviewed-by: Kees Cook <keescook(a)chromium.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Stable-dep-of: 071af0c9e582 ("selftests: timers: Convert posix_timers test to generate KTAP output") Signed-off-by: Edward Liaw <edliaw(a)google.com> --- tools/testing/selftests/kselftest.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index e8eecbc83a60..ad7b97e16f37 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -48,6 +48,7 @@ #include <stdlib.h> #include <unistd.h> #include <stdarg.h> +#include <string.h> #include <stdio.h> #include <sys/utsname.h> #endif @@ -156,6 +157,19 @@ static inline void ksft_print_msg(const char *msg, ...) va_end(args); } +static inline void ksft_perror(const char *msg) +{ +#ifndef NOLIBC + ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno); +#else + /* + * nolibc doesn't provide strerror() and it seems + * inappropriate to add one, just print the errno. + */ + ksft_print_msg("%s: %d)\n", msg, errno); +#endif +} + static inline void ksft_test_result_pass(const char *msg, ...) { int saved_errno = errno; -- 2.44.0.769.g3c40516874-goog

1 year, 2 months

3
4
0 0

FAILED: patch "[PATCH] mm/hugetlb: fix DEBUG_LOCKS_WARN_ON(1) when" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 52ccdde16b6540abe43b6f8d8e1e1ec90b0983af # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024042912-visibly-carpool-70bd@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: 52ccdde16b65 ("mm/hugetlb: fix DEBUG_LOCKS_WARN_ON(1) when dissolve_free_hugetlb_folio()") 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap") d6ef19e25df2 ("mm/hugetlb: convert update_and_free_page() to folios") cfd5082b5147 ("mm/hugetlb: convert remove_hugetlb_page() to folios") 1a7cdab59b22 ("mm/hugetlb: convert dissolve_free_huge_page() to folios") 911565b82853 ("mm/hugetlb: convert destroy_compound_gigantic_page() to folios") cb67f4282bf9 ("mm,thp,rmap: simplify compound page mapcount handling") dad6a5eb5556 ("mm,hugetlb: use folio fields in second tail page") 0356c4b96f68 ("mm/hugetlb: convert free_huge_page to folios") de656ed376c4 ("mm/hugetlb_cgroup: convert set_hugetlb_cgroup*() to folios") f074732d599e ("mm/hugetlb_cgroup: convert hugetlb_cgroup_from_page() to folios") a098c977722c ("mm/hugetlb_cgroup: convert __set_hugetlb_cgroup() to folios") 4781593d5dba ("mm/hugetlb: unify clearing of RestoreReserve for private pages") 149562f75094 ("mm/hugetlb: add hugetlb_folio_subpool() helpers") d340625f4849 ("mm: add private field of first tail to struct page and struct folio") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 52ccdde16b6540abe43b6f8d8e1e1ec90b0983af Mon Sep 17 00:00:00 2001 From: Miaohe Lin <linmiaohe(a)huawei.com> Date: Fri, 19 Apr 2024 16:58:19 +0800 Subject: [PATCH] mm/hugetlb: fix DEBUG_LOCKS_WARN_ON(1) when dissolve_free_hugetlb_folio() When I did memory failure tests recently, below warning occurs: DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 8 PID: 1011 at kernel/locking/lockdep.c:232 __lock_acquire+0xccb/0x1ca0 Modules linked in: mce_inject hwpoison_inject CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 FS: 00007ff9f32aa740(0000) GS:ffffa1ce5fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff9f3134ba0 CR3: 00000008484e4000 CR4: 00000000000006f0 Call Trace: <TASK> lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 </TASK> Kernel panic - not syncing: kernel: panic_on_warn set ... CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: <TASK> panic+0x326/0x350 check_panic_on_warn+0x4f/0x50 __warn+0x98/0x190 report_bug+0x18e/0x1a0 handle_bug+0x3d/0x70 exc_invalid_op+0x18/0x70 asm_exc_invalid_op+0x1a/0x20 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 </TASK> After git bisecting and digging into the code, I believe the root cause is that _deferred_list field of folio is unioned with _hugetlb_subpool field. In __update_and_free_hugetlb_folio(), folio->_deferred_list is initialized leading to corrupted folio->_hugetlb_subpool when folio is hugetlb. Later free_huge_folio() will use _hugetlb_subpool and above warning happens. But it is assumed hugetlb flag must have been cleared when calling folio_put() in update_and_free_hugetlb_folio(). This assumption is broken due to below race: CPU1 CPU2 dissolve_free_huge_page update_and_free_pages_bulk update_and_free_hugetlb_folio hugetlb_vmemmap_restore_folios folio_clear_hugetlb_vmemmap_optimized clear_flag = folio_test_hugetlb_vmemmap_optimized if (clear_flag) <-- False, it's already cleared. __folio_clear_hugetlb(folio) <-- Hugetlb is not cleared. folio_put free_huge_folio <-- free_the_page is expected. list_for_each_entry() __folio_clear_hugetlb <-- Too late. Fix this issue by checking whether folio is hugetlb directly instead of checking clear_flag to close the race window. Link: https://lkml.kernel.org/r/20240419085819.1901645-1-linmiaohe@huawei.com Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap") Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com> Reviewed-by: Oscar Salvador <osalvador(a)suse.de> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 05371bf54f96..ce7be5c24442 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1781,7 +1781,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, * If vmemmap pages were allocated above, then we need to clear the * hugetlb destructor under the hugetlb lock. */ - if (clear_dtor) { + if (folio_test_hugetlb(folio)) { spin_lock_irq(&hugetlb_lock); __clear_hugetlb_destructor(h, folio); spin_unlock_irq(&hugetlb_lock);

1 year, 2 months

4
5
0 0

[PATCH 6.6.x] btrfs: do not wait for short bulk allocation

by David Sterba

From: Qu Wenruo <wqu(a)suse.com> commit 1db7959aacd905e6487d0478ac01d89f86eb1e51 upstream. [BUG] There is a recent report that when memory pressure is high (including cached pages), btrfs can spend most of its time on memory allocation in btrfs_alloc_page_array() for compressed read/write. [CAUSE] For btrfs_alloc_page_array() we always go alloc_pages_bulk_array(), and even if the bulk allocation failed (fell back to single page allocation) we still retry but with extra memalloc_retry_wait(). If the bulk alloc only returned one page a time, we would spend a lot of time on the retry wait. The behavior was introduced in commit 395cb57e8560 ("btrfs: wait between incomplete batch memory allocations"). [FIX] Although the commit mentioned that other filesystems do the wait, it's not the case at least nowadays. All the mainlined filesystems only call memalloc_retry_wait() if they failed to allocate any page (not only for bulk allocation). If there is any progress, they won't call memalloc_retry_wait() at all. For example, xfs_buf_alloc_pages() would only call memalloc_retry_wait() if there is no allocation progress at all, and the call is not for metadata readahead. So I don't believe we should call memalloc_retry_wait() unconditionally for short allocation. Call memalloc_retry_wait() if it fails to allocate any page for tree block allocation (which goes with __GFP_NOFAIL and may not need the special handling anyway), and reduce the latency for btrfs_alloc_page_array(). Reported-by: Julian Taylor <julian.taylor(a)1und1.de> Tested-by: Julian Taylor <julian.taylor(a)1und1.de> Link: https://lore.kernel.org/all/8966c095-cbe7-4d22-9784-a647d1bf27c3@1und1.de/ Fixes: 395cb57e8560 ("btrfs: wait between incomplete batch memory allocations") CC: stable(a)vger.kernel.org # 6.1+ Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me> Reviewed-by: Filipe Manana <fdmanana(a)suse.com> Signed-off-by: Qu Wenruo <wqu(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> --- fs/btrfs/extent_io.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5acb2cb79d4b..9fbffd84b16c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -686,24 +686,14 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) unsigned int last = allocated; allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array); - - if (allocated == nr_pages) - return 0; - - /* - * During this iteration, no page could be allocated, even - * though alloc_pages_bulk_array() falls back to alloc_page() - * if it could not bulk-allocate. So we must be out of memory. - */ - if (allocated == last) { + if (unlikely(allocated == last)) { + /* No progress, fail and do cleanup. */ for (int i = 0; i < allocated; i++) { __free_page(page_array[i]); page_array[i] = NULL; } return -ENOMEM; } - - memalloc_retry_wait(GFP_NOFS); } return 0; } -- 2.45.0

1 year, 2 months

2
1
0 0

[alternative-merged] mm-huge_memory-mark-huge_zero_page-reserved.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/huge_memory: mark huge_zero_page reserved has been removed from the -mm tree. Its filename was mm-huge_memory-mark-huge_zero_page-reserved.patch This patch was dropped because an alternative patch was or shall be merged ------------------------------------------------------ From: Miaohe Lin <linmiaohe(a)huawei.com> Subject: mm/huge_memory: mark huge_zero_page reserved Date: Sat, 11 May 2024 11:54:35 +0800 When I did memory failure tests recently, below panic occurs: kernel BUG at include/linux/mm.h:1135! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 9 PID: 137 Comm: kswapd1 Not tainted 6.9.0-rc4-00491-gd5ce28f156fe-dirty #14 RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 Call Trace: <TASK> do_shrink_slab+0x14f/0x6a0 shrink_slab+0xca/0x8c0 shrink_node+0x2d0/0x7d0 balance_pgdat+0x33a/0x720 kswapd+0x1f3/0x410 kthread+0xd5/0x100 ret_from_fork+0x2f/0x50 ret_from_fork_asm+0x1a/0x30 </TASK> Modules linked in: mce_inject hwpoison_inject ---[ end trace 0000000000000000 ]--- RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 The root cause is that HWPoison flag will be set for huge_zero_page without increasing the page refcnt. But then unpoison_memory() will decrease the page refcnt unexpectly as it appears like a successfully hwpoisoned page leading to VM_BUG_ON_PAGE(page_ref_count(page) == 0) when releasing huge_zero_page. Fix this issue by marking huge_zero_page reserved. So unpoison_memory() will skip this page. This will make it consistent with ZERO_PAGE case too. Link: https://lkml.kernel.org/r/20240511035435.1477004-1-linmiaohe@huawei.com Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page") Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com> Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org> Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com> Cc: Xu Yu <xuyu(a)linux.alibaba.com> Cc: Yang Shi <shy828301(a)gmail.com> Cc: David Hildenbrand <david(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/huge_memory.c | 2 ++ 1 file changed, 2 insertions(+) --- a/mm/huge_memory.c~mm-huge_memory-mark-huge_zero_page-reserved +++ a/mm/huge_memory.c @@ -212,6 +212,7 @@ retry: folio_put(zero_folio); goto retry; } + __SetPageReserved(zero_page); WRITE_ONCE(huge_zero_pfn, folio_pfn(zero_folio)); /* We take additional reference here. It will be put back by shrinker */ @@ -264,6 +265,7 @@ static unsigned long shrink_huge_zero_pa struct folio *zero_folio = xchg(&huge_zero_folio, NULL); BUG_ON(zero_folio == NULL); WRITE_ONCE(huge_zero_pfn, ~0UL); + __ClearPageReserved(zero_page); folio_put(zero_folio); return HPAGE_PMD_NR; } _ Patches currently in -mm which might be from linmiaohe(a)huawei.com are

1 year, 2 months

1
0
0 0

+ mm-huge_memory-mark-huge_zero_page-reserved.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: mm/huge_memory: mark huge_zero_page reserved has been added to the -mm mm-hotfixes-unstable branch. Its filename is mm-huge_memory-mark-huge_zero_page-reserved.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Miaohe Lin <linmiaohe(a)huawei.com> Subject: mm/huge_memory: mark huge_zero_page reserved Date: Sat, 11 May 2024 11:54:35 +0800 When I did memory failure tests recently, below panic occurs: kernel BUG at include/linux/mm.h:1135! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 9 PID: 137 Comm: kswapd1 Not tainted 6.9.0-rc4-00491-gd5ce28f156fe-dirty #14 RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 Call Trace: <TASK> do_shrink_slab+0x14f/0x6a0 shrink_slab+0xca/0x8c0 shrink_node+0x2d0/0x7d0 balance_pgdat+0x33a/0x720 kswapd+0x1f3/0x410 kthread+0xd5/0x100 ret_from_fork+0x2f/0x50 ret_from_fork_asm+0x1a/0x30 </TASK> Modules linked in: mce_inject hwpoison_inject ---[ end trace 0000000000000000 ]--- RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 The root cause is that HWPoison flag will be set for huge_zero_page without increasing the page refcnt. But then unpoison_memory() will decrease the page refcnt unexpectly as it appears like a successfully hwpoisoned page leading to VM_BUG_ON_PAGE(page_ref_count(page) == 0) when releasing huge_zero_page. Fix this issue by marking huge_zero_page reserved. So unpoison_memory() will skip this page. This will make it consistent with ZERO_PAGE case too. Link: https://lkml.kernel.org/r/20240511035435.1477004-1-linmiaohe@huawei.com Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page") Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com> Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com> Cc: Xu Yu <xuyu(a)linux.alibaba.com> Cc: Yang Shi <shy828301(a)gmail.com> Cc: David Hildenbrand <david(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/huge_memory.c | 2 ++ 1 file changed, 2 insertions(+) --- a/mm/huge_memory.c~mm-huge_memory-mark-huge_zero_page-reserved +++ a/mm/huge_memory.c @@ -208,6 +208,7 @@ retry: __free_pages(zero_page, compound_order(zero_page)); goto retry; } + __SetPageReserved(zero_page); WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); /* We take additional reference here. It will be put back by shrinker */ @@ -260,6 +261,7 @@ static unsigned long shrink_huge_zero_pa struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); WRITE_ONCE(huge_zero_pfn, ~0UL); + __ClearPageReserved(zero_page); __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } _ Patches currently in -mm which might be from linmiaohe(a)huawei.com are mm-huge_memory-mark-huge_zero_page-reserved.patch

1 year, 2 months

1
0
0 0

+ mm-huge_memory-mark-huge_zero_page-reserved.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: mm/huge_memory: mark huge_zero_page reserved has been added to the -mm mm-hotfixes-unstable branch. Its filename is mm-huge_memory-mark-huge_zero_page-reserved.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Miaohe Lin <linmiaohe(a)huawei.com> Subject: mm/huge_memory: mark huge_zero_page reserved Date: Sat, 11 May 2024 11:54:35 +0800 When I did memory failure tests recently, below panic occurs: kernel BUG at include/linux/mm.h:1135! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 9 PID: 137 Comm: kswapd1 Not tainted 6.9.0-rc4-00491-gd5ce28f156fe-dirty #14 RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 Call Trace: <TASK> do_shrink_slab+0x14f/0x6a0 shrink_slab+0xca/0x8c0 shrink_node+0x2d0/0x7d0 balance_pgdat+0x33a/0x720 kswapd+0x1f3/0x410 kthread+0xd5/0x100 ret_from_fork+0x2f/0x50 ret_from_fork_asm+0x1a/0x30 </TASK> Modules linked in: mce_inject hwpoison_inject ---[ end trace 0000000000000000 ]--- RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 The root cause is that HWPoison flag will be set for huge_zero_page without increasing the page refcnt. But then unpoison_memory() will decrease the page refcnt unexpectly as it appears like a successfully hwpoisoned page leading to VM_BUG_ON_PAGE(page_ref_count(page) == 0) when releasing huge_zero_page. Fix this issue by marking huge_zero_page reserved. So unpoison_memory() will skip this page. This will make it consistent with ZERO_PAGE case too. Link: https://lkml.kernel.org/r/20240511035435.1477004-1-linmiaohe@huawei.com Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page") Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com> Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com> Cc: Xu Yu <xuyu(a)linux.alibaba.com> Cc: Yang Shi <shy828301(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/huge_memory.c | 2 ++ 1 file changed, 2 insertions(+) --- a/mm/huge_memory.c~mm-huge_memory-mark-huge_zero_page-reserved +++ a/mm/huge_memory.c @@ -208,6 +208,7 @@ retry: __free_pages(zero_page, compound_order(zero_page)); goto retry; } + __SetPageReserved(zero_page); WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); /* We take additional reference here. It will be put back by shrinker */ @@ -260,6 +261,7 @@ static unsigned long shrink_huge_zero_pa struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); WRITE_ONCE(huge_zero_pfn, ~0UL); + __ClearPageReserved(zero_page); __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } _ Patches currently in -mm which might be from linmiaohe(a)huawei.com are mm-huge_memory-mark-huge_zero_page-reserved.patch

1 year, 2 months

1
0
0 0

[PATCH 6.1.x] btrfs: do not wait for short bulk allocation

by David Sterba

From: Qu Wenruo <wqu(a)suse.com> commit 1db7959aacd905e6487d0478ac01d89f86eb1e51 upstream. [BUG] There is a recent report that when memory pressure is high (including cached pages), btrfs can spend most of its time on memory allocation in btrfs_alloc_page_array() for compressed read/write. [CAUSE] For btrfs_alloc_page_array() we always go alloc_pages_bulk_array(), and even if the bulk allocation failed (fell back to single page allocation) we still retry but with extra memalloc_retry_wait(). If the bulk alloc only returned one page a time, we would spend a lot of time on the retry wait. The behavior was introduced in commit 395cb57e8560 ("btrfs: wait between incomplete batch memory allocations"). [FIX] Although the commit mentioned that other filesystems do the wait, it's not the case at least nowadays. All the mainlined filesystems only call memalloc_retry_wait() if they failed to allocate any page (not only for bulk allocation). If there is any progress, they won't call memalloc_retry_wait() at all. For example, xfs_buf_alloc_pages() would only call memalloc_retry_wait() if there is no allocation progress at all, and the call is not for metadata readahead. So I don't believe we should call memalloc_retry_wait() unconditionally for short allocation. Call memalloc_retry_wait() if it fails to allocate any page for tree block allocation (which goes with __GFP_NOFAIL and may not need the special handling anyway), and reduce the latency for btrfs_alloc_page_array(). Reported-by: Julian Taylor <julian.taylor(a)1und1.de> Tested-by: Julian Taylor <julian.taylor(a)1und1.de> Link: https://lore.kernel.org/all/8966c095-cbe7-4d22-9784-a647d1bf27c3@1und1.de/ Fixes: 395cb57e8560 ("btrfs: wait between incomplete batch memory allocations") CC: stable(a)vger.kernel.org # 6.1+ Reviewed-by: Sweet Tea Dorminy <sweettea-kernel(a)dorminy.me> Reviewed-by: Filipe Manana <fdmanana(a)suse.com> Signed-off-by: Qu Wenruo <wqu(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> --- fs/btrfs/extent_io.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 539bc9bdcb93..5f923c9b773e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1324,19 +1324,14 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) unsigned int last = allocated; allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array); - - if (allocated == nr_pages) - return 0; - - /* - * During this iteration, no page could be allocated, even - * though alloc_pages_bulk_array() falls back to alloc_page() - * if it could not bulk-allocate. So we must be out of memory. - */ - if (allocated == last) + if (unlikely(allocated == last)) { + /* No progress, fail and do cleanup. */ + for (int i = 0; i < allocated; i++) { + __free_page(page_array[i]); + page_array[i] = NULL; + } return -ENOMEM; - - memalloc_retry_wait(GFP_NOFS); + } } return 0; } -- 2.45.0

1 year, 2 months

1
0
0 0

[for-next][PATCH 7/7] eventfs: Fix a possible null pointer dereference in eventfs_find_events()

by Steven Rostedt

From: Hao Ge <gehao(a)kylinos.cn> In function eventfs_find_events,there is a potential null pointer that may be caused by calling update_events_attr which will perform some operations on the members of the ei struct when ei is NULL. Hence,When ei->is_freed is set,return NULL directly. Link: https://lore.kernel.org/linux-trace-kernel/20240513053338.63017-1-hao.ge@li… Cc: stable(a)vger.kernel.org Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Hao Ge <gehao(a)kylinos.cn> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> --- fs/tracefs/event_inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index a878cea70f4c..0256afdd4acf 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -345,10 +345,9 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) * If the ei is being freed, the ownership of the children * doesn't matter. */ - if (ei->is_freed) { - ei = NULL; - break; - } + if (ei->is_freed) + return NULL; + // Walk upwards until you find the events inode } while (!ei->is_events); -- 2.43.0

1 year, 2 months

1
0
0 0

[for-next][PATCH 6/7] ftrace: Fix possible use-after-free issue in ftrace_location()

by Steven Rostedt

From: Zheng Yejian <zhengyejian1(a)huawei.com> KASAN reports a bug: BUG: KASAN: use-after-free in ftrace_location+0x90/0x120 Read of size 8 at addr ffff888141d40010 by task insmod/424 CPU: 8 PID: 424 Comm: insmod Tainted: G W 6.9.0-rc2+ [...] Call Trace: <TASK> dump_stack_lvl+0x68/0xa0 print_report+0xcf/0x610 kasan_report+0xb5/0xe0 ftrace_location+0x90/0x120 register_kprobe+0x14b/0xa40 kprobe_init+0x2d/0xff0 [kprobe_example] do_one_initcall+0x8f/0x2d0 do_init_module+0x13a/0x3c0 load_module+0x3082/0x33d0 init_module_from_file+0xd2/0x130 __x64_sys_finit_module+0x306/0x440 do_syscall_64+0x68/0x140 entry_SYSCALL_64_after_hwframe+0x71/0x79 The root cause is that, in lookup_rec(), ftrace record of some address is being searched in ftrace pages of some module, but those ftrace pages at the same time is being freed in ftrace_release_mod() as the corresponding module is being deleted: CPU1 | CPU2 register_kprobes() { | delete_module() { check_kprobe_address_safe() { | arch_check_ftrace_location() { | ftrace_location() { | lookup_rec() // USE! | ftrace_release_mod() // Free! To fix this issue: 1. Hold rcu lock as accessing ftrace pages in ftrace_location_range(); 2. Use ftrace_location_range() instead of lookup_rec() in ftrace_location(); 3. Call synchronize_rcu() before freeing any ftrace pages both in ftrace_process_locs()/ftrace_release_mod()/ftrace_free_mem(). Link: https://lore.kernel.org/linux-trace-kernel/20240509192859.1273558-1-zhengye… Cc: stable(a)vger.kernel.org Cc: <mhiramat(a)kernel.org> Cc: <mark.rutland(a)arm.com> Cc: <mathieu.desnoyers(a)efficios.com> Fixes: ae6aa16fdc16 ("kprobes: introduce ftrace based optimization") Suggested-by: Steven Rostedt <rostedt(a)goodmis.org> Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> --- kernel/trace/ftrace.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5a01d72f66db..2308c0a2fd29 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1595,12 +1595,15 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) unsigned long ftrace_location_range(unsigned long start, unsigned long end) { struct dyn_ftrace *rec; + unsigned long ip = 0; + rcu_read_lock(); rec = lookup_rec(start, end); if (rec) - return rec->ip; + ip = rec->ip; + rcu_read_unlock(); - return 0; + return ip; } /** @@ -1614,25 +1617,22 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) */ unsigned long ftrace_location(unsigned long ip) { - struct dyn_ftrace *rec; + unsigned long loc; unsigned long offset; unsigned long size; - rec = lookup_rec(ip, ip); - if (!rec) { + loc = ftrace_location_range(ip, ip); + if (!loc) { if (!kallsyms_lookup_size_offset(ip, &size, &offset)) goto out; /* map sym+0 to __fentry__ */ if (!offset) - rec = lookup_rec(ip, ip + size - 1); + loc = ftrace_location_range(ip, ip + size - 1); } - if (rec) - return rec->ip; - out: - return 0; + return loc; } /** @@ -6591,6 +6591,8 @@ static int ftrace_process_locs(struct module *mod, /* We should have used all pages unless we skipped some */ if (pg_unuse) { WARN_ON(!skipped); + /* Need to synchronize with ftrace_location_range() */ + synchronize_rcu(); ftrace_free_pages(pg_unuse); } return ret; @@ -6804,6 +6806,9 @@ void ftrace_release_mod(struct module *mod) out_unlock: mutex_unlock(&ftrace_lock); + /* Need to synchronize with ftrace_location_range() */ + if (tmp_page) + synchronize_rcu(); for (pg = tmp_page; pg; pg = tmp_page) { /* Needs to be called outside of ftrace_lock */ @@ -7137,6 +7142,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) unsigned long start = (unsigned long)(start_ptr); unsigned long end = (unsigned long)(end_ptr); struct ftrace_page **last_pg = &ftrace_pages_start; + struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; struct dyn_ftrace *rec; struct dyn_ftrace key; @@ -7178,12 +7184,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) ftrace_update_tot_cnt--; if (!pg->index) { *last_pg = pg->next; - if (pg->records) { - free_pages((unsigned long)pg->records, pg->order); - ftrace_number_of_pages -= 1 << pg->order; - } - ftrace_number_of_groups--; - kfree(pg); + pg->next = tmp_page; + tmp_page = pg; pg = container_of(last_pg, struct ftrace_page, next); if (!(*last_pg)) ftrace_pages = pg; @@ -7200,6 +7202,11 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) clear_func_from_hashes(func); kfree(func); } + /* Need to synchronize with ftrace_location_range() */ + if (tmp_page) { + synchronize_rcu(); + ftrace_free_pages(tmp_page); + } } void __init ftrace_free_init_mem(void) -- 2.43.0

1 year, 2 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror May 2024