August 2024 - Linux-stable-mirror

[PATCH 6.2.y] jfs: define xtree root and page independently

by Sergio González Collado

From: Dave Kleikamp <dave.kleikamp(a)oracle.com> [ Upstream commit a779ed754e52d582b8c0e17959df063108bd0656 ] In order to make array bounds checking sane, provide a separate definition of the in-inode xtree root and the external xtree page. Signed-off-by: Dave Kleikamp <dave.kleikamp(a)oracle.com> Tested-by: Manas Ghandat <ghandatmanas(a)gmail.com> (cherry picked from commit a779ed754e52d582b8c0e17959df063108bd0656) Signed-off-by: Sergio González Collado <sergio.collado(a)gmail.com> --- fs/jfs/jfs_dinode.h | 2 +- fs/jfs/jfs_imap.c | 6 +++--- fs/jfs/jfs_incore.h | 2 +- fs/jfs/jfs_txnmgr.c | 4 ++-- fs/jfs/jfs_xtree.c | 4 ++-- fs/jfs/jfs_xtree.h | 37 +++++++++++++++++++++++-------------- 6 files changed, 32 insertions(+), 23 deletions(-) diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 6b231d0d0071..603aae17a693 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -96,7 +96,7 @@ struct dinode { #define di_gengen u._file._u1._imap._gengen union { - xtpage_t _xtroot; + xtroot_t _xtroot; struct { u8 unused[16]; /* 16: */ dxd_t _dxd; /* 16: */ diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 390cbfce391f..85918c0d852b 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -669,7 +669,7 @@ int diWrite(tid_t tid, struct inode *ip) * This is the special xtree inside the directory for storing * the directory table */ - xtpage_t *p, *xp; + xtroot_t *p, *xp; xad_t *xad; jfs_ip->xtlid = 0; @@ -683,7 +683,7 @@ int diWrite(tid_t tid, struct inode *ip) * copy xtree root from inode to dinode: */ p = &jfs_ip->i_xtroot; - xp = (xtpage_t *) &dp->di_dirtable; + xp = (xtroot_t *) &dp->di_dirtable; lv = ilinelock->lv; for (n = 0; n < ilinelock->index; n++, lv++) { memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], @@ -712,7 +712,7 @@ int diWrite(tid_t tid, struct inode *ip) * regular file: 16 byte (XAD slot) granularity */ if (type & tlckXTREE) { - xtpage_t *p, *xp; + xtroot_t *p, *xp; xad_t *xad; /* diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 721def69e732..dd4264aa9bed 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -66,7 +66,7 @@ struct jfs_inode_info { lid_t xtlid; /* lid of xtree lock on directory */ union { struct { - xtpage_t _xtroot; /* 288: xtree root */ + xtroot_t _xtroot; /* 288: xtree root */ struct inomap *_imap; /* 4: inode map header */ } file; struct { diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index ffd4feece078..997cc1b9e628 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -778,7 +778,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, if (mp->xflag & COMMIT_PAGE) p = (xtpage_t *) mp->data; else - p = &jfs_ip->i_xtroot; + p = (xtpage_t *) &jfs_ip->i_xtroot; xtlck->lwm.offset = le16_to_cpu(p->header.nextindex); } @@ -1671,7 +1671,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, if (tlck->type & tlckBTROOT) { lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); - p = &JFS_IP(ip)->i_xtroot; + p = (xtpage_t *) &JFS_IP(ip)->i_xtroot; if (S_ISDIR(ip->i_mode)) lrd->log.redopage.type |= cpu_to_le16(LOG_DIR_XTREE); diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 2d304cee884c..5ee618d17e77 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -1213,7 +1213,7 @@ xtSplitRoot(tid_t tid, struct xtlock *xtlck; int rc; - sp = &JFS_IP(ip)->i_xtroot; + sp = (xtpage_t *) &JFS_IP(ip)->i_xtroot; INCREMENT(xtStat.split); @@ -2098,7 +2098,7 @@ int xtAppend(tid_t tid, /* transaction id */ */ void xtInitRoot(tid_t tid, struct inode *ip) { - xtpage_t *p; + xtroot_t *p; /* * acquire a transaction lock on the root diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h index ad7592191d76..0f6cf5a1ce75 100644 --- a/fs/jfs/jfs_xtree.h +++ b/fs/jfs/jfs_xtree.h @@ -65,24 +65,33 @@ struct xadlist { #define XTPAGEMAXSLOT 256 #define XTENTRYSTART 2 -/* - * xtree page: - */ -typedef union { - struct xtheader { - __le64 next; /* 8: */ - __le64 prev; /* 8: */ +struct xtheader { + __le64 next; /* 8: */ + __le64 prev; /* 8: */ - u8 flag; /* 1: */ - u8 rsrvd1; /* 1: */ - __le16 nextindex; /* 2: next index = number of entries */ - __le16 maxentry; /* 2: max number of entries */ - __le16 rsrvd2; /* 2: */ + u8 flag; /* 1: */ + u8 rsrvd1; /* 1: */ + __le16 nextindex; /* 2: next index = number of entries */ + __le16 maxentry; /* 2: max number of entries */ + __le16 rsrvd2; /* 2: */ - pxd_t self; /* 8: self */ - } header; /* (32) */ + pxd_t self; /* 8: self */ +}; +/* + * xtree root (in inode): + */ +typedef union { + struct xtheader header; xad_t xad[XTROOTMAXSLOT]; /* 16 * maxentry: xad array */ +} xtroot_t; + +/* + * xtree page: + */ +typedef union { + struct xtheader header; + xad_t xad[XTPAGEMAXSLOT]; /* 16 * maxentry: xad array */ } xtpage_t; /* base-commit: 46df6964c1a9eb72027710f626cb1c6bfb5d58c9 -- 2.39.2

10 months, 2 weeks

2
1
0 0

[PATCH 1/2] usb: dwc3: omap: add missing depopulate in probe error path

by Krzysztof Kozlowski

Depopulate device in probe error paths to fix leak of children resources. Fixes: ee249b455494 ("usb: dwc3: omap: remove IRQ_NOAUTOEN used with shared irq") Cc: <stable(a)vger.kernel.org> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org> --- drivers/usb/dwc3/dwc3-omap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index d5c77db4daa9..2a11fc0ee84f 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -522,11 +522,13 @@ static int dwc3_omap_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to request IRQ #%d --> %d\n", omap->irq, ret); - goto err1; + goto err2; } dwc3_omap_enable_irqs(omap); return 0; +err2: + of_platform_depopulate(dev); err1: pm_runtime_put_sync(dev); pm_runtime_disable(dev); -- 2.43.0

10 months, 2 weeks

3
5
0 0

[merged mm-hotfixes-stable] alloc_tag-mark-pages-reserved-during-cma-activation-as-not-tagged.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: alloc_tag: mark pages reserved during CMA activation as not tagged has been removed from the -mm tree. Its filename was alloc_tag-mark-pages-reserved-during-cma-activation-as-not-tagged.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Suren Baghdasaryan <surenb(a)google.com> Subject: alloc_tag: mark pages reserved during CMA activation as not tagged Date: Tue, 13 Aug 2024 08:07:57 -0700 During CMA activation, pages in CMA area are prepared and then freed without being allocated. This triggers warnings when memory allocation debug config (CONFIG_MEM_ALLOC_PROFILING_DEBUG) is enabled. Fix this by marking these pages not tagged before freeing them. Link: https://lkml.kernel.org/r/20240813150758.855881-2-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan <surenb(a)google.com> Acked-by: David Hildenbrand <david(a)redhat.com> Cc: Kees Cook <keescook(a)chromium.org> Cc: Kent Overstreet <kent.overstreet(a)linux.dev> Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com> Cc: Sourav Panda <souravpanda(a)google.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: <stable(a)vger.kernel.org> [6.10] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/mm_init.c | 2 ++ 1 file changed, 2 insertions(+) --- a/mm/mm_init.c~alloc_tag-mark-pages-reserved-during-cma-activation-as-not-tagged +++ a/mm/mm_init.c @@ -2244,6 +2244,8 @@ void __init init_cma_reserved_pageblock( set_pageblock_migratetype(page, MIGRATE_CMA); set_page_refcounted(page); + /* pages were reserved and not allocated */ + clear_page_tag_ref(page); __free_pages(page, pageblock_order); adjust_managed_page_count(page, pageblock_nr_pages); _ Patches currently in -mm which might be from surenb(a)google.com are

10 months, 2 weeks

1
0
0 0

[merged mm-hotfixes-stable] alloc_tag-introduce-clear_page_tag_ref-helper-function.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: alloc_tag: introduce clear_page_tag_ref() helper function has been removed from the -mm tree. Its filename was alloc_tag-introduce-clear_page_tag_ref-helper-function.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Suren Baghdasaryan <surenb(a)google.com> Subject: alloc_tag: introduce clear_page_tag_ref() helper function Date: Tue, 13 Aug 2024 08:07:56 -0700 In several cases we are freeing pages which were not allocated using common page allocators. For such cases, in order to keep allocation accounting correct, we should clear the page tag to indicate that the page being freed is expected to not have a valid allocation tag. Introduce clear_page_tag_ref() helper function to be used for this. Link: https://lkml.kernel.org/r/20240813150758.855881-1-surenb@google.com Fixes: d224eb0287fb ("codetag: debug: mark codetags for reserved pages as empty") Signed-off-by: Suren Baghdasaryan <surenb(a)google.com> Suggested-by: David Hildenbrand <david(a)redhat.com> Acked-by: David Hildenbrand <david(a)redhat.com> Reviewed-by: Pasha Tatashin <pasha.tatashin(a)soleen.com> Cc: Kees Cook <keescook(a)chromium.org> Cc: Kent Overstreet <kent.overstreet(a)linux.dev> Cc: Sourav Panda <souravpanda(a)google.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: <stable(a)vger.kernel.org> [6.10] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/pgalloc_tag.h | 13 +++++++++++++ mm/mm_init.c | 10 +--------- mm/page_alloc.c | 9 +-------- 3 files changed, 15 insertions(+), 17 deletions(-) --- a/include/linux/pgalloc_tag.h~alloc_tag-introduce-clear_page_tag_ref-helper-function +++ a/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(unio page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} --- a/mm/mm_init.c~alloc_tag-introduce-clear_page_tag_ref-helper-function +++ a/mm/mm_init.c @@ -2459,15 +2459,7 @@ void __init memblock_free_pages(struct p } /* pages were reserved and not allocated */ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - + clear_page_tag_ref(page); __free_pages_core(page, order, MEMINIT_EARLY); } --- a/mm/page_alloc.c~alloc_tag-introduce-clear_page_tag_ref-helper-function +++ a/mm/page_alloc.c @@ -5815,14 +5815,7 @@ unsigned long free_reserved_area(void *s void free_reserved_page(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } + clear_page_tag_ref(page); ClearPageReserved(page); init_page_count(page); __free_page(page); _ Patches currently in -mm which might be from surenb(a)google.com are

10 months, 2 weeks

1
0
0 0

[merged mm-hotfixes-stable] selftests-memfd_secret-dont-build-memfd_secret-test-on-unsupported-arches.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: selftests: memfd_secret: don't build memfd_secret test on unsupported arches has been removed from the -mm tree. Its filename was selftests-memfd_secret-dont-build-memfd_secret-test-on-unsupported-arches.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Muhammad Usama Anjum <usama.anjum(a)collabora.com> Subject: selftests: memfd_secret: don't build memfd_secret test on unsupported arches Date: Fri, 9 Aug 2024 12:56:42 +0500 [1] mentions that memfd_secret is only supported on arm64, riscv, x86 and x86_64 for now. It doesn't support other architectures. I found the build error on arm and decided to send the fix as it was creating noise on KernelCI: memfd_secret.c: In function 'memfd_secret': memfd_secret.c:42:24: error: '__NR_memfd_secret' undeclared (first use in this function); did you mean 'memfd_secret'? 42 | return syscall(__NR_memfd_secret, flags); | ^~~~~~~~~~~~~~~~~ | memfd_secret Hence I'm adding condition that memfd_secret should only be compiled on supported architectures. Also check in run_vmtests script if memfd_secret binary is present before executing it. Link: https://lkml.kernel.org/r/20240812061522.1933054-1-usama.anjum@collabora.com Link: https://lore.kernel.org/all/20210518072034.31572-7-rppt@kernel.org/ [1] Link: https://lkml.kernel.org/r/20240809075642.403247-1-usama.anjum@collabora.com Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)") Signed-off-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com> Reviewed-by: Shuah Khan <skhan(a)linuxfoundation.org> Acked-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org> Cc: Albert Ou <aou(a)eecs.berkeley.edu> Cc: James Bottomley <James.Bottomley(a)HansenPartnership.com> Cc: Mike Rapoport (Microsoft) <rppt(a)kernel.org> Cc: Palmer Dabbelt <palmer(a)dabbelt.com> Cc: Paul Walmsley <paul.walmsley(a)sifive.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- tools/testing/selftests/mm/Makefile | 2 ++ tools/testing/selftests/mm/run_vmtests.sh | 3 +++ 2 files changed, 5 insertions(+) --- a/tools/testing/selftests/mm/Makefile~selftests-memfd_secret-dont-build-memfd_secret-test-on-unsupported-arches +++ a/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test --- a/tools/testing/selftests/mm/run_vmtests.sh~selftests-memfd_secret-dont-build-memfd_secret-test-on-unsupported-arches +++ a/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm. # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 _ Patches currently in -mm which might be from usama.anjum(a)collabora.com are selftests-mm-fix-build-errors-on-armhf.patch

10 months, 2 weeks

1
0
0 0

[merged mm-hotfixes-stable] mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm: fix endless reclaim on machines with unaccepted memory has been removed from the -mm tree. Its filename was mm-fix-endless-reclaim-on-machines-with-unaccepted-memory.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com> Subject: mm: fix endless reclaim on machines with unaccepted memory Date: Fri, 9 Aug 2024 14:48:47 +0300 Unaccepted memory is considered unusable free memory, which is not counted as free on the zone watermark check. This causes get_page_from_freelist() to accept more memory to hit the high watermark, but it creates problems in the reclaim path. The reclaim path encounters a failed zone watermark check and attempts to reclaim memory. This is usually successful, but if there is little or no reclaimable memory, it can result in endless reclaim with little to no progress. This can occur early in the boot process, just after start of the init process when the only reclaimable memory is the page cache of the init executable and its libraries. Make unaccepted memory free from watermark check point of view. This way unaccepted memory will never be the trigger of memory reclaim. Accept more memory in the get_page_from_freelist() if needed. Link: https://lkml.kernel.org/r/20240809114854.3745464-2-kirill.shutemov@linux.in… Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Reported-by: Jianxiong Gao <jxgao(a)google.com> Acked-by: David Hildenbrand <david(a)redhat.com> Tested-by: Jianxiong Gao <jxgao(a)google.com> Cc: Borislav Petkov <bp(a)alien8.de> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Mel Gorman <mgorman(a)suse.de> Cc: Mike Rapoport (Microsoft) <rppt(a)kernel.org> Cc: Tom Lendacky <thomas.lendacky(a)amd.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: <stable(a)vger.kernel.org> [6.5+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/page_alloc.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) --- a/mm/page_alloc.c~mm-fix-endless-reclaim-on-machines-with-unaccepted-memory +++ a/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unus if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3368,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3393,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3450,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -6950,9 +6945,6 @@ static bool try_to_accept_memory_one(str struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6978,23 +6970,29 @@ static bool try_to_accept_memory_one(str return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7037,7 +7035,7 @@ static void accept_page(struct page *pag { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } _ Patches currently in -mm which might be from kirill.shutemov(a)linux.intel.com are mm-reduce-deferred-struct-page-init-ifdeffery.patch mm-accept-memory-in-__alloc_pages_bulk.patch mm-introduce-pageunaccepted-page-type.patch mm-rework-accept-memory-helpers.patch mm-add-a-helper-to-accept-page.patch mm-page_isolation-handle-unaccepted-memory-isolation.patch mm-accept-to-promo-watermark.patch

10 months, 2 weeks

1
0
0 0

[merged mm-hotfixes-stable] mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/numa: no task_numa_fault() call if PMD is changed has been removed from the -mm tree. Its filename was mm-numa-no-task_numa_fault-call-if-pmd-is-changed.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Zi Yan <ziy(a)nvidia.com> Subject: mm/numa: no task_numa_fault() call if PMD is changed Date: Fri, 9 Aug 2024 10:59:05 -0400 When handling a numa page fault, task_numa_fault() should be called by a process that restores the page table of the faulted folio to avoid duplicated stats counting. Commit c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling") restructured do_huge_pmd_numa_page() and did not avoid task_numa_fault() call in the second page table check after a numa migration failure. Fix it by making all !pmd_same() return immediately. This issue can cause task_numa_fault() being called more than necessary and lead to unexpected numa balancing results (It is hard to tell whether the issue will cause positive or negative performance impact due to duplicated numa fault counting). Link: https://lkml.kernel.org/r/20240809145906.1513458-3-ziy@nvidia.com Fixes: c5b5a3dd2c1f ("mm: thp: refactor NUMA fault handling") Reported-by: "Huang, Ying" <ying.huang(a)intel.com> Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.inte… Signed-off-by: Zi Yan <ziy(a)nvidia.com> Acked-by: David Hildenbrand <david(a)redhat.com> Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com> Cc: "Huang, Ying" <ying.huang(a)intel.com> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Mel Gorman <mgorman(a)suse.de> Cc: Yang Shi <shy828301(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/huge_memory.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) --- a/mm/huge_memory.c~mm-numa-no-task_numa_fault-call-if-pmd-is-changed +++ a/mm/huge_memory.c @@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct if (!migrate_misplaced_folio(folio, vma, target_nid)) { flags |= TNF_MIGRATED; nid = target_nid; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { - spin_unlock(vmf->ptl); - goto out; - } - goto out_map; - } - -out: - if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; + } - return 0; - + flags |= TNF_MIGRATE_FAIL; + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { + spin_unlock(vmf->ptl); + return 0; + } out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1753,7 +1747,10 @@ out_map: set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* _ Patches currently in -mm which might be from ziy(a)nvidia.com are memory-tiering-read-last_cpupid-correctly-in-do_huge_pmd_numa_page.patch memory-tiering-introduce-folio_use_access_time-check.patch memory-tiering-count-pgpromote_success-when-mem-tiering-is-enabled.patch mm-migrate-move-common-code-to-numa_migrate_check-was-numa_migrate_prep.patch

10 months, 2 weeks

1
0
0 0

[merged mm-hotfixes-stable] mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/numa: no task_numa_fault() call if PTE is changed has been removed from the -mm tree. Its filename was mm-numa-no-task_numa_fault-call-if-pte-is-changed.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Zi Yan <ziy(a)nvidia.com> Subject: mm/numa: no task_numa_fault() call if PTE is changed Date: Fri, 9 Aug 2024 10:59:04 -0400 When handling a numa page fault, task_numa_fault() should be called by a process that restores the page table of the faulted folio to avoid duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") restructured do_numa_page() and did not avoid task_numa_fault() call in the second page table check after a numa migration failure. Fix it by making all !pte_same() return immediately. This issue can cause task_numa_fault() being called more than necessary and lead to unexpected numa balancing results (It is hard to tell whether the issue will cause positive or negative performance impact due to duplicated numa fault counting). Link: https://lkml.kernel.org/r/20240809145906.1513458-2-ziy@nvidia.com Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault") Signed-off-by: Zi Yan <ziy(a)nvidia.com> Reported-by: "Huang, Ying" <ying.huang(a)intel.com> Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.inte… Acked-by: David Hildenbrand <david(a)redhat.com> Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com> Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com> Cc: Mel Gorman <mgorman(a)suse.de> Cc: Yang Shi <shy828301(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/memory.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) --- a/mm/memory.c~mm-numa-no-task_numa_fault-call-if-pte-is-changed +++ a/mm/memory.c @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm if (!migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (unlikely(!vmf->pte)) - goto out; - if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!vmf->pte)) + return 0; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } out_map: /* * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map: numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) _ Patches currently in -mm which might be from ziy(a)nvidia.com are memory-tiering-read-last_cpupid-correctly-in-do_huge_pmd_numa_page.patch memory-tiering-introduce-folio_use_access_time-check.patch memory-tiering-count-pgpromote_success-when-mem-tiering-is-enabled.patch mm-migrate-move-common-code-to-numa_migrate_check-was-numa_migrate_prep.patch

10 months, 2 weeks

1
0
0 0

[merged mm-hotfixes-stable] mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0 has been removed from the -mm tree. Its filename was mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Hailong Liu <hailong.liu(a)oppo.com> Subject: mm/vmalloc: fix page mapping if vm_area_alloc_pages() with high order fallback to order 0 Date: Thu, 8 Aug 2024 20:19:56 +0800 The __vmap_pages_range_noflush() assumes its argument pages** contains pages with the same page shift. However, since commit e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations"), if gfp_flags includes __GFP_NOFAIL with high order in vm_area_alloc_pages() and page allocation failed for high order, the pages** may contain two different page shifts (high order and order-0). This could lead __vmap_pages_range_noflush() to perform incorrect mappings, potentially resulting in memory corruption. Users might encounter this as follows (vmap_allow_huge = true, 2M is for PMD_SIZE): kvmalloc(2M, __GFP_NOFAIL|GFP_X) __vmalloc_node_range_noprof(vm_flags=VM_ALLOW_HUGE_VMAP) vm_area_alloc_pages(order=9) ---> order-9 allocation failed and fallback to order-0 vmap_pages_range() vmap_pages_range_noflush() __vmap_pages_range_noflush(page_shift = 21) ----> wrong mapping happens We can remove the fallback code because if a high-order allocation fails, __vmalloc_node_range_noprof() will retry with order-0. Therefore, it is unnecessary to fallback to order-0 here. Therefore, fix this by removing the fallback code. Link: https://lkml.kernel.org/r/20240808122019.3361-1-hailong.liu@oppo.com Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations") Signed-off-by: Hailong Liu <hailong.liu(a)oppo.com> Reported-by: Tangquan Zheng <zhengtangquan(a)oppo.com> Reviewed-by: Baoquan He <bhe(a)redhat.com> Reviewed-by: Uladzislau Rezki (Sony) <urezki(a)gmail.com> Acked-by: Barry Song <baohua(a)kernel.org> Acked-by: Michal Hocko <mhocko(a)suse.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/vmalloc.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) --- a/mm/vmalloc.c~mm-vmalloc-fix-page-mapping-if-vm_area_alloc_pages-with-high-order-fallback-to-order-0 +++ a/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = alloc_pages_noprof(alloc_gfp, order); else page = alloc_pages_node_noprof(nid, alloc_gfp, order); - if (unlikely(!page)) { - if (!nofail) - break; - - /* fall back to the zero order allocations */ - alloc_gfp |= __GFP_NOFAIL; - order = 0; - continue; - } + if (unlikely(!page)) + break; /* * Higher order allocations must be able to be treated as _ Patches currently in -mm which might be from hailong.liu(a)oppo.com are

10 months, 2 weeks

1
0
0 0

[merged mm-hotfixes-stable] mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch removed from -mm tree

by Andrew Morton

The quilt patch titled Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu has been removed from the -mm tree. Its filename was mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch This patch was dropped because it was merged into the mm-hotfixes-stable branch of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm ------------------------------------------------------ From: Waiman Long <longman(a)redhat.com> Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu Date: Tue, 6 Aug 2024 12:41:07 -0400 The memory_failure_cpu structure is a per-cpu structure. Access to its content requires the use of get_cpu_var() to lock in the current CPU and disable preemption. The use of a regular spinlock_t for locking purpose is fine for a non-RT kernel. Since the integration of RT spinlock support into the v5.15 kernel, a spinlock_t in a RT kernel becomes a sleeping lock and taking a sleeping lock in a preemption disabled context is illegal resulting in the following kind of warning. [12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 [12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0 [12135.732252] preempt_count: 1, expected: 0 [12135.732255] RCU nest depth: 2, expected: 2 : [12135.732420] Hardware name: Dell Inc. PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021 [12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred [12135.732433] Call Trace: [12135.732436] <TASK> [12135.732450] dump_stack_lvl+0x57/0x81 [12135.732461] __might_resched.cold+0xf4/0x12f [12135.732479] rt_spin_lock+0x4c/0x100 [12135.732491] memory_failure_queue+0x40/0xe0 [12135.732503] ghes_do_memory_failure+0x53/0x390 [12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0 [12135.732575] ghes_proc+0xf9/0x1a0 [12135.732591] ghes_notify_hed+0x6a/0x150 [12135.732602] notifier_call_chain+0x43/0xb0 [12135.732626] blocking_notifier_call_chain+0x43/0x60 [12135.732637] acpi_ev_notify_dispatch+0x47/0x70 [12135.732648] acpi_os_execute_deferred+0x13/0x20 [12135.732654] process_one_work+0x41f/0x500 [12135.732695] worker_thread+0x192/0x360 [12135.732715] kthread+0x111/0x140 [12135.732733] ret_from_fork+0x29/0x50 [12135.732779] </TASK> Fix it by using a raw_spinlock_t for locking instead. Also move the pr_err() out of the lock critical section and after put_cpu_ptr() to avoid indeterminate latency and the possibility of sleep with this call. [longman(a)redhat.com: don't hold percpu ref across pr_err(), per Miaohe] Link: https://lkml.kernel.org/r/20240807181130.1122660-1-longman@redhat.com Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com Fixes: 0f383b6dc96e ("locking/spinlock: Provide RT variant") Signed-off-by: Waiman Long <longman(a)redhat.com> Acked-by: Miaohe Lin <linmiaohe(a)huawei.com> Cc: "Huang, Ying" <ying.huang(a)intel.com> Cc: Juri Lelli <juri.lelli(a)redhat.com> Cc: Len Brown <len.brown(a)intel.com> Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/memory-failure.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) --- a/mm/memory-failure.c~mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu +++ a/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(str mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(vo for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } _ Patches currently in -mm which might be from longman(a)redhat.com are watchdog-handle-the-enodev-failure-case-of-lockup_detector_delay_init-separately.patch

10 months, 2 weeks

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror August 2024