The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 67bab13307c83fb742c2556b06cdc39dbad27f07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030908-defacing-rumor-448c@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Wed, 19 Feb 2025 11:46:44 +0800
Subject: [PATCH] mm/hugetlb: wait for hugetlb folios to be freed
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.…
Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..dbe76d4f1bfc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 163190e89ea1..811b29f77abf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct zone *zone;
int ret;
+ /*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
/*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 67bab13307c83fb742c2556b06cdc39dbad27f07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030907-blush-surname-f05c@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Wed, 19 Feb 2025 11:46:44 +0800
Subject: [PATCH] mm/hugetlb: wait for hugetlb folios to be freed
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.…
Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..dbe76d4f1bfc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 163190e89ea1..811b29f77abf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct zone *zone;
int ret;
+ /*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
/*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 67bab13307c83fb742c2556b06cdc39dbad27f07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030906-iodize-baboon-b1af@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Wed, 19 Feb 2025 11:46:44 +0800
Subject: [PATCH] mm/hugetlb: wait for hugetlb folios to be freed
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.…
Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..dbe76d4f1bfc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 163190e89ea1..811b29f77abf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct zone *zone;
int ret;
+ /*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
/*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 67bab13307c83fb742c2556b06cdc39dbad27f07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030905-parchment-riddance-0a09@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Wed, 19 Feb 2025 11:46:44 +0800
Subject: [PATCH] mm/hugetlb: wait for hugetlb folios to be freed
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.…
Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..dbe76d4f1bfc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 163190e89ea1..811b29f77abf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct zone *zone;
int ret;
+ /*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
/*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 67bab13307c83fb742c2556b06cdc39dbad27f07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030904-splendor-sly-a852@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Wed, 19 Feb 2025 11:46:44 +0800
Subject: [PATCH] mm/hugetlb: wait for hugetlb folios to be freed
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.…
Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..dbe76d4f1bfc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 163190e89ea1..811b29f77abf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct zone *zone;
int ret;
+ /*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
/*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 67bab13307c83fb742c2556b06cdc39dbad27f07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030903-simplify-blooming-c758@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Wed, 19 Feb 2025 11:46:44 +0800
Subject: [PATCH] mm/hugetlb: wait for hugetlb folios to be freed
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.…
Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..dbe76d4f1bfc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 163190e89ea1..811b29f77abf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct zone *zone;
int ret;
+ /*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
/*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x 67bab13307c83fb742c2556b06cdc39dbad27f07
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030902-guidance-kung-0573@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67bab13307c83fb742c2556b06cdc39dbad27f07 Mon Sep 17 00:00:00 2001
From: Ge Yang <yangge1116(a)126.com>
Date: Wed, 19 Feb 2025 11:46:44 +0800
Subject: [PATCH] mm/hugetlb: wait for hugetlb folios to be freed
Since the introduction of commit c77c0a8ac4c52 ("mm/hugetlb: defer freeing
of huge pages if in non-task context"), which supports deferring the
freeing of hugetlb pages, the allocation of contiguous memory through
cma_alloc() may fail probabilistically.
In the CMA allocation process, if it is found that the CMA area is
occupied by in-use hugetlb folios, these in-use hugetlb folios need to be
migrated to another location. When there are no available hugetlb folios
in the free hugetlb pool during the migration of in-use hugetlb folios,
new folios are allocated from the buddy system. A temporary state is set
on the newly allocated folio. Upon completion of the hugetlb folio
migration, the temporary state is transferred from the new folios to the
old folios. Normally, when the old folios with the temporary state are
freed, they are directly released back to the buddy system. However, due to
the deferred freeing of hugetlb pages, the PageBuddy() check fails,
ultimately leading to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugetlb folios
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_freed_hugetlb_folios(). This function ensures that the hugetlb
folios are properly released back to the buddy system after their
migration is completed. By invoking wait_for_freed_hugetlb_folios()
before calling PageBuddy(), we ensure that PageBuddy() will succeed.
Link: https://lkml.kernel.org/r/1739936804-18199-1-git-send-email-yangge1116@126.com
Fixes: c77c0a8ac4c5 ("mm/hugetlb: defer freeing of huge pages if in non-task context")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ec8c0ccc8f95..dbe76d4f1bfc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1066,6 +1067,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_freed_hugetlb_folios(void)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 163190e89ea1..811b29f77abf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_freed_hugetlb_folios(void)
+{
+ if (llist_empty(&hpage_freelist))
+ return;
+
+ flush_work(&free_hpage_work);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index c608e9d72865..a051a29e95ad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -607,6 +607,16 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct zone *zone;
int ret;
+ /*
+ * Due to the deferred freeing of hugetlb folios, the hugepage folios may
+ * not immediately release to the buddy system. This can cause PageBuddy()
+ * to fail in __test_page_isolated_in_pageblock(). To ensure that the
+ * hugetlb folios are properly released back to the buddy system, we
+ * invoke the wait_for_freed_hugetlb_folios() function to wait for the
+ * release to complete.
+ */
+ wait_for_freed_hugetlb_folios();
+
/*
* Note: pageblock_nr_pages != MAX_PAGE_ORDER. Then, chunks of free
* pages are not aligned to pageblock_nr_pages.
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 058313515d5aab10d0a01dd634f92ed4a4e71d4c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030954-polish-overeater-d2be@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 058313515d5aab10d0a01dd634f92ed4a4e71d4c Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Date: Tue, 25 Feb 2025 17:52:55 +0800
Subject: [PATCH] mm: shmem: fix potential data corruption during shmem swapin
Alex and Kairui reported some issues (system hang or data corruption) when
swapping out or swapping in large shmem folios. This is especially easy
to reproduce when the tmpfs is mounted with the 'huge=within_size'
parameter. Thanks to Kairui's reproducer, the issue can be easily
replicated.
The root cause of the problem is that swap readahead may asynchronously
swap in order 0 folios into the swap cache, while the shmem mapping can
still store large swap entries. Then an order 0 folio is inserted into
the shmem mapping without splitting the large swap entry, which overwrites
the original large swap entry, leading to data corruption.
When getting a folio from the swap cache, we should split the large swap
entry stored in the shmem mapping if the orders do not match, to fix this
issue.
Link: https://lkml.kernel.org/r/2fe47c557e74e9df5fe2437ccdc6c9115fa1bf70.17404769…
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reported-by: Alex Xu (Hello71) <alex_y_xu(a)yahoo.ca>
Reported-by: Kairui Song <ryncsn(a)gmail.com>
Closes: https://lore.kernel.org/all/1738717785.im3r5g2vxc.none@localhost/
Tested-by: Kairui Song <kasong(a)tencent.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Lance Yang <ioworker0(a)gmail.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ea6109a8043..cebbac97a221 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2253,7 +2253,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
struct folio *folio = NULL;
bool skip_swapcache = false;
swp_entry_t swap;
- int error, nr_pages;
+ int error, nr_pages, order, split_order;
VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
swap = radix_to_swp_entry(*foliop);
@@ -2272,10 +2272,9 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
/* Look it up and read it in.. */
folio = swap_cache_get_folio(swap, NULL, 0);
+ order = xa_get_order(&mapping->i_pages, index);
if (!folio) {
- int order = xa_get_order(&mapping->i_pages, index);
bool fallback_order0 = false;
- int split_order;
/* Or update major stats only when swapin succeeds?? */
if (fault_type) {
@@ -2339,6 +2338,29 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
+ } else if (order != folio_order(folio)) {
+ /*
+ * Swap readahead may swap in order 0 folios into swapcache
+ * asynchronously, while the shmem mapping can still stores
+ * large swap entries. In such cases, we should split the
+ * large swap entry to prevent possible data corruption.
+ */
+ split_order = shmem_split_large_entry(inode, index, swap, gfp);
+ if (split_order < 0) {
+ error = split_order;
+ goto failed;
+ }
+
+ /*
+ * If the large swap entry has already been split, it is
+ * necessary to recalculate the new swap entry based on
+ * the old order alignment.
+ */
+ if (split_order > 0) {
+ pgoff_t offset = index - round_down(index, 1 << split_order);
+
+ swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
+ }
}
alloced:
@@ -2346,7 +2368,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap)) {
+ !shmem_confirm_swap(mapping, index, swap) ||
+ xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
error = -EEXIST;
goto unlock;
}
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x 058313515d5aab10d0a01dd634f92ed4a4e71d4c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025030953-alkalize-eardrum-de40@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 058313515d5aab10d0a01dd634f92ed4a4e71d4c Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Date: Tue, 25 Feb 2025 17:52:55 +0800
Subject: [PATCH] mm: shmem: fix potential data corruption during shmem swapin
Alex and Kairui reported some issues (system hang or data corruption) when
swapping out or swapping in large shmem folios. This is especially easy
to reproduce when the tmpfs is mounted with the 'huge=within_size'
parameter. Thanks to Kairui's reproducer, the issue can be easily
replicated.
The root cause of the problem is that swap readahead may asynchronously
swap in order 0 folios into the swap cache, while the shmem mapping can
still store large swap entries. Then an order 0 folio is inserted into
the shmem mapping without splitting the large swap entry, which overwrites
the original large swap entry, leading to data corruption.
When getting a folio from the swap cache, we should split the large swap
entry stored in the shmem mapping if the orders do not match, to fix this
issue.
Link: https://lkml.kernel.org/r/2fe47c557e74e9df5fe2437ccdc6c9115fa1bf70.17404769…
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reported-by: Alex Xu (Hello71) <alex_y_xu(a)yahoo.ca>
Reported-by: Kairui Song <ryncsn(a)gmail.com>
Closes: https://lore.kernel.org/all/1738717785.im3r5g2vxc.none@localhost/
Tested-by: Kairui Song <kasong(a)tencent.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Lance Yang <ioworker0(a)gmail.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ea6109a8043..cebbac97a221 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2253,7 +2253,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
struct folio *folio = NULL;
bool skip_swapcache = false;
swp_entry_t swap;
- int error, nr_pages;
+ int error, nr_pages, order, split_order;
VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
swap = radix_to_swp_entry(*foliop);
@@ -2272,10 +2272,9 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
/* Look it up and read it in.. */
folio = swap_cache_get_folio(swap, NULL, 0);
+ order = xa_get_order(&mapping->i_pages, index);
if (!folio) {
- int order = xa_get_order(&mapping->i_pages, index);
bool fallback_order0 = false;
- int split_order;
/* Or update major stats only when swapin succeeds?? */
if (fault_type) {
@@ -2339,6 +2338,29 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
+ } else if (order != folio_order(folio)) {
+ /*
+ * Swap readahead may swap in order 0 folios into swapcache
+ * asynchronously, while the shmem mapping can still stores
+ * large swap entries. In such cases, we should split the
+ * large swap entry to prevent possible data corruption.
+ */
+ split_order = shmem_split_large_entry(inode, index, swap, gfp);
+ if (split_order < 0) {
+ error = split_order;
+ goto failed;
+ }
+
+ /*
+ * If the large swap entry has already been split, it is
+ * necessary to recalculate the new swap entry based on
+ * the old order alignment.
+ */
+ if (split_order > 0) {
+ pgoff_t offset = index - round_down(index, 1 << split_order);
+
+ swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
+ }
}
alloced:
@@ -2346,7 +2368,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap)) {
+ !shmem_confirm_swap(mapping, index, swap) ||
+ xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
error = -EEXIST;
goto unlock;
}