 
            On 28.08.25 19:28, Lorenzo Stoakes wrote:
On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
Let's disallow handing out PFN ranges with non-contiguous pages, so we can remove the nth-page usage in __cma_alloc(), and so any callers don't have to worry about that either when wanting to blindly iterate pages.
This is really only a problem in configs with SPARSEMEM but without SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some cases.
I'm guessing this is something that we don't need to worry about in reality?
That my theory yes.
Will this cause harm? Probably not, because it's mostly 32bit that does not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could look into allocating the memmap for the memory sections spanned by a single CMA region in one go from memblock.
Reviewed-by: Alexandru Elisei alexandru.elisei@arm.com Signed-off-by: David Hildenbrand david@redhat.com
LGTM other than refactoring point below.
CMA stuff looks fine afaict after staring at it for a while, on proviso that handing out ranges within the same section is always going to be the case.
Anyway overall,
LGTM, so:
Reviewed-by: Lorenzo Stoakes lorenzo.stoakes@oracle.com
include/linux/mm.h | 6 ++++++ mm/cma.c | 39 ++++++++++++++++++++++++--------------- mm/util.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index f6880e3225c5c..2ca1eb2db63ec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes;
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +bool page_range_contiguous(const struct page *page, unsigned long nr_pages); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #else #define nth_page(page,n) ((page) + (n)) +static inline bool page_range_contiguous(const struct page *page,
unsigned long nr_pages)+{
- return true;
+} #endif
/* to align the pointer to the (next) page boundary */ diff --git a/mm/cma.c b/mm/cma.c index e56ec64d0567e..813e6dc7b0954 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -780,10 +780,8 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, unsigned long count, unsigned int align, struct page **pagep, gfp_t gfp) {
- unsigned long mask, offset;
- unsigned long pfn = -1;
- unsigned long start = 0; unsigned long bitmap_maxno, bitmap_no, bitmap_count;
- unsigned long start, pfn, mask, offset; int ret = -EBUSY; struct page *page = NULL;
@@ -795,7 +793,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, if (bitmap_count > bitmap_maxno) goto out;
- for (;;) {
- for (start = 0; ; start = bitmap_no + mask + 1) { spin_lock_irq(&cma->lock); /*
- If the request is larger than the available number
@@ -812,6 +810,22 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, spin_unlock_irq(&cma->lock); break; }
pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
page = pfn_to_page(pfn);
/*
* Do not hand out page ranges that are not contiguous, so
* callers can just iterate the pages without having to worry
* about these corner cases.
*/
if (!page_range_contiguous(page, count)) {
spin_unlock_irq(&cma->lock);
pr_warn_ratelimited("%s: %s: skipping incompatible area [0x%lx-0x%lx]",
__func__, cma->name, pfn, pfn + count - 1);
continue;
}- bitmap_set(cmr->bitmap, bitmap_no, bitmap_count); cma->available_count -= count; /*
@@ -821,29 +835,24 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr, */ spin_unlock_irq(&cma->lock);
mutex_lock(&cma->alloc_mutex); ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp); mutex_unlock(&cma->alloc_mutex);
pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
if (ret == 0) {
page = pfn_to_page(pfn);
if (!ret) break;
}cma_clear_bitmap(cma, cmr, pfn, count); if (ret != -EBUSY) break;
pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
__func__, pfn, pfn_to_page(pfn));
__func__, pfn, page);
trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
count, align);
/* try again with a bit different memory target */
start = bitmap_no + mask + 1;
} out:
trace_cma_alloc_busy_retry(cma->name, pfn, page, count, align);
- *pagep = page;
- if (!ret)
return ret; }
*pagep = page;@@ -882,7 +891,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count, */ if (page) { for (i = 0; i < count; i++)
page_kasan_tag_reset(nth_page(page, i));
page_kasan_tag_reset(page + i);}
if (ret && !(gfp & __GFP_NOWARN)) {
diff --git a/mm/util.c b/mm/util.c index d235b74f7aff7..0bf349b19b652 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1280,4 +1280,37 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte, { return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, 0); }
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) +/**
- page_range_contiguous - test whether the page range is contiguous
- @page: the start of the page range.
- @nr_pages: the number of pages in the range.
- Test whether the page range is contiguous, such that they can be iterated
- naively, corresponding to iterating a contiguous PFN range.
- This function should primarily only be used for debug checks, or when
- working with page ranges that are not naturally contiguous (e.g., pages
- within a folio are).
- Returns true if contiguous, otherwise false.
- */
+bool page_range_contiguous(const struct page *page, unsigned long nr_pages) +{
- const unsigned long start_pfn = page_to_pfn(page);
- const unsigned long end_pfn = start_pfn + nr_pages;
- unsigned long pfn;
- /*
* The memmap is allocated per memory section. We need to check
* each involved memory section once.
*/- for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
pfn < end_pfn; pfn += PAGES_PER_SECTION)
if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
return false;I find this pretty confusing, my test for this is how many times I have to read the code to understand what it's doing :)
So we have something like:
(pfn of page) start_pfn pfn = align UP | | v v | section | <-----------------> pfn - start_pfn
Then check page + (pfn - start_pfn) == pfn_to_page(pfn)
And loop such that:
(pfn of page) start_pfn pfn | | v v | section | section | <------------------------------------------> pfn - start_pfn
Again check page + (pfn - start_pfn) == pfn_to_page(pfn)
And so on.
So the logic looks good, but it's just... that took me a hot second to parse :)
I think a few simple fixups
bool page_range_contiguous(const struct page *page, unsigned long nr_pages) { const unsigned long start_pfn = page_to_pfn(page); const unsigned long end_pfn = start_pfn + nr_pages; /* The PFN of the start of the next section. */ unsigned long pfn = ALIGN(start_pfn, PAGES_PER_SECTION); /* The page we'd expected to see if the range were contiguous. */ struct page *expected = page + (pfn - start_pfn);
/* * The memmap is allocated per memory section. We need to check * each involved memory section once. */ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION, expected += PAGES_PER_SECTION) if (unlikely(expected != pfn_to_page(pfn))) return false; return true; }
Hm, I prefer my variant, especially where the pfn is calculated in the for loop. Likely a matter of personal taste.
But I can see why skipping the first section might be a surprise when not having the semantics of ALIGN() in the cache.
So I'll add the following on top:
diff --git a/mm/util.c b/mm/util.c index 0bf349b19b652..fbdb73aaf35fe 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1303,8 +1303,10 @@ bool page_range_contiguous(const struct page *page, unsigned long nr_pages) unsigned long pfn;
/* - * The memmap is allocated per memory section. We need to check - * each involved memory section once. + * The memmap is allocated per memory section, so no need to check + * within the first section. However, we need to check each other + * spanned memory section once, making sure the first page in a + * section could similarly be reached by just iterating pages. */ for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION); pfn < end_pfn; pfn += PAGES_PER_SECTION)
Thanks!