[PATCH] mm/gup: Use try_grab_page() instead of try_grab_folio() in gup slow path - Linux-stable-mirror

27 Jun 2024

From: yangge yangge1116@126.com
If a large number of CMA memory are configured in system (for
example, the CMA memory accounts for 50% of the system memory),
starting a SEV virtual machine will fail. During starting the SEV
virtual machine, it will call pin_user_pages_fast(..., FOLL_LONGTERM,
...) to pin memory. Normally if a page is present and in CMA area,
pin_user_pages_fast() will first call __get_user_pages_locked() to
pin the page in CMA area, and then call
check_and_migrate_movable_pages() to migrate the page from CMA area
to non-CMA area. But the current code calling __get_user_pages_locked()
will fail, because it call try_grab_folio() to pin page in gup slow
path.
The commit 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages
!= NULL"") uses try_grab_folio() in gup slow path, which seems to be
problematic because try_grap_folio() will check if the page can be
longterm pinned. This check may fail and cause __get_user_pages_lock()
to fail. However, these checks are not required in gup slow path,
seems we can use try_grab_page() instead of try_grab_folio(). In
addition, in the current code, try_grab_page() can only add 1 to the
page's refcount. We extend this function so that the page's refcount
can be increased according to the parameters passed in.
The following log reveals it:
[  464.325306] WARNING: CPU: 13 PID: 6734 at mm/gup.c:1313 __get_user_pages+0x423/0x520
[  464.325464] CPU: 13 PID: 6734 Comm: qemu-kvm Kdump: loaded Not tainted 6.6.33+ #6
[  464.325477] RIP: 0010:__get_user_pages+0x423/0x520
[  464.325515] Call Trace:
[  464.325520]  <TASK>
[  464.325523]  ? __get_user_pages+0x423/0x520
[  464.325528]  ? __warn+0x81/0x130
[  464.325536]  ? __get_user_pages+0x423/0x520
[  464.325541]  ? report_bug+0x171/0x1a0
[  464.325549]  ? handle_bug+0x3c/0x70
[  464.325554]  ? exc_invalid_op+0x17/0x70
[  464.325558]  ? asm_exc_invalid_op+0x1a/0x20
[  464.325567]  ? __get_user_pages+0x423/0x520
[  464.325575]  __gup_longterm_locked+0x212/0x7a0
[  464.325583]  internal_get_user_pages_fast+0xfb/0x190
[  464.325590]  pin_user_pages_fast+0x47/0x60
[  464.325598]  sev_pin_memory+0xca/0x170 [kvm_amd]
[  464.325616]  sev_mem_enc_register_region+0x81/0x130 [kvm_amd]
Fixes: 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages != NULL"")
Cc: stable@vger.kernel.org
Signed-off-by: yangge yangge1116@126.com
---
 mm/gup.c         | 26 ++++++++++++--------------
 mm/huge_memory.c |  2 +-
 mm/internal.h    |  2 +-
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 6ff9f95..bb58909 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -222,7 +222,7 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
  *   -ENOMEM		FOLL_GET or FOLL_PIN was set, but the page could not
  *			be grabbed.
  */
-int __must_check try_grab_page(struct page *page, unsigned int flags)
+int __must_check try_grab_page(struct page *page, int refs, unsigned int flags)
 {
    struct folio *folio = page_folio(page);
@@ -233,7 +233,7 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
    	return -EREMOTEIO;
if (flags & FOLL_GET)
-		folio_ref_inc(folio);
+		folio_ref_add(folio, refs);
    else if (flags & FOLL_PIN) {
    	/*
    	 * Don't take a pin on the zero page - it's not going anywhere
@@ -248,13 +248,13 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
    	 * so that the page really is pinned.
    	 */
    	if (folio_test_large(folio)) {
-			folio_ref_add(folio, 1);
-			atomic_add(1, &folio->_pincount);
+			folio_ref_add(folio, refs);
+			atomic_add(refs, &folio->_pincount);
    	} else {
-			folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+			folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);
    	}
-		node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+		node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
    }
return 0;
@@ -729,7 +729,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
        gup_must_unshare(vma, flags, page))
    	return ERR_PTR(-EMLINK);
-	ret = try_grab_page(page, flags);
+	ret = try_grab_page(page, 1, flags);
    if (ret)
    	page = ERR_PTR(ret);
    else
@@ -806,7 +806,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma,
    VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
    		!PageAnonExclusive(page), page);
-	ret = try_grab_page(page, flags);
+	ret = try_grab_page(page, 1, flags);
    if (ret)
    	return ERR_PTR(ret);
@@ -969,7 +969,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
    	       !PageAnonExclusive(page), page);
/* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
-	ret = try_grab_page(page, flags);
+	ret = try_grab_page(page, 1, flags);
    if (unlikely(ret)) {
    	page = ERR_PTR(ret);
    	goto out;
@@ -1233,7 +1233,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
    		goto unmap;
    	*page = pte_page(entry);
    }
-	ret = try_grab_page(*page, gup_flags);
+	ret = try_grab_page(*page, 1, gup_flags);
    if (unlikely(ret))
    	goto unmap;
 out:
@@ -1636,22 +1636,20 @@ static long __get_user_pages(struct mm_struct *mm,
    		 * pages.
    		 */
    		if (page_increm > 1) {
-				struct folio *folio;
/*
    			 * Since we already hold refcount on the
    			 * large folio, this should never fail.
    			 */
-				folio = try_grab_folio(page, page_increm - 1,
+				ret = try_grab_page(page, page_increm - 1,
    					       foll_flags);
-				if (WARN_ON_ONCE(!folio)) {
+				if (WARN_ON_ONCE(ret)) {
    				/*
    				 * Release the 1st page ref if the
    				 * folio is problematic, fail hard.
    				 */
    				gup_put_folio(page_folio(page), 1,
    					      foll_flags);
-					ret = -EFAULT;
    				goto out;
    			}
    		}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 425374a..18604e4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1332,7 +1332,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
    if (!*pgmap)
    	return ERR_PTR(-EFAULT);
    page = pfn_to_page(pfn);
-	ret = try_grab_page(page, flags);
+	ret = try_grab_page(page, 1, flags);
    if (ret)
    	page = ERR_PTR(ret);
diff --git a/mm/internal.h b/mm/internal.h
index 2ea9a88..5305bbf 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1227,7 +1227,7 @@ int migrate_device_coherent_page(struct page *page);
  * mm/gup.c
  */
 struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
-int __must_check try_grab_page(struct page *page, unsigned int flags);
+int __must_check try_grab_page(struct page *page, int refs, unsigned int flags);
/*
  * mm/huge_memory.c
-- 
2.7.4