From: Ross Zwisler <ross.zwisler@linux.intel.com>
commit 097963959594c5eccaba42510f7033f703211bda upstream.
Patch series "Write protect DAX PMDs in *sync path".
Currently dax_mapping_entry_mkclean() fails to clean and write protect the pmd_t of a DAX PMD entry during an *sync operation. This can result in data loss, as detailed in patch 2.
This series is based on Dan's "libnvdimm-pending" branch, which is the current home for Jan's "dax: Page invalidation fixes" series. You can find a working tree here:
https://git.kernel.org/cgit/linux/kernel/git/zwisler/linux.git/log/?h=dax_pm...
This patch (of 2):
Similar to follow_pte(), follow_pte_pmd() allows either a PTE leaf or a huge page PMD leaf to be found and returned.
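As a rough illustration of how a caller is expected to use the new helper (a hedged sketch only, not the actual dax_mapping_entry_mkclean() change from patch 2 of this series; vma and address are assumed to come from the surrounding rmap walk, inside a void helper):

  pte_t *ptep = NULL;
  pmd_t *pmdp = NULL;
  spinlock_t *ptl;

  if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
          return;                 /* no leaf is mapped at this address */

  if (pmdp) {
          /* huge page PMD leaf, returned with the PMD lock held */
          pmd_t pmd = pmdp_huge_clear_flush(vma, address, pmdp);

          pmd = pmd_wrprotect(pmd);
          pmd = pmd_mkclean(pmd);
          set_pmd_at(vma->vm_mm, address, pmdp, pmd);
          spin_unlock(ptl);
  } else {
          /* PTE leaf, returned mapped and with the PTE lock held */
          pte_t pte = ptep_clear_flush(vma, address, ptep);

          pte = pte_wrprotect(pte);
          pte = pte_mkclean(pte);
          set_pte_at(vma->vm_mm, address, ptep, pte);
          pte_unmap_unlock(ptep, ptl);
  }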
Link: http://lkml.kernel.org/r/1482272586-21177-2-git-send-email-ross.zwisler@linu...
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[bwh: Backported to 4.9: adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 37 ++++++++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7a4c035b187f..81ee5d0b2642 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1269,6 +1269,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+		pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
diff --git a/mm/memory.c b/mm/memory.c
index c2890dc104d9..2b2cc69ddcce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3780,8 +3780,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-static int __follow_pte(struct mm_struct *mm, unsigned long address,
-		pte_t **ptepp, spinlock_t **ptlp)
+static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+		pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -3798,11 +3798,20 @@ static int __follow_pte(struct mm_struct *mm, unsigned long address,
 
 	pmd = pmd_offset(pud, address);
 	VM_BUG_ON(pmd_trans_huge(*pmd));
-	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-		goto out;
 
-	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
-	if (pmd_huge(*pmd))
+	if (pmd_huge(*pmd)) {
+		if (!pmdpp)
+			goto out;
+
+		*ptlp = pmd_lock(mm, pmd);
+		if (pmd_huge(*pmd)) {
+			*pmdpp = pmd;
+			return 0;
+		}
+		spin_unlock(*ptlp);
+	}
+
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
 		goto out;
 
 	ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
@@ -3825,9 +3834,23 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
 
 	/* (void) is needed to make gcc happy */
 	(void) __cond_lock(*ptlp,
-			   !(res = __follow_pte(mm, address, ptepp, ptlp)));
+			   !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
+						    ptlp)));
+	return res;
+}
+
+int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+		   pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+{
+	int res;
+
+	/* (void) is needed to make gcc happy */
+	(void) __cond_lock(*ptlp,
+			   !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
+						    ptlp)));
 	return res;
 }
+EXPORT_SYMBOL(follow_pte_pmd);
 
 /**
  * follow_pfn - look up PFN at a user virtual address
From: Paolo Bonzini <pbonzini@redhat.com>
commit bd2fae8da794b55bf2ac02632da3a151b10e664c upstream.
In order to convert an HVA to a PFN, KVM usually tries to use the get_user_pages family of functions. This, however, is not possible for VM_IO vmas; in that case, KVM instead uses follow_pfn.
In doing so, however, KVM loses the information on whether the PFN is writable. That is usually not a problem because the main use of VM_IO vmas with KVM is for BARs in PCI device assignment, but it is a bug nonetheless. To fix it, use follow_pte and check pte_write while under the protection of the PTE lock. The information can then be used to fail hva_to_pfn_remapped() or be passed back to the caller via *writable.
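Condensed, the fixed lookup looks roughly like this (a sketch of the approach described above, in the follow_pte_pmd() form available to this 4.9 backport; vma, addr, write_fault and writable are the hva_to_pfn_remapped() parameters, and the fixup_user_fault() retry path is omitted):

  pte_t *ptep;
  spinlock_t *ptl;
  kvm_pfn_t pfn;
  int r;

  r = follow_pte_pmd(vma->vm_mm, addr, &ptep, NULL, &ptl);
  if (r)
          return r;

  if (write_fault && !pte_write(*ptep)) {
          /* a mapping exists but is read-only: fail the write fault */
          pfn = KVM_PFN_ERR_RO_FAULT;
  } else {
          if (writable)
                  *writable = pte_write(*ptep);   /* report the real permission */
          pfn = pte_pfn(*ptep);
  }
  pte_unmap_unlock(ptep, ptl);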
Usage of follow_pfn was introduced in commit add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05); however, even older versions have the same issue, all the way back to commit 2e2e3738af33 ("KVM: Handle vma regions with no backing page", 2008-07-20), as they also did not check whether the PFN was writable.
Fixes: 2e2e3738af33 ("KVM: Handle vma regions with no backing page")
Reported-by: David Stevens <stevensd@google.com>
Cc: 3pvd@google.com
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[OP: backport to 4.19, adjust follow_pte() -> follow_pte_pmd()]
Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[bwh: Backport to 4.9: follow_pte_pmd() does not take start or end parameters]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 virt/kvm/kvm_main.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index db859b595dba..2729704f836e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1519,9 +1519,11 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 			       kvm_pfn_t *p_pfn)
 {
 	unsigned long pfn;
+	pte_t *ptep;
+	spinlock_t *ptl;
 	int r;
 
-	r = follow_pfn(vma, addr, &pfn);
+	r = follow_pte_pmd(vma->vm_mm, addr, &ptep, NULL, &ptl);
 	if (r) {
 		/*
 		 * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -1536,14 +1538,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 		if (r)
 			return r;
 
-		r = follow_pfn(vma, addr, &pfn);
+		r = follow_pte_pmd(vma->vm_mm, addr, &ptep, NULL, &ptl);
 		if (r)
 			return r;
+	}
 
+	if (write_fault && !pte_write(*ptep)) {
+		pfn = KVM_PFN_ERR_RO_FAULT;
+		goto out;
 	}
 
 	if (writable)
-		*writable = true;
+		*writable = pte_write(*ptep);
+	pfn = pte_pfn(*ptep);
 
 	/*
 	 * Get a reference here because callers of *hva_to_pfn* and
@@ -1558,6 +1565,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 */
 	kvm_get_pfn(pfn);
 
+out:
+	pte_unmap_unlock(ptep, ptl);
 	*p_pfn = pfn;
 	return 0;
 }
From: Sean Christopherson <seanjc@google.com>
commit a9545779ee9e9e103648f6f2552e73cfe808d0f4 upstream.
Use kvm_pfn_t, a.k.a. u64, for the local 'pfn' variable when retrieving a so-called "remapped" hva/pfn pair. In theory, the hva could resolve to a pfn in high memory on a 32-bit kernel.
This bug was inadvertently exposed by commit bd2fae8da794 ("KVM: do not assume PTE is writable after follow_pfn"), which added an error PFN value to the mix, causing gcc to complain about overflowing the unsigned long.
  arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function ‘hva_to_pfn_remapped’:
  include/linux/kvm_host.h:89:30: error: conversion from ‘long long unsigned int’ to ‘long unsigned int’ changes value from ‘9218868437227405314’ to ‘2’ [-Werror=overflow]
     89 | #define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
        |                              ^
  virt/kvm/kvm_main.c:1935:9: note: in expansion of macro ‘KVM_PFN_ERR_RO_FAULT’
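The warning boils down to a plain integer-width problem; a minimal illustration of the truncation (the literal below only mirrors the value in the gcc output above and is not taken from the kernel headers):

  u64 err = (0x7ffULL << 52) + 2;  /* 9218868437227405314, i.e. KVM_PFN_ERR_RO_FAULT */
  unsigned long narrow = err;      /* truncated to 2 on a 32-bit kernel */
  kvm_pfn_t wide = err;            /* kvm_pfn_t is u64, so the error bits survive */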
Cc: stable@vger.kernel.org
Fixes: add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210208201940.1258328-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2729704f836e..dc7b60c81039 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1518,7 +1518,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 			       bool write_fault, bool *writable,
 			       kvm_pfn_t *p_pfn)
 {
-	unsigned long pfn;
+	kvm_pfn_t pfn;
 	pte_t *ptep;
 	spinlock_t *ptl;
 	int r;
From: Nicholas Piggin <npiggin@gmail.com>
commit f8be156be163a052a067306417cd0ff679068c97 upstream.
It's possible to create a region which maps valid but non-refcounted pages (e.g., tail pages of non-compound higher-order allocations). These host pages can then be returned by the gfn_to_page, gfn_to_pfn, etc. family of APIs, which take a reference on the page, raising its refcount from 0 to 1. When that reference is dropped, the page is freed incorrectly.
Fix this by taking a reference on a valid page only if its refcount was already non-zero, which indicates it participates in normal refcounting (and can be released with put_page).
This addresses CVE-2021-22543.
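The reason an unconditional reference is dangerous is the caller contract: every pfn handed out by these APIs is eventually released again. A hedged sketch of a typical caller, using existing KVM helpers (kvm and gfn are assumed to be in scope, error handling trimmed):

  kvm_pfn_t pfn = gfn_to_pfn(kvm, gfn);

  if (is_error_noslot_pfn(pfn))
          return -EFAULT;

  /* ... access the guest page ... */

  /*
   * For non-reserved pages this drops the reference with put_page();
   * if the reference taken in hva_to_pfn_remapped() started from a
   * refcount of 0, the release here would underflow it and free the
   * page prematurely.
   */
  kvm_release_pfn_clean(pfn);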
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 virt/kvm/kvm_main.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dc7b60c81039..d9b7001227e3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1513,6 +1513,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
 	return true;
 }
 
+static int kvm_try_get_pfn(kvm_pfn_t pfn)
+{
+	if (kvm_is_reserved_pfn(pfn))
+		return 1;
+	return get_page_unless_zero(pfn_to_page(pfn));
+}
+
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 			       unsigned long addr, bool *async,
 			       bool write_fault, bool *writable,
@@ -1562,13 +1569,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 * Whoever called remap_pfn_range is also going to call e.g.
 	 * unmap_mapping_range before the underlying pages are freed,
 	 * causing a call to our MMU notifier.
+	 *
+	 * Certain IO or PFNMAP mappings can be backed with valid
+	 * struct pages, but be allocated without refcounting e.g.,
+	 * tail pages of non-compound higher order allocations, which
+	 * would then underflow the refcount when the caller does the
+	 * required put_page. Don't allow those pages here.
 	 */
-	kvm_get_pfn(pfn);
+	if (!kvm_try_get_pfn(pfn))
+		r = -EFAULT;
 
 out:
 	pte_unmap_unlock(ptep, ptl);
 	*p_pfn = pfn;
-	return 0;
+
+	return r;
 }
 
 /*