New subject: [PATCH] mm/uffd: Always wr-protect pte in pte_mkuffd_wp()

2 Dec 2022

Currently, we don't enable writenotify when enabling userfaultfd-wp on
a shared writable mapping (for now we only support SHMEM). The consequence
is that vma->vm_page_prot will still include write permissions, to be set
as default for all PTEs that get remapped (e.g., mprotect(), NUMA hinting,
page migration, ...).
This is problematic for uffd-wp: we'd have to manually check for
a uffd-wp PTE and manually write-protect that PTE, which is error prone
and the logic is the wrong way around. Prone to such issues is any code
that uses vma->vm_page_prot to set PTE permissions: primarily pte_modify()
and mk_pte(), but there might be more (move_pte() looked suspicious at
first but the protection parameter is essentially unused).
Instead, let's enable writenotify -- just like we do for softdirty
tracking -- such that PTEs will be mapped write-protected as default
and we will only allow selected PTEs that are defintly safe to be
mapped without write-protection (see can_change_pte_writable()) to be
writable. This reverses the logic and implicitly fixes and prevents any
such uffd-wp issues.
Note that when enabling userfaultfd-wp, there is no need to walk page
tables to enforce the new default protection for the PTEs: we know that
they cannot be uffd-wp'ed yet, because that can only happen afterwards.
For example, this fixes page migration and mprotect() to not map a
uffd-wp'ed PTE writable. In theory, this should also fix when NUMA-hinting
remaps pages in such (shmem) mappings -- if NUMA-hinting is applicable to
shmem with uffd as well.
Running the mprotect() reproducer [1] without this commit:
  $ ./uffd-wp-mprotect
  FAIL: uffd-wp did not fire
Running the mprotect() reproducer with this commit:
  $ ./uffd-wp-mprotect
  PASS: uffd-wp fired
[1] https://lore.kernel.org/all/222fc0b2-6ec0-98e7-833f-ea868b248446@redhat.com/...
Reported-by: Ives van Hoorne ives@codesandbox.io
Debugged-by: Peter Xu peterx@redhat.com
Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs")
Cc: stable@vger.kernel.org
Cc: Andrew Morton akpm@linux-foundation.org
Cc: Hugh Dickins hugh@veritas.com
Cc: Alistair Popple apopple@nvidia.com
Cc: Mike Rapoport rppt@linux.vnet.ibm.com
Cc: Nadav Amit nadav.amit@gmail.com
Cc: Andrea Arcangeli aarcange@redhat.com
Signed-off-by: David Hildenbrand david@redhat.com
---
Based on latest upstream. userfaultfd selftests seem to pass.
---
 fs/userfaultfd.c | 28 ++++++++++++++++++++++------
 mm/mmap.c        |  4 ++++
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 98ac37e34e3d..fb0733f2e623 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
    return ctx->features & UFFD_FEATURE_INITIALIZED;
 }
+static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
+				     vm_flags_t flags)
+{
+	const bool uffd_wp = !!((vma->vm_flags | flags) & VM_UFFD_WP);
+
+	vma->vm_flags = flags;
+	/*
+	 * For shared mappings, we want to enable writenotify while
+	 * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
+	 * recalculate vma->vm_page_prot whenever userfaultfd-wp is involved.
+	 */
+	if ((vma->vm_flags & VM_SHARED) && uffd_wp)
+		vma_set_page_prot(vma);
+}
+
 static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
    			     int wake_flags, void *key)
 {
@@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
    	for_each_vma(vmi, vma) {
    		if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
    			vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
-				vma->vm_flags &= ~__VM_UFFD_FLAGS;
+				userfaultfd_set_vm_flags(vma,
+							 vma->vm_flags & ~__VM_UFFD_FLAGS);
    		}
    	}
    	mmap_write_unlock(mm);
@@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
    octx = vma->vm_userfaultfd_ctx.ctx;
    if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
    	vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
-		vma->vm_flags &= ~__VM_UFFD_FLAGS;
+		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
    	return 0;
    }
@@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
    } else {
    	/* Drop uffd context if remap feature not enabled */
    	vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
-		vma->vm_flags &= ~__VM_UFFD_FLAGS;
+		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
    }
 }
@@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
    		prev = vma;
    	}
-		vma->vm_flags = new_flags;
+		userfaultfd_set_vm_flags(vma, new_flags);
    	vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
    }
    mmap_write_unlock(mm);
@@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
    	 * the next vma was merged into the current one and
    	 * the current one has not been updated yet.
    	 */
-		vma->vm_flags = new_flags;
+		userfaultfd_set_vm_flags(vma, new_flags);
    	vma->vm_userfaultfd_ctx.ctx = ctx;
if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
@@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
    	 * the next vma was merged into the current one and
    	 * the current one has not been updated yet.
    	 */
-		vma->vm_flags = new_flags;
+		userfaultfd_set_vm_flags(vma, new_flags);
    	vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
skip:
diff --git a/mm/mmap.c b/mm/mmap.c
index 74a84eb33b90..ce7526aa5d61 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1525,6 +1525,10 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
    if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
    	return 1;
+	/* Do we need write faults for uffd-wp tracking? */
+	if (userfaultfd_wp(vma))
+		return 1;
+
    /* Specialty mapping? */
    if (vm_flags & VM_PFNMAP)
    	return 0;
base-commit: a4412fdd49dc011bcc2c0d81ac4cab7457092650
-- 
2.38.1


    

[PATCH RFC] mm/userfaultfd: enable writenotify while userfaultfd-wp is enabled for a VMA

base-commit: a4412fdd49dc011bcc2c0d81ac4cab7457092650