On Mon, Apr 29, 2019 at 11:11:49AM +0200, gregkh@linuxfoundation.org wrote:
This is a note to let you know that I've just added the patch titled
RDMA/ucontext: Fix regression with disassociate
to the 5.0-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git%3Ba=su...
The filename of the patch is: rdma-ucontext-fix-regression-with-disassociate.patch and it can be found in the queue-5.0 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree, please let stable@vger.kernel.org know about it.
Greg,
Please be aware that this patch has compilation issues on the s390 platform: https://patchwork.kernel.org/patch/10920895/#22610993
Thanks
From 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 16 Apr 2019 14:07:28 +0300
Subject: RDMA/ucontext: Fix regression with disassociate

From: Jason Gunthorpe <jgg@mellanox.com>
commit 67f269b37f9b4d52c5e7f97acea26c0852e9b8a1 upstream.
When this code was consolidated the intention was that the VMA would become backed by anonymous zero pages after the zap_vma_pte - however this very subtly relied on setting the vm_ops = NULL and clearing the VM_SHARED bits to transform the VMA into an anonymous VMA. Since the vm_ops was removed this broke.
Now userspace gets a SIGBUS if it touches the vma after disassociation.
Instead of converting the VMA to anonymous provide a fault handler that puts a zero'd page into the VMA when user-space touches it after disassociation.
Cc: stable@vger.kernel.org
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 drivers/infiniband/core/uverbs.h      |    1
 drivers/infiniband/core/uverbs_main.c |   52 ++++++++++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 3 deletions(-)
--- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -160,6 +160,7 @@ struct ib_uverbs_file {
struct mutex umap_lock; struct list_head umaps;
struct page *disassociate_page;
struct idr idr; /* spinlock protects write access to idr */
--- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); put_device(&file->device->dev);
+	if (file->disassociate_page)
+		__free_pages(file->disassociate_page, 0);
 	kfree(file);
}
@@ -876,9 +879,50 @@ static void rdma_umap_close(struct vm_ar kfree(priv); }
+/*
+ * Once the zap_vma_ptes has been called touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf) +{
- struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
- struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
- vm_fault_t ret = 0;
- if (!priv)
return VM_FAULT_SIGBUS;
- /* Read only pages can just use the system zero page. */
- if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
vmf->page = ZERO_PAGE(vmf->vm_start);
get_page(vmf->page);
return 0;
- }
- mutex_lock(&ufile->umap_lock);
- if (!ufile->disassociate_page)
ufile->disassociate_page =
alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
- if (ufile->disassociate_page) {
/*
* This VMA is forced to always be shared so this doesn't have
* to worry about COW.
*/
vmf->page = ufile->disassociate_page;
get_page(vmf->page);
- } else {
ret = VM_FAULT_SIGBUS;
- }
- mutex_unlock(&ufile->umap_lock);
- return ret;
+}
static const struct vm_operations_struct rdma_umap_ops = { .open = rdma_umap_open, .close = rdma_umap_close,
- .fault = rdma_umap_fault,
};
static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext, @@ -888,6 +932,9 @@ static struct rdma_umap_priv *rdma_user_ struct ib_uverbs_file *ufile = ucontext->ufile; struct rdma_umap_priv *priv;
- if (!(vma->vm_flags & VM_SHARED))
return ERR_PTR(-EINVAL);
- if (vma->vm_end - vma->vm_start != size) return ERR_PTR(-EINVAL);
@@ -991,7 +1038,7 @@ void uverbs_user_mmap_disassociate(struc * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal. */
down_write(&mm->mmap_sem);
if (!mmget_still_valid(mm)) goto skip_mm; mutex_lock(&ufile->umap_lock);down_read(&mm->mmap_sem);
@@ -1005,11 +1052,10 @@ void uverbs_user_mmap_disassociate(struc
zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
} mutex_unlock(&ufile->umap_lock); skip_mm:vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
up_write(&mm->mmap_sem);
mmput(mm); }up_read(&mm->mmap_sem);
}
Patches currently in stable-queue which might be from jgg@mellanox.com are
queue-5.0/rdma-ucontext-fix-regression-with-disassociate.patch queue-5.0/ib-rdmavt-fix-frwr-memory-registration.patch queue-5.0/rdma-mlx5-use-rdma_user_map_io-for-mapping-bar-pages.patch queue-5.0/rdma-mlx5-do-not-allow-the-user-to-write-to-the-clock-page.patch