This reverts commit 9bbd42e79720122334226afad9ddcac1c3e6d373, which
was commit 17839856fd588f4ab6b789f482ed3ffd7c403e1f upstream. The
backport was incorrect and incomplete:
* It forced the write flag on in the generic __get_user_pages_fast(),
whereas only get_user_pages_fast() was supposed to do that.
* It only fixed the generic RCU-based implementation used by arm,
arm64, and powerpc. Before Linux 4.13, several other architectures
had their own implementations: mips, s390, sparc, sh, and x86.
This will be followed by a (hopefully) correct backport.
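For reference, a minimal sketch of the intended split (illustrative
only; __gup_fast_walk() is a stand-in name for the page-table walker,
not a real kernel symbol):

/* Sketch, not the actual backport. */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	/* IRQ-safe helper: must honour the caller's 'write' argument */
	return __gup_fast_walk(start, nr_pages, write, pages);
}

int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	/*
	 * Only here is the write flag forced, so that a COW break
	 * happens even for read-only fast GUP; the slow-path fallback
	 * is omitted for brevity.
	 */
	return __gup_fast_walk(start, nr_pages, 1, pages);
}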
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: stable(a)vger.kernel.org
---
mm/gup.c | 48 ++++++++----------------------------------------
mm/huge_memory.c | 7 ++++---
2 files changed, 12 insertions(+), 43 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index 301dd96ef176..6bb7a8eb7f82 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -61,22 +61,13 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
}
/*
- * FOLL_FORCE or a forced COW break can write even to unwritable pte's,
- * but only after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
- return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte));
-}
-
-/*
- * A (separate) COW fault might break the page the other way and
- * get_user_pages() would return the page from what is now the wrong
- * VM. So we need to force a COW break at GUP time even for reads.
- */
-static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags)
-{
- return is_cow_mapping(vma->vm_flags) && (flags & FOLL_GET);
+ return pte_write(pte) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -586,18 +577,12 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (!vma || check_vma_flags(vma, gup_flags))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
- if (should_force_cow_break(vma, foll_flags))
- foll_flags |= FOLL_WRITE;
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
- foll_flags);
+ gup_flags);
continue;
}
}
-
- if (should_force_cow_break(vma, foll_flags))
- foll_flags |= FOLL_WRITE;
-
retry:
/*
* If we have a pending SIGKILL, don't keep faulting pages and
@@ -1518,10 +1503,6 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
- *
- * Careful, careful! COW breaking can go either way, so a non-write
- * access can get ambiguous page results. If you call this function without
- * 'write' set, you'd better be sure that you're ok with that ambiguity.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
@@ -1551,12 +1532,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
*
* We do not adopt an rcu_read_lock(.) here as we also want to
* block IPIs that come from THPs splitting.
- *
- * NOTE! We allow read-only gup_fast() here, but you'd better be
- * careful about possible COW pages. You'll get _a_ COW page, but
- * not necessarily the one you intended to get depending on what
- * COW event happens after this. COW may break the page copy in a
- * random direction.
*/
local_irq_save(flags);
@@ -1567,22 +1542,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
- /*
- * The FAST_GUP case requires FOLL_WRITE even for pure reads,
- * because get_user_pages() may need to cause an early COW in
- * order to avoid confusing the normal COW routines. So only
- * targets that are already writable are safe to do by just
- * looking at the page tables.
- */
if (unlikely(pgd_huge(pgd))) {
- if (!gup_huge_pgd(pgd, pgdp, addr, next, 1,
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
pages, &nr))
break;
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, 1, pages, &nr))
+ PGDIR_SHIFT, next, write, pages, &nr))
break;
- } else if (!gup_pud_range(pgd, addr, next, 1, pages, &nr))
+ } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3f3a86cc62b6..91f33bb43f17 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1135,12 +1135,13 @@ int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd)
}
/*
- * FOLL_FORCE or a forced COW break can write even to unwritable pmd's,
- * but only after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE can write to even unwritable pmd's, but only
+ * after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
{
- return pmd_write(pmd) || ((flags & FOLL_COW) && pmd_dirty(pmd));
+ return pmd_write(pmd) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
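For context: the FOLL_COW flag that can_follow_write_pte() and
can_follow_write_pmd() test is set by faultin_page() once a write fault
has actually broken COW (abridged from 4.9-era mm/gup.c), which is why
a dirty pte/pmd can then be trusted:

	/* In faultin_page(), after handle_mm_fault(): */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags |= FOLL_COW;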
This is a backport of the recently merged "[PATCH v3 0/4] KVM: x86:
Partially allow KVM_SET_CPUID{,2} after KVM_RUN for CPU hotplug"
(https://lore.kernel.org/kvm/20220118141801.2219924-1-vkuznets@redhat.com/)
Original description:
Recently, KVM made it illegal to change CPUID after KVM_RUN, but
unfortunately this change is not fully compatible with existing VMMs.
In particular, QEMU reuses vCPU fds for CPU hotplug after unplug and
calls KVM_SET_CPUID2 again. Relax the requirement by allowing
KVM_SET_CPUID{,2} after KVM_RUN when the supplied data is exactly the
same as before.
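For reference, the shape of the equality check the series introduces
(a sketch reconstructed from this description; the exact field list in
the merged patch may differ):

static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu,
				 struct kvm_cpuid_entry2 *e2, int nent)
{
	struct kvm_cpuid_entry2 *orig;
	int i;

	if (nent != vcpu->arch.cpuid_nent)
		return -EINVAL;

	for (i = 0; i < nent; i++) {
		orig = &vcpu->arch.cpuid_entries[i];
		if (e2[i].function != orig->function ||
		    e2[i].index != orig->index ||
		    e2[i].flags != orig->flags ||
		    e2[i].eax != orig->eax || e2[i].ebx != orig->ebx ||
		    e2[i].ecx != orig->ecx || e2[i].edx != orig->edx)
			return -EINVAL;
	}

	return 0;
}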
Vitaly Kuznetsov (4):
KVM: x86: Do runtime CPUID update before updating
vcpu->arch.cpuid_entries
KVM: x86: Partially allow KVM_SET_CPUID{,2} after KVM_RUN
KVM: selftests: Rename 'get_cpuid_test' to 'cpuid_test'
KVM: selftests: Test KVM_SET_CPUID2 after KVM_RUN
arch/x86/kvm/cpuid.c | 90 ++++++++++++++-----
arch/x86/kvm/x86.c | 19 ----
tools/testing/selftests/kvm/.gitignore | 2 +-
tools/testing/selftests/kvm/Makefile | 4 +-
.../selftests/kvm/include/x86_64/processor.h | 7 ++
.../selftests/kvm/lib/x86_64/processor.c | 33 ++++++-
.../x86_64/{get_cpuid_test.c => cpuid_test.c} | 30 +++++++
7 files changed, 139 insertions(+), 46 deletions(-)
rename tools/testing/selftests/kvm/x86_64/{get_cpuid_test.c => cpuid_test.c} (83%)
--
2.34.1
Hi stable maintainers,
On 06.04.21 03:47, Yoshio Furuyama wrote:
> From: "Doyle, Patrick" <pdoyle(a)irobot.com>
>
> In the unlikely event that both blocks 10 and 11 are marked as bad (on a
> 32-bit machine), the process of marking block 10 as bad stomps on the
> cached entry for block 11. There are (of course) other examples.
>
> Signed-off-by: Patrick Doyle <pdoyle(a)irobot.com>
> Reviewed-by: Richard Weinberger <richard(a)nod.at>
We have systems on which this patch fixes real failures. Could you
please add the upstream patch fd0d8d85f723 ("mtd: nand: bbt: Fix corner
case in bad block table handling") to the stable queues for 4.19, 5.4,
and 5.10?
Thanks!
Cc: stable(a)vger.kernel.org
Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
> ---
> drivers/mtd/nand/bbt.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/mtd/nand/bbt.c b/drivers/mtd/nand/bbt.c
> index 044adf913854..64af6898131d 100644
> --- a/drivers/mtd/nand/bbt.c
> +++ b/drivers/mtd/nand/bbt.c
> @@ -123,7 +123,7 @@ int nanddev_bbt_set_block_status(struct nand_device *nand, unsigned int entry,
> unsigned int rbits = bits_per_block + offs - BITS_PER_LONG;
>
> pos[1] &= ~GENMASK(rbits - 1, 0);
> - pos[1] |= val >> rbits;
> + pos[1] |= val >> (bits_per_block - rbits);
> }
>
> return 0;
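To see the corner case concretely, here is a tiny userspace demo of the
shift arithmetic (illustrative only; it assumes 3 status bits per block
and 32-bit longs, so entry 10 starts at bit 30 and spills rbits = 1 bit
into the next word, whose bit 1 already belongs to entry 11):

#include <stdio.h>

int main(void)
{
	unsigned int bits_per_block = 3, offs = 30;	/* entry 10 */
	unsigned int rbits = bits_per_block + offs - 32;	/* = 1 */
	unsigned long val = 0x4;	/* 3-bit status, top bit set */

	/* buggy: spills into bit 1 of the next word (entry 11's bits) */
	printf("old shift: 0x%lx\n", val >> rbits);
	/* fixed: only the top rbits bit lands in the next word */
	printf("new shift: 0x%lx\n", val >> (bits_per_block - rbits));
	return 0;
}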
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 802d4d207e75d7208ff75adb712b556c1e91cf1c Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc(a)marvell.com>
Date: Fri, 17 Dec 2021 08:55:52 -0800
Subject: [PATCH] bnx2x: Invalidate fastpath HSI version for VFs
Commit 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.") added validation
of fastpath HSI versions during client init. That validation was never
meant for SR-IOV VF clients, and it resulted in firmware asserts when
running VF clients with a different fastpath HSI version.
This patch, together with the new firmware support in patch #1, fixes
this behavior by not validating the fastpath HSI version for VFs.
Fixes: 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
Signed-off-by: Manish Chopra <manishc(a)marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha(a)marvell.com>
Signed-off-by: Alok Prasad <palok(a)marvell.com>
Signed-off-by: Ariel Elior <aelior(a)marvell.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 74a8931ce1d1..11d15cd03600 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
{
+ u16 abs_fid;
+
+ abs_fid = FW_VF_HANDLE(abs_vfid);
+
/* set the VF-PF association in the FW */
- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
+ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
+ storm_memset_func_en(bp, abs_fid, 1);
+
+ /* Invalidate fp_hsi version for vfs */
+ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
+ REG_WR8(bp, BAR_XSTRORM_INTMEM +
+ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
/* clear vf errors*/
bnx2x_vf_semi_clear_err(bp, abs_vfid);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 802d4d207e75d7208ff75adb712b556c1e91cf1c Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc(a)marvell.com>
Date: Fri, 17 Dec 2021 08:55:52 -0800
Subject: [PATCH] bnx2x: Invalidate fastpath HSI version for VFs
Commit 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.") added validation
of fastpath HSI versions during client init. That validation was never
meant for SR-IOV VF clients, and it resulted in firmware asserts when
running VF clients with a different fastpath HSI version.
This patch, together with the new firmware support in patch #1, fixes
this behavior by not validating the fastpath HSI version for VFs.
Fixes: 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
Signed-off-by: Manish Chopra <manishc(a)marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha(a)marvell.com>
Signed-off-by: Alok Prasad <palok(a)marvell.com>
Signed-off-by: Ariel Elior <aelior(a)marvell.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 74a8931ce1d1..11d15cd03600 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf)
void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
{
+ u16 abs_fid;
+
+ abs_fid = FW_VF_HANDLE(abs_vfid);
+
/* set the VF-PF association in the FW */
- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
+ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
+ storm_memset_func_en(bp, abs_fid, 1);
+
+ /* Invalidate fp_hsi version for vfs */
+ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
+ REG_WR8(bp, BAR_XSTRORM_INTMEM +
+ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
/* clear vf errors*/
bnx2x_vf_semi_clear_err(bp, abs_vfid);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 87c01d57fa23de82fff593a7d070933d08755801 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple(a)nvidia.com>
Date: Fri, 14 Jan 2022 14:09:31 -0800
Subject: [PATCH] mm/hmm.c: allow VM_MIXEDMAP to work with hmm_range_fault
hmm_range_fault() can be used instead of get_user_pages() for devices
which allow faulting; however, unlike get_user_pages(), it returns an
error when used on a VM_MIXEDMAP range.
To make hmm_range_fault() more closely match get_user_pages(), remove
this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
case in hmm_vma_handle_pte(): rather than replicating the logic of
vm_normal_page(), call it directly and check for the zero pfn, similar
to what get_user_pages() currently does.
Also add a test to the hmm selftests to verify the functionality.
Link: https://lkml.kernel.org/r/20211104012001.2555676-1-apopple@nvidia.com
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Alistair Popple <apopple(a)nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Zi Yan <ziy(a)nvidia.com>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: Felix Kuehling <Felix.Kuehling(a)amd.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
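For quick reference, the core of the change is the new "special" test
in hmm_vma_handle_pte(), shown in the hunk below: a pte only takes the
fault/error path when vm_normal_page() finds no backing page, it is not
a devmap pte, and it is not the zero pfn:

	if (!vm_normal_page(walk->vma, addr, pte) &&
	    !pte_devmap(pte) &&
	    !is_zero_pfn(pte_pfn(pte))) {
		/* fault in the page or return an error, as before */
	}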
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e2ce8f9b7605..767538089a62 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1086,9 +1086,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
return 0;
}
+static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ struct page *page;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ ret = vm_insert_page(vma, addr, page);
+ if (ret) {
+ __free_page(page);
+ return ret;
+ }
+ put_page(page);
+ }
+
+ return 0;
+}
+
static const struct file_operations dmirror_fops = {
.open = dmirror_fops_open,
.release = dmirror_fops_release,
+ .mmap = dmirror_fops_mmap,
.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
diff --git a/mm/hmm.c b/mm/hmm.c
index 842e26599238..bd56641c79d4 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -300,7 +300,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* Since each architecture defines a struct page for the zero page, just
* fall through and treat it like a normal page.
*/
- if (pte_special(pte) && !pte_devmap(pte) &&
+ if (!vm_normal_page(walk->vma, addr, pte) &&
+ !pte_devmap(pte) &&
!is_zero_pfn(pte_pfn(pte))) {
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
@@ -518,7 +519,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
vma->vm_flags & VM_READ)
return 0;
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1248,6 +1248,48 @@ TEST_F(hmm, anon_teardown)
}
}
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/