The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f8914a14623a79b73f72b2b1ee4cd9b2cb91b735 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey(a)gmail.com>
Date: Fri, 6 Sep 2019 23:54:23 +0200
Subject: [PATCH] ath10k: restore QCA9880-AR1A (v1) detection
This patch restores the old behavior that read
the chip_id on the QCA988x before resetting the
chip. This needs to be done in this order since
the unsupported QCA988x AR1A chips fall off the
bus when resetted. Otherwise the next MMIO Op
after the reset causes a BUS ERROR and panic.
Cc: stable(a)vger.kernel.org
Fixes: 1a7fecb766c8 ("ath10k: reset chip before reading chip_id in probe")
Signed-off-by: Christian Lamparter <chunkeey(a)gmail.com>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index bc3dc79de01a..bb44f5a0941b 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -3486,7 +3486,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
struct ath10k_pci *ar_pci;
enum ath10k_hw_rev hw_rev;
struct ath10k_bus_params bus_params = {};
- bool pci_ps;
+ bool pci_ps, is_qca988x = false;
int (*pci_soft_reset)(struct ath10k *ar);
int (*pci_hard_reset)(struct ath10k *ar);
u32 (*targ_cpu_to_ce_addr)(struct ath10k *ar, u32 addr);
@@ -3496,6 +3496,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
case QCA988X_2_0_DEVICE_ID:
hw_rev = ATH10K_HW_QCA988X;
pci_ps = false;
+ is_qca988x = true;
pci_soft_reset = ath10k_pci_warm_reset;
pci_hard_reset = ath10k_pci_qca988x_chip_reset;
targ_cpu_to_ce_addr = ath10k_pci_qca988x_targ_cpu_to_ce_addr;
@@ -3615,25 +3616,34 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
goto err_deinit_irq;
}
+ bus_params.dev_type = ATH10K_DEV_TYPE_LL;
+ bus_params.link_can_suspend = true;
+ /* Read CHIP_ID before reset to catch QCA9880-AR1A v1 devices that
+ * fall off the bus during chip_reset. These chips have the same pci
+ * device id as the QCA9880 BR4A or 2R4E. So that's why the check.
+ */
+ if (is_qca988x) {
+ bus_params.chip_id =
+ ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
+ if (bus_params.chip_id != 0xffffffff) {
+ if (!ath10k_pci_chip_is_supported(pdev->device,
+ bus_params.chip_id))
+ goto err_unsupported;
+ }
+ }
+
ret = ath10k_pci_chip_reset(ar);
if (ret) {
ath10k_err(ar, "failed to reset chip: %d\n", ret);
goto err_free_irq;
}
- bus_params.dev_type = ATH10K_DEV_TYPE_LL;
- bus_params.link_can_suspend = true;
bus_params.chip_id = ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
- if (bus_params.chip_id == 0xffffffff) {
- ath10k_err(ar, "failed to get chip id\n");
- goto err_free_irq;
- }
+ if (bus_params.chip_id == 0xffffffff)
+ goto err_unsupported;
- if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id)) {
- ath10k_err(ar, "device %04x with chip_id %08x isn't supported\n",
- pdev->device, bus_params.chip_id);
+ if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id))
goto err_free_irq;
- }
ret = ath10k_core_register(ar, &bus_params);
if (ret) {
@@ -3643,6 +3653,10 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
return 0;
+err_unsupported:
+ ath10k_err(ar, "device %04x with chip_id %08x isn't supported\n",
+ pdev->device, bus_params.chip_id);
+
err_free_irq:
ath10k_pci_free_irq(ar);
ath10k_pci_rx_retry_sync(ar);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f8914a14623a79b73f72b2b1ee4cd9b2cb91b735 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey(a)gmail.com>
Date: Fri, 6 Sep 2019 23:54:23 +0200
Subject: [PATCH] ath10k: restore QCA9880-AR1A (v1) detection
This patch restores the old behavior that read
the chip_id on the QCA988x before resetting the
chip. This needs to be done in this order since
the unsupported QCA988x AR1A chips fall off the
bus when resetted. Otherwise the next MMIO Op
after the reset causes a BUS ERROR and panic.
Cc: stable(a)vger.kernel.org
Fixes: 1a7fecb766c8 ("ath10k: reset chip before reading chip_id in probe")
Signed-off-by: Christian Lamparter <chunkeey(a)gmail.com>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index bc3dc79de01a..bb44f5a0941b 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -3486,7 +3486,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
struct ath10k_pci *ar_pci;
enum ath10k_hw_rev hw_rev;
struct ath10k_bus_params bus_params = {};
- bool pci_ps;
+ bool pci_ps, is_qca988x = false;
int (*pci_soft_reset)(struct ath10k *ar);
int (*pci_hard_reset)(struct ath10k *ar);
u32 (*targ_cpu_to_ce_addr)(struct ath10k *ar, u32 addr);
@@ -3496,6 +3496,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
case QCA988X_2_0_DEVICE_ID:
hw_rev = ATH10K_HW_QCA988X;
pci_ps = false;
+ is_qca988x = true;
pci_soft_reset = ath10k_pci_warm_reset;
pci_hard_reset = ath10k_pci_qca988x_chip_reset;
targ_cpu_to_ce_addr = ath10k_pci_qca988x_targ_cpu_to_ce_addr;
@@ -3615,25 +3616,34 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
goto err_deinit_irq;
}
+ bus_params.dev_type = ATH10K_DEV_TYPE_LL;
+ bus_params.link_can_suspend = true;
+ /* Read CHIP_ID before reset to catch QCA9880-AR1A v1 devices that
+ * fall off the bus during chip_reset. These chips have the same pci
+ * device id as the QCA9880 BR4A or 2R4E. So that's why the check.
+ */
+ if (is_qca988x) {
+ bus_params.chip_id =
+ ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
+ if (bus_params.chip_id != 0xffffffff) {
+ if (!ath10k_pci_chip_is_supported(pdev->device,
+ bus_params.chip_id))
+ goto err_unsupported;
+ }
+ }
+
ret = ath10k_pci_chip_reset(ar);
if (ret) {
ath10k_err(ar, "failed to reset chip: %d\n", ret);
goto err_free_irq;
}
- bus_params.dev_type = ATH10K_DEV_TYPE_LL;
- bus_params.link_can_suspend = true;
bus_params.chip_id = ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
- if (bus_params.chip_id == 0xffffffff) {
- ath10k_err(ar, "failed to get chip id\n");
- goto err_free_irq;
- }
+ if (bus_params.chip_id == 0xffffffff)
+ goto err_unsupported;
- if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id)) {
- ath10k_err(ar, "device %04x with chip_id %08x isn't supported\n",
- pdev->device, bus_params.chip_id);
+ if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id))
goto err_free_irq;
- }
ret = ath10k_core_register(ar, &bus_params);
if (ret) {
@@ -3643,6 +3653,10 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
return 0;
+err_unsupported:
+ ath10k_err(ar, "device %04x with chip_id %08x isn't supported\n",
+ pdev->device, bus_params.chip_id);
+
err_free_irq:
ath10k_pci_free_irq(ar);
ath10k_pci_rx_retry_sync(ar);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f8914a14623a79b73f72b2b1ee4cd9b2cb91b735 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey(a)gmail.com>
Date: Fri, 6 Sep 2019 23:54:23 +0200
Subject: [PATCH] ath10k: restore QCA9880-AR1A (v1) detection
This patch restores the old behavior that read
the chip_id on the QCA988x before resetting the
chip. This needs to be done in this order since
the unsupported QCA988x AR1A chips fall off the
bus when resetted. Otherwise the next MMIO Op
after the reset causes a BUS ERROR and panic.
Cc: stable(a)vger.kernel.org
Fixes: 1a7fecb766c8 ("ath10k: reset chip before reading chip_id in probe")
Signed-off-by: Christian Lamparter <chunkeey(a)gmail.com>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index bc3dc79de01a..bb44f5a0941b 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -3486,7 +3486,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
struct ath10k_pci *ar_pci;
enum ath10k_hw_rev hw_rev;
struct ath10k_bus_params bus_params = {};
- bool pci_ps;
+ bool pci_ps, is_qca988x = false;
int (*pci_soft_reset)(struct ath10k *ar);
int (*pci_hard_reset)(struct ath10k *ar);
u32 (*targ_cpu_to_ce_addr)(struct ath10k *ar, u32 addr);
@@ -3496,6 +3496,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
case QCA988X_2_0_DEVICE_ID:
hw_rev = ATH10K_HW_QCA988X;
pci_ps = false;
+ is_qca988x = true;
pci_soft_reset = ath10k_pci_warm_reset;
pci_hard_reset = ath10k_pci_qca988x_chip_reset;
targ_cpu_to_ce_addr = ath10k_pci_qca988x_targ_cpu_to_ce_addr;
@@ -3615,25 +3616,34 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
goto err_deinit_irq;
}
+ bus_params.dev_type = ATH10K_DEV_TYPE_LL;
+ bus_params.link_can_suspend = true;
+ /* Read CHIP_ID before reset to catch QCA9880-AR1A v1 devices that
+ * fall off the bus during chip_reset. These chips have the same pci
+ * device id as the QCA9880 BR4A or 2R4E. So that's why the check.
+ */
+ if (is_qca988x) {
+ bus_params.chip_id =
+ ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
+ if (bus_params.chip_id != 0xffffffff) {
+ if (!ath10k_pci_chip_is_supported(pdev->device,
+ bus_params.chip_id))
+ goto err_unsupported;
+ }
+ }
+
ret = ath10k_pci_chip_reset(ar);
if (ret) {
ath10k_err(ar, "failed to reset chip: %d\n", ret);
goto err_free_irq;
}
- bus_params.dev_type = ATH10K_DEV_TYPE_LL;
- bus_params.link_can_suspend = true;
bus_params.chip_id = ath10k_pci_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
- if (bus_params.chip_id == 0xffffffff) {
- ath10k_err(ar, "failed to get chip id\n");
- goto err_free_irq;
- }
+ if (bus_params.chip_id == 0xffffffff)
+ goto err_unsupported;
- if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id)) {
- ath10k_err(ar, "device %04x with chip_id %08x isn't supported\n",
- pdev->device, bus_params.chip_id);
+ if (!ath10k_pci_chip_is_supported(pdev->device, bus_params.chip_id))
goto err_free_irq;
- }
ret = ath10k_core_register(ar, &bus_params);
if (ret) {
@@ -3643,6 +3653,10 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
return 0;
+err_unsupported:
+ ath10k_err(ar, "device %04x with chip_id %08x isn't supported\n",
+ pdev->device, bus_params.chip_id);
+
err_free_irq:
ath10k_pci_free_irq(ar);
ath10k_pci_rx_retry_sync(ar);
[ Upstream commit a78986aae9b2988f8493f9f65a587ee433e83bc3 ]
Explicitly exempt ZONE_DEVICE pages from kvm_is_reserved_pfn() and
instead manually handle ZONE_DEVICE on a case-by-case basis. For things
like page refcounts, KVM needs to treat ZONE_DEVICE pages like normal
pages, e.g. put pages grabbed via gup(). But for flows such as setting
A/D bits or shifting refcounts for transparent huge pages, KVM needs to
to avoid processing ZONE_DEVICE pages as the flows in question lack the
underlying machinery for proper handling of ZONE_DEVICE pages.
This fixes a hang reported by Adam Borowski[*] in dev_pagemap_cleanup()
when running a KVM guest backed with /dev/dax memory, as KVM straight up
doesn't put any references to ZONE_DEVICE pages acquired by gup().
Note, Dan Williams proposed an alternative solution of doing put_page()
on ZONE_DEVICE pages immediately after gup() in order to simplify the
auditing needed to ensure is_zone_device_page() is called if and only if
the backing device is pinned (via gup()). But that approach would break
kvm_vcpu_{un}map() as KVM requires the page to be pinned from map() 'til
unmap() when accessing guest memory, unlike KVM's secondary MMU, which
coordinates with mmu_notifier invalidations to avoid creating stale
page references, i.e. doesn't rely on pages being pinned.
[*] http://lkml.kernel.org/r/20190919115547.GA17963@angband.pl
Reported-by: Adam Borowski <kilobyte(a)angband.pl>
Analyzed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: stable(a)vger.kernel.org
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
[sean: backport to 4.x; resolve conflict in mmu.c]
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
---
arch/x86/kvm/mmu.c | 8 ++++----
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 26 +++++++++++++++++++++++---
3 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8cd26e50d41c..c0b0135ef07f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3177,7 +3177,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* here.
*/
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- level == PT_PAGE_TABLE_LEVEL &&
+ !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
PageTransCompoundMap(pfn_to_page(pfn)) &&
!mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
@@ -5344,9 +5344,9 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct &&
- !kvm_is_reserved_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+ !kvm_is_zone_device_pfn(pfn) &&
+ PageTransCompoundMap(pfn_to_page(pfn))) {
drop_spte(kvm, sptep);
need_tlb_flush = 1;
goto restart;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bb4758ffd403..7668c68ddb5b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -890,6 +890,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ea61162b2b53..cdaacdf7bc87 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -142,10 +142,30 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
{
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@ -1730,7 +1750,7 @@ static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
@@ -1741,7 +1761,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
--
2.24.0
[ Upstream commit a78986aae9b2988f8493f9f65a587ee433e83bc3 ]
Explicitly exempt ZONE_DEVICE pages from kvm_is_reserved_pfn() and
instead manually handle ZONE_DEVICE on a case-by-case basis. For things
like page refcounts, KVM needs to treat ZONE_DEVICE pages like normal
pages, e.g. put pages grabbed via gup(). But for flows such as setting
A/D bits or shifting refcounts for transparent huge pages, KVM needs to
to avoid processing ZONE_DEVICE pages as the flows in question lack the
underlying machinery for proper handling of ZONE_DEVICE pages.
This fixes a hang reported by Adam Borowski[*] in dev_pagemap_cleanup()
when running a KVM guest backed with /dev/dax memory, as KVM straight up
doesn't put any references to ZONE_DEVICE pages acquired by gup().
Note, Dan Williams proposed an alternative solution of doing put_page()
on ZONE_DEVICE pages immediately after gup() in order to simplify the
auditing needed to ensure is_zone_device_page() is called if and only if
the backing device is pinned (via gup()). But that approach would break
kvm_vcpu_{un}map() as KVM requires the page to be pinned from map() 'til
unmap() when accessing guest memory, unlike KVM's secondary MMU, which
coordinates with mmu_notifier invalidations to avoid creating stale
page references, i.e. doesn't rely on pages being pinned.
[*] http://lkml.kernel.org/r/20190919115547.GA17963@angband.pl
Reported-by: Adam Borowski <kilobyte(a)angband.pl>
Analyzed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: stable(a)vger.kernel.org
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
[sean: backport to 4.x; resolve conflict in mmu.c]
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
---
arch/x86/kvm/mmu.c | 8 ++++----
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 26 +++++++++++++++++++++++---
3 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f0f180158c26..3a281a2decde 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2934,7 +2934,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* here.
*/
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- level == PT_PAGE_TABLE_LEVEL &&
+ !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
PageTransCompoundMap(pfn_to_page(pfn)) &&
!mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
@@ -4890,9 +4890,9 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct &&
- !kvm_is_reserved_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+ !kvm_is_zone_device_pfn(pfn) &&
+ PageTransCompoundMap(pfn_to_page(pfn))) {
drop_spte(kvm, sptep);
need_tlb_flush = 1;
goto restart;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0590e7d47b02..ab90a8541aaa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -843,6 +843,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0fc93519e63e..c0dff5337a50 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -131,10 +131,30 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
{
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@ -1758,7 +1778,7 @@ static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
@@ -1769,7 +1789,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
--
2.24.0
[ Upstream commit a78986aae9b2988f8493f9f65a587ee433e83bc3 ]
Explicitly exempt ZONE_DEVICE pages from kvm_is_reserved_pfn() and
instead manually handle ZONE_DEVICE on a case-by-case basis. For things
like page refcounts, KVM needs to treat ZONE_DEVICE pages like normal
pages, e.g. put pages grabbed via gup(). But for flows such as setting
A/D bits or shifting refcounts for transparent huge pages, KVM needs to
to avoid processing ZONE_DEVICE pages as the flows in question lack the
underlying machinery for proper handling of ZONE_DEVICE pages.
This fixes a hang reported by Adam Borowski[*] in dev_pagemap_cleanup()
when running a KVM guest backed with /dev/dax memory, as KVM straight up
doesn't put any references to ZONE_DEVICE pages acquired by gup().
Note, Dan Williams proposed an alternative solution of doing put_page()
on ZONE_DEVICE pages immediately after gup() in order to simplify the
auditing needed to ensure is_zone_device_page() is called if and only if
the backing device is pinned (via gup()). But that approach would break
kvm_vcpu_{un}map() as KVM requires the page to be pinned from map() 'til
unmap() when accessing guest memory, unlike KVM's secondary MMU, which
coordinates with mmu_notifier invalidations to avoid creating stale
page references, i.e. doesn't rely on pages being pinned.
[*] http://lkml.kernel.org/r/20190919115547.GA17963@angband.pl
Reported-by: Adam Borowski <kilobyte(a)angband.pl>
Analyzed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Dan Williams <dan.j.williams(a)intel.com>
Cc: stable(a)vger.kernel.org
Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings")
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
[sean: backport to 4.x; resolve conflict in mmu.c]
Signed-off-by: Sean Christopherson <sean.j.christopherson(a)intel.com>
---
arch/x86/kvm/mmu.c | 8 ++++----
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 26 +++++++++++++++++++++++---
3 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d7db7608de5f..eddf91a0e363 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3261,7 +3261,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* here.
*/
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- level == PT_PAGE_TABLE_LEVEL &&
+ !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
PageTransCompoundMap(pfn_to_page(pfn)) &&
!mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
@@ -5709,9 +5709,9 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct &&
- !kvm_is_reserved_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+ !kvm_is_zone_device_pfn(pfn) &&
+ PageTransCompoundMap(pfn_to_page(pfn))) {
drop_spte(kvm, sptep);
need_tlb_flush = 1;
goto restart;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 96207939d862..748016ae01e3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -911,6 +911,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7a0d86d52230..df3fc0f214ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -147,10 +147,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
return 0;
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@ -1727,7 +1747,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
@@ -1738,7 +1758,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
--
2.24.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
Commit: 7058bd1431da - net/mlx5: Update the list of the PCI supported devices
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/307910
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ selinux-policy: serge-testsuite
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Memory function: kaslr
✅ AMTU (Abstract Machine Test Utility)
✅ LTP: openposix test suite
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ stress: stress-ng
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Memory function: kaslr
✅ AMTU (Abstract Machine Test Utility)
✅ LTP: openposix test suite
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 2:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ selinux-policy: serge-testsuite
🚧 ✅ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ Storage SAN device stress - megaraid_sas
Host 2:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ selinux-policy: serge-testsuite
🚧 ✅ Storage blktests
Host 3:
✅ Boot test
✅ Storage SAN device stress - mpt3sas driver
Host 4:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Memory function: kaslr
✅ AMTU (Abstract Machine Test Utility)
✅ LTP: openposix test suite
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ stress: stress-ng
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ❌ jvm test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
Commit: 3f746337e557 - drm/i915/userptr: Try to acquire the page lock around set_page_dirty()
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/308040
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ selinux-policy: serge-testsuite
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Memory function: kaslr
✅ AMTU (Abstract Machine Test Utility)
✅ LTP: openposix test suite
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ stress: stress-ng
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Memory function: kaslr
✅ AMTU (Abstract Machine Test Utility)
✅ LTP: openposix test suite
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 2:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ selinux-policy: serge-testsuite
🚧 ✅ Storage blktests
x86_64:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.