The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 81f7eb00ff5bb8326e82503a32809421d14abb8a Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm(a)suse.com>
Date: Tue, 11 Feb 2020 15:25:37 +0800
Subject: [PATCH] btrfs: destroy qgroup extent records on transaction abort
We clean up the delayed references when we abort a transaction but we
leave the pending qgroup extent records behind, leaking memory.
This patch destroys the extent records when we destroy the delayed refs
and makes sure ensure they're gone before releasing the transaction.
Fixes: 3368d001ba5d ("btrfs: qgroup: Record possible quota-related extent for qgroup.")
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Jeff Mahoney <jeffm(a)suse.com>
[ Rebased to latest upstream, remove to_qgroup() helper, use
rbtree_postorder_for_each_entry_safe() wrapper ]
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 89422aa8e9d1..d7fec89974cb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4276,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
cond_resched();
spin_lock(&delayed_refs->lock);
}
+ btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98d9a50352d6..ff1870ff3474 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4002,3 +4002,16 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
}
return ret;
}
+
+void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
+{
+ struct btrfs_qgroup_extent_record *entry;
+ struct btrfs_qgroup_extent_record *next;
+ struct rb_root *root;
+
+ root = &trans->delayed_refs.dirty_extent_root;
+ rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
+ ulist_free(entry->old_roots);
+ kfree(entry);
+ }
+}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 236f12224d52..1bc654459469 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -414,5 +414,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
u64 last_snapshot);
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb);
+void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 33dcc88b428a..beb6c69cd1e5 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -121,6 +121,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
BUG_ON(!list_empty(&transaction->list));
WARN_ON(!RB_EMPTY_ROOT(
&transaction->delayed_refs.href_root.rb_root));
+ WARN_ON(!RB_EMPTY_ROOT(
+ &transaction->delayed_refs.dirty_extent_root));
if (transaction->delayed_refs.pending_csums)
btrfs_err(transaction->fs_info,
"pending csums is %llu",
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 81f7eb00ff5bb8326e82503a32809421d14abb8a Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm(a)suse.com>
Date: Tue, 11 Feb 2020 15:25:37 +0800
Subject: [PATCH] btrfs: destroy qgroup extent records on transaction abort
We clean up the delayed references when we abort a transaction but we
leave the pending qgroup extent records behind, leaking memory.
This patch destroys the extent records when we destroy the delayed refs
and makes sure ensure they're gone before releasing the transaction.
Fixes: 3368d001ba5d ("btrfs: qgroup: Record possible quota-related extent for qgroup.")
CC: stable(a)vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: Jeff Mahoney <jeffm(a)suse.com>
[ Rebased to latest upstream, remove to_qgroup() helper, use
rbtree_postorder_for_each_entry_safe() wrapper ]
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 89422aa8e9d1..d7fec89974cb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4276,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
cond_resched();
spin_lock(&delayed_refs->lock);
}
+ btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98d9a50352d6..ff1870ff3474 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4002,3 +4002,16 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
}
return ret;
}
+
+void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
+{
+ struct btrfs_qgroup_extent_record *entry;
+ struct btrfs_qgroup_extent_record *next;
+ struct rb_root *root;
+
+ root = &trans->delayed_refs.dirty_extent_root;
+ rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
+ ulist_free(entry->old_roots);
+ kfree(entry);
+ }
+}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 236f12224d52..1bc654459469 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -414,5 +414,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
u64 last_snapshot);
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb);
+void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 33dcc88b428a..beb6c69cd1e5 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -121,6 +121,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
BUG_ON(!list_empty(&transaction->list));
WARN_ON(!RB_EMPTY_ROOT(
&transaction->delayed_refs.href_root.rb_root));
+ WARN_ON(!RB_EMPTY_ROOT(
+ &transaction->delayed_refs.dirty_extent_root));
if (transaction->delayed_refs.pending_csums)
btrfs_err(transaction->fs_info,
"pending csums is %llu",
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 91a5f413af596ad01097e59bf487eb07cb3f1331 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Date: Thu, 20 Feb 2020 18:22:05 +0100
Subject: [PATCH] KVM: nVMX: handle nested posted interrupts when apicv is
disabled for L1
Even when APICv is disabled for L1 it can (and, actually, is) still
available for L2, this means we need to always call
vmx_deliver_nested_posted_interrupt() when attempting an interrupt
delivery.
Suggested-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 40a0c0fd95ca..a84e8c5acda8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1146,7 +1146,7 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afcd30d44cbb..cc8ee8125712 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1046,11 +1046,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (vcpu->arch.apicv_active)
- kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
- else {
+ if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
-
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a391b29138f0..8787a123b8e7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5258,8 +5258,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
+ if (!vcpu->arch.apicv_active)
+ return -1;
+
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
@@ -5271,6 +5274,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
+
+ return 0;
}
static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index dafe4df893c8..63ccc435a602 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3822,24 +3822,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
* 2. If target vcpu isn't running(root mode), kick it to pick up the
* interrupt from PIR in next vmentry.
*/
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int r;
r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
if (!r)
- return;
+ return 0;
+
+ if (!vcpu->arch.apicv_active)
+ return -1;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
- return;
+ return 0;
/* If a previous notification has sent the IPI, nothing to do. */
if (pi_test_and_set_on(&vmx->pi_desc))
- return;
+ return 0;
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
+
+ return 0;
}
/*
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 91a5f413af596ad01097e59bf487eb07cb3f1331 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Date: Thu, 20 Feb 2020 18:22:05 +0100
Subject: [PATCH] KVM: nVMX: handle nested posted interrupts when apicv is
disabled for L1
Even when APICv is disabled for L1 it can (and, actually, is) still
available for L2, this means we need to always call
vmx_deliver_nested_posted_interrupt() when attempting an interrupt
delivery.
Suggested-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 40a0c0fd95ca..a84e8c5acda8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1146,7 +1146,7 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afcd30d44cbb..cc8ee8125712 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1046,11 +1046,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (vcpu->arch.apicv_active)
- kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
- else {
+ if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
-
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a391b29138f0..8787a123b8e7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5258,8 +5258,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
+ if (!vcpu->arch.apicv_active)
+ return -1;
+
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
@@ -5271,6 +5274,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
+
+ return 0;
}
static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index dafe4df893c8..63ccc435a602 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3822,24 +3822,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
* 2. If target vcpu isn't running(root mode), kick it to pick up the
* interrupt from PIR in next vmentry.
*/
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int r;
r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
if (!r)
- return;
+ return 0;
+
+ if (!vcpu->arch.apicv_active)
+ return -1;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
- return;
+ return 0;
/* If a previous notification has sent the IPI, nothing to do. */
if (pi_test_and_set_on(&vmx->pi_desc))
- return;
+ return 0;
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
+
+ return 0;
}
/*
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a4443267800af240072280c44521caab61924e55 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Date: Thu, 20 Feb 2020 18:22:04 +0100
Subject: [PATCH] KVM: nVMX: clear PIN_BASED_POSTED_INTR from nested
pinbased_ctls only when apicv is globally disabled
When apicv is disabled on a vCPU (e.g. by enabling KVM_CAP_HYPERV_SYNIC*),
nothing happens to VMX MSRs on the already existing vCPUs, however, all new
ones are created with PIN_BASED_POSTED_INTR filtered out. This is very
confusing and results in the following picture inside the guest:
$ rdmsr -ax 0x48d
ff00000016
7f00000016
7f00000016
7f00000016
This is observed with QEMU and 4-vCPU guest: QEMU creates vCPU0, does
KVM_CAP_HYPERV_SYNIC2 and then creates the remaining three.
L1 hypervisor may only check CPU0's controls to find out what features
are available and it will be very confused later. Switch to setting
PIN_BASED_POSTED_INTR control based on global 'enable_apicv' setting.
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 283bdb7071af..f486e2606247 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
+extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a5757b0b80f9..2b3ba7d27be4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5949,8 +5949,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5977,7 +5976,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index fc874d4ead0f..1c5fbff45d69 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv);
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 63ccc435a602..404dafedd778 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -6769,8 +6769,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(vcpu));
+ vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@@ -6851,8 +6850,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
- enable_apicv);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -7714,7 +7712,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept, enable_apicv);
+ vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a4443267800af240072280c44521caab61924e55 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Date: Thu, 20 Feb 2020 18:22:04 +0100
Subject: [PATCH] KVM: nVMX: clear PIN_BASED_POSTED_INTR from nested
pinbased_ctls only when apicv is globally disabled
When apicv is disabled on a vCPU (e.g. by enabling KVM_CAP_HYPERV_SYNIC*),
nothing happens to VMX MSRs on the already existing vCPUs, however, all new
ones are created with PIN_BASED_POSTED_INTR filtered out. This is very
confusing and results in the following picture inside the guest:
$ rdmsr -ax 0x48d
ff00000016
7f00000016
7f00000016
7f00000016
This is observed with QEMU and 4-vCPU guest: QEMU creates vCPU0, does
KVM_CAP_HYPERV_SYNIC2 and then creates the remaining three.
L1 hypervisor may only check CPU0's controls to find out what features
are available and it will be very confused later. Switch to setting
PIN_BASED_POSTED_INTR control based on global 'enable_apicv' setting.
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 283bdb7071af..f486e2606247 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
+extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a5757b0b80f9..2b3ba7d27be4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5949,8 +5949,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5977,7 +5976,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index fc874d4ead0f..1c5fbff45d69 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv);
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 63ccc435a602..404dafedd778 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -6769,8 +6769,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(vcpu));
+ vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@@ -6851,8 +6850,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
- enable_apicv);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -7714,7 +7712,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept, enable_apicv);
+ vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a4443267800af240072280c44521caab61924e55 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Date: Thu, 20 Feb 2020 18:22:04 +0100
Subject: [PATCH] KVM: nVMX: clear PIN_BASED_POSTED_INTR from nested
pinbased_ctls only when apicv is globally disabled
When apicv is disabled on a vCPU (e.g. by enabling KVM_CAP_HYPERV_SYNIC*),
nothing happens to VMX MSRs on the already existing vCPUs, however, all new
ones are created with PIN_BASED_POSTED_INTR filtered out. This is very
confusing and results in the following picture inside the guest:
$ rdmsr -ax 0x48d
ff00000016
7f00000016
7f00000016
7f00000016
This is observed with QEMU and 4-vCPU guest: QEMU creates vCPU0, does
KVM_CAP_HYPERV_SYNIC2 and then creates the remaining three.
L1 hypervisor may only check CPU0's controls to find out what features
are available and it will be very confused later. Switch to setting
PIN_BASED_POSTED_INTR control based on global 'enable_apicv' setting.
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 283bdb7071af..f486e2606247 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
+extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a5757b0b80f9..2b3ba7d27be4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5949,8 +5949,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5977,7 +5976,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index fc874d4ead0f..1c5fbff45d69 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv);
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 63ccc435a602..404dafedd778 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -6769,8 +6769,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(vcpu));
+ vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@@ -6851,8 +6850,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
- enable_apicv);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -7714,7 +7712,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept, enable_apicv);
+ vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a4443267800af240072280c44521caab61924e55 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Date: Thu, 20 Feb 2020 18:22:04 +0100
Subject: [PATCH] KVM: nVMX: clear PIN_BASED_POSTED_INTR from nested
pinbased_ctls only when apicv is globally disabled
When apicv is disabled on a vCPU (e.g. by enabling KVM_CAP_HYPERV_SYNIC*),
nothing happens to VMX MSRs on the already existing vCPUs, however, all new
ones are created with PIN_BASED_POSTED_INTR filtered out. This is very
confusing and results in the following picture inside the guest:
$ rdmsr -ax 0x48d
ff00000016
7f00000016
7f00000016
7f00000016
This is observed with QEMU and 4-vCPU guest: QEMU creates vCPU0, does
KVM_CAP_HYPERV_SYNIC2 and then creates the remaining three.
L1 hypervisor may only check CPU0's controls to find out what features
are available and it will be very confused later. Switch to setting
PIN_BASED_POSTED_INTR control based on global 'enable_apicv' setting.
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 283bdb7071af..f486e2606247 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
+extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a5757b0b80f9..2b3ba7d27be4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5949,8 +5949,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5977,7 +5976,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index fc874d4ead0f..1c5fbff45d69 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv);
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 63ccc435a602..404dafedd778 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -6769,8 +6769,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(vcpu));
+ vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@@ -6851,8 +6850,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
- enable_apicv);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -7714,7 +7712,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept, enable_apicv);
+ vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
The patch below does not apply to the 5.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2e0a576511f656933adfe56ef03b9cf3e64b21b7 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Thu, 13 Feb 2020 16:04:11 +0200
Subject: [PATCH] drm/i915/dsc: force full modeset whenever DSC is enabled at
probe
We lack full state readout of DSC config, which may lead to DSC enable
using a config that's all zeros, failing spectacularly. Force full
modeset and thus compute config at probe to get a sane state, until we
implement DSC state readout. Any fastset that did appear to work with
DSC at probe, worked by coincidence. [1] is an example of a change that
triggered the issue on TGL DSI DSC.
[1] http://patchwork.freedesktop.org/patch/msgid/20200212150102.7600-1-ville.sy…
Cc: Manasi Navare <manasi.d.navare(a)intel.com>
Cc: Vandita Kulkarni <vandita.kulkarni(a)intel.com>
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Fixes: fbacb15ea814 ("drm/i915/dsc: add basic hardware state readout support")
Acked-by: Matt Roper <matthew.d.roper(a)intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200213140412.32697-3-stanis…
(cherry picked from commit a4277aa398d76db109d6b8420934f68daf69a6c3)
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 064dd99bbc49..e68ec25fc97c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -17433,6 +17433,24 @@ static int intel_initial_commit(struct drm_device *dev)
* have readout for pipe gamma enable.
*/
crtc_state->uapi.color_mgmt_changed = true;
+
+ /*
+ * FIXME hack to force full modeset when DSC is being
+ * used.
+ *
+ * As long as we do not have full state readout and
+ * config comparison of crtc_state->dsc, we have no way
+ * to ensure reliable fastset. Remove once we have
+ * readout for DSC.
+ */
+ if (crtc_state->dsc.compression_enable) {
+ ret = drm_atomic_add_affected_connectors(state,
+ &crtc->base);
+ if (ret)
+ goto out;
+ crtc_state->uapi.mode_changed = true;
+ drm_dbg_kms(dev, "Force full modeset for DSC\n");
+ }
}
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b1339ecac661e1cf3e1dc78ac56bff3aeeaeb92c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Fri, 7 Feb 2020 21:14:52 +0000
Subject: [PATCH] drm/i915/execlists: Always force a context reload when
rewinding RING_TAIL
If we rewind the RING_TAIL on a context, due to a preemption event, we
must force the context restore for the RING_TAIL update to be properly
handled. Rather than note which preemption events may cause us to rewind
the tail, compare the new request's tail with the previously submitted
RING_TAIL, as it turns out that timeslicing was causing unexpected
rewinds.
<idle>-0 0d.s2 1280851190us : __execlists_submission_tasklet: 0000:00:02.0 rcs0: expired last=130:4698, prio=3, hint=3
<idle>-0 0d.s2 1280851192us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 66:119966, current 119964
<idle>-0 0d.s2 1280851195us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4698, current 4695
<idle>-0 0d.s2 1280851198us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4696, current 4695
^---- Note we unwind 2 requests from the same context
<idle>-0 0d.s2 1280851208us : __i915_request_submit: 0000:00:02.0 rcs0: fence 130:4696, current 4695
<idle>-0 0d.s2 1280851213us : __i915_request_submit: 0000:00:02.0 rcs0: fence 134:1508, current 1506
^---- But to apply the new timeslice, we have to replay the first request
before the new client can start -- the unexpected RING_TAIL rewind
<idle>-0 0d.s2 1280851219us : trace_ports: 0000:00:02.0 rcs0: submit { 130:4696*, 134:1508 }
synmark2-5425 2..s. 1280851239us : process_csb: 0000:00:02.0 rcs0: cs-irq head=5, tail=0
synmark2-5425 2..s. 1280851240us : process_csb: 0000:00:02.0 rcs0: csb[0]: status=0x00008002:0x00000000
^---- Preemption event for the ELSP update; note the lite-restore
synmark2-5425 2..s. 1280851243us : trace_ports: 0000:00:02.0 rcs0: preempted { 130:4698, 66:119966 }
synmark2-5425 2..s. 1280851246us : trace_ports: 0000:00:02.0 rcs0: promote { 130:4696*, 134:1508 }
synmark2-5425 2.... 1280851462us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4700, current 4695
synmark2-5425 2.... 1280852111us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4702, current 4695
synmark2-5425 2.Ns1 1280852296us : process_csb: 0000:00:02.0 rcs0: cs-irq head=0, tail=2
synmark2-5425 2.Ns1 1280852297us : process_csb: 0000:00:02.0 rcs0: csb[1]: status=0x00000814:0x00000000
synmark2-5425 2.Ns1 1280852299us : trace_ports: 0000:00:02.0 rcs0: completed { 130:4696!, 134:1508 }
synmark2-5425 2.Ns1 1280852301us : process_csb: 0000:00:02.0 rcs0: csb[2]: status=0x00000818:0x00000040
synmark2-5425 2.Ns1 1280852302us : trace_ports: 0000:00:02.0 rcs0: completed { 134:1508, 0:0 }
synmark2-5425 2.Ns1 1280852313us : process_csb: process_csb:2336 GEM_BUG_ON(!i915_request_completed(*execlists->active) && !reset_in_progress(execlists))
Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Referenecs: 82c69bf58650 ("drm/i915/gt: Detect if we miss WaIdleLiteRestore")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v5.4+
Link: https://patchwork.freedesktop.org/patch/msgid/20200207211452.2860634-1-chri…
(cherry picked from commit 5ba32c7be81e53ea8a27190b0f6be98e6c6779af)
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0a8a2c8026f1..438d7a97d45c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1321,7 +1321,7 @@ static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
u64 desc = ce->lrc_desc;
- u32 tail;
+ u32 tail, prev;
/*
* WaIdleLiteRestore:bdw,skl
@@ -1334,9 +1334,15 @@ static u64 execlists_update_context(struct i915_request *rq)
* subsequent resubmissions (for lite restore). Should that fail us,
* and we try and submit the same tail again, force the context
* reload.
+ *
+ * If we need to return to a preempted context, we need to skip the
+ * lite-restore and force it to reload the RING_TAIL. Otherwise, the
+ * HW has a tendency to ignore us rewinding the TAIL to the end of
+ * an earlier request.
*/
tail = intel_ring_set_tail(rq->ring, rq->tail);
- if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ prev = ce->lrc_reg_state[CTX_RING_TAIL];
+ if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
desc |= CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state[CTX_RING_TAIL] = tail;
rq->tail = rq->wa_tail;
@@ -1839,14 +1845,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
__unwind_incomplete_requests(engine);
- /*
- * If we need to return to the preempted context, we
- * need to skip the lite-restore and force it to
- * reload the RING_TAIL. Otherwise, the HW has a
- * tendency to ignore us rewinding the TAIL to the
- * end of an earlier request.
- */
- last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
last = NULL;
} else if (need_timeslice(engine, last) &&
timer_expired(&engine->execlists.timer)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 374b28f13ca0..6ff803f397c4 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -145,6 +145,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
kref_init(&ring->ref);
ring->size = size;
+ ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);
/*
* Workaround an erratum on the i830 which causes a hang if
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
index ea2839d9e044..5bdce24994aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -56,6 +56,14 @@ static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
return pos & (ring->size - 1);
}
+static inline int intel_ring_direction(const struct intel_ring *ring,
+ u32 next, u32 prev)
+{
+ typecheck(typeof(ring->size), next);
+ typecheck(typeof(ring->size), prev);
+ return (next - prev) << ring->wrap;
+}
+
static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
unsigned int pos)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
index d9f17f38e0cc..3cd7fec7fd8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -45,6 +45,7 @@ struct intel_ring {
u32 space;
u32 size;
+ u32 wrap;
u32 effective_size;
};