On a system that implements FEAT_EPAN, read/write access to the idmap
is denied because UXN is not set on the swapper PTEs. As a result,
idmap_kpti_install_ng_mappings panics the kernel when accessing
__idmap_kpti_flag. Fix it by setting UXN on these PTEs.
Fixes: 18107f8a2df6 ("arm64: Support execute-only permissions with Enhanced PAN")
Cc: <stable(a)vger.kernel.org> # 5.15
Link: https://linux-review.googlesource.com/id/Ic452fa4b4f74753e54f71e61027e7222a…
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
---
This fix is no longer needed since commit c3cee924bd85 ("arm64: head:
cover entire kernel image in initial ID map"), which moved __idmap_kpti_flag
to .data, but that commit is currently only present in next.
arch/arm64/include/asm/kernel-pgtable.h | 4 ++--
arch/arm64/kernel/head.S | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 96dc0f7da258..a971d462f531 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -103,8 +103,8 @@
/*
* Initial memory map attributes.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
+#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PMD_SECT_UXN)
#if ARM64_KERNEL_USES_PMD_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 6a98f1a38c29..8a93a0a7489b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -285,7 +285,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
subs x1, x1, #64
b.ne 1b
- mov x7, SWAPPER_MM_MMUFLAGS
+ mov_q x7, SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
--
2.37.0.170.g444d1eabd0-goog
From: Nico Boehr <nrb(a)linux.ibm.com>
When the SIGP interpretation facility is present and a VCPU sends an
ecall to another VCPU in enabled wait, the sending VCPU receives a 56
intercept (partial execution), so KVM can wake up the receiving CPU.
Note that the SIGP interpretation facility will take care of the
interrupt delivery and KVM's only job is to wake the receiving VCPU.
For PV, the sending VCPU will receive a 108 intercept (pv notify) and
should continue like in the non-PV case, i.e. wake the receiving VCPU.
For PV and non-PV guests the interrupt delivery will occur through the
SIGP interpretation facility on SIE entry when SIE finds the X bit in
the status field set.
However, in handle_pv_notification(), there was no special handling for
SIGP, which leads to interrupt injection being requested by KVM for the
next SIE entry. This results in the interrupt being delivered twice:
once by the SIGP interpretation facility and once by KVM through the
IICTL.
Add the necessary special handling in handle_pv_notification(), similar
to handle_partial_execution(), which simply wakes the receiving VCPU and
leave interrupt delivery to the SIGP interpretation facility.
In contrast to external calls, emergency calls are not interpreted but
also cause a 108 intercept, which is why we still need to call
handle_instruction() for SIGP orders other than ecall.
Since kvm_s390_handle_sigp_pei() is now called for all SIGP orders which
cause a 108 intercept - even if they are actually handled by
handle_instruction() - move the tracepoint in kvm_s390_handle_sigp_pei()
to avoid possibly confusing trace messages.
Signed-off-by: Nico Boehr <nrb(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org> # 5.7
Fixes: da24a0cc58ed ("KVM: s390: protvirt: Instruction emulation")
Reviewed-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Reviewed-by: Janosch Frank <frankja(a)linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20220718130434.73302-1-nrb@linux.ibm.com
Message-Id: <20220718130434.73302-1-nrb(a)linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
---
arch/s390/kvm/intercept.c | 15 +++++++++++++++
arch/s390/kvm/sigp.c | 4 ++--
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8bd42a20d924..88112065d941 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
static int handle_pv_notification(struct kvm_vcpu *vcpu)
{
+ int ret;
+
if (vcpu->arch.sie_block->ipa == 0xb210)
return handle_pv_spx(vcpu);
if (vcpu->arch.sie_block->ipa == 0xb220)
return handle_pv_sclp(vcpu);
if (vcpu->arch.sie_block->ipa == 0xb9a4)
return handle_pv_uvc(vcpu);
+ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
+ /*
+ * Besides external call, other SIGP orders also cause a
+ * 108 (pv notify) intercept. In contrast to external call,
+ * these orders need to be emulated and hence the appropriate
+ * place to handle them is in handle_instruction().
+ * So first try kvm_s390_handle_sigp_pei() and if that isn't
+ * successful, go on with handle_instruction().
+ */
+ ret = kvm_s390_handle_sigp_pei(vcpu);
+ if (!ret)
+ return ret;
+ }
return handle_instruction(vcpu);
}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 8aaee2892ec3..cb747bf6c798 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
struct kvm_vcpu *dest_vcpu;
u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
- trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
-
if (order_code == SIGP_EXTERNAL_CALL) {
+ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
--
2.36.1
Tasks the are being deboosted from SCHED_DEADLINE might enter
enqueue_task_dl() one last time and hit an erroneous BUG_ON condition:
since they are not boosted anymore, the if (is_dl_boosted()) branch is
not taken, but the else if (!dl_prio) is and inside this one we
BUG_ON(!is_dl_boosted), which is of course false (BUG_ON triggered)
otherwise we had entered the if branch above. Long story short, the
current condition doesn't make sense and always leads to triggering of a
BUG.
Fix this by only checking enqueue flags, properly: ENQUEUE_REPLENISH has
to be present, but additional flags are not a problem.
Fixes: 64be6f1f5f71 ("sched/deadline: Don't replenish from a !SCHED_DEADLINE entity")
Cc: stable(a)vger.kernel.org
Signed-off-by: Juri Lelli <juri.lelli(a)redhat.com>
---
v1->v2
- Make detection of faulty condition less fatal [Peter Zijlstra]
- Cc stable and update fixes tag [Srivatsa S. Bhat]
---
kernel/sched/deadline.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5867e186c39a..0ab79d819a0d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1703,7 +1703,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* the throttle.
*/
p->dl.dl_throttled = 0;
- BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
+ if (!(flags & ENQUEUE_REPLENISH))
+ printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
+ task_pid_nr(p));
+
return;
}
--
2.36.1
Currently spectre_v2=ibrs forces write to MSR_IA32_SPEC_CTRL at every
entry and exit. On Enhanced IBRS parts setting MSR_IA32_SPEC_CTRL[IBRS]
only once at bootup is sufficient. MSR write at every kernel entry/exit
incur unnecessary penalty that can be avoided.
When Enhanced IBRS feature is present, switch from "ibrs" to "auto" mode
so that appropriate mitigation is selected.
Fixes: 7c693f54c873 ("x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS")
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
arch/x86/kernel/cpu/bugs.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0dd04713434b..7d7ebfdfbeda 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1303,6 +1303,12 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
+ pr_err("%s selected but CPU supports Enhanced IBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
base-commit: 72a8e05d4f66b5af7854df4490e3135168694b6b
--
2.35.3
From: Kan Liang <kan.liang(a)linux.intel.com>
The fuzzer triggers the below trace.
[ 7763.384369] unchecked MSR access error: WRMSR to 0x689
(tried to write 0x1fffffff8101349e) at rIP: 0xffffffff810704a4
(native_write_msr+0x4/0x20)
[ 7763.397420] Call Trace:
[ 7763.399881] <TASK>
[ 7763.401994] intel_pmu_lbr_restore+0x9a/0x1f0
[ 7763.406363] intel_pmu_lbr_sched_task+0x91/0x1c0
[ 7763.410992] __perf_event_task_sched_in+0x1cd/0x240
On a machine with the LBR format LBR_FORMAT_EIP_FLAGS2, when the TSX is
disabled, a TSX quirk is required to access LBR from registers.
The lbr_from_signext_quirk_needed() is introduced to determine whether
the TSX quirk should be applied. However, the
lbr_from_signext_quirk_needed() is invoked before the
intel_pmu_lbr_init(), which parses the LBR format information. Without
the correct LBR format information, the TSX quirk never be applied.
Move the lbr_from_signext_quirk_needed() into the intel_pmu_lbr_init().
Checking x86_pmu.lbr_has_tsx in the lbr_from_signext_quirk_needed() is
not required anymore.
Both LBR_FORMAT_EIP_FLAGS2 and LBR_FORMAT_INFO have LBR_TSX flag, but
only the LBR_FORMAT_EIP_FLAGS2 requirs the quirk. Update the comments
accordingly.
Fixes: 1ac7fd8159a8 ("perf/x86/intel/lbr: Support LBR format V7")
Reported-by: Vince Weaver <vincent.weaver(a)maine.edu>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/events/intel/lbr.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 13179f31fe10..4f70fb6c2c1e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -278,9 +278,9 @@ enum {
};
/*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
@@ -288,7 +288,7 @@ enum {
*
* Therefore, if:
*
- * 1) LBR has TSX format
+ * 1) LBR format LBR_FORMAT_EIP_FLAGS2
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
@@ -300,7 +300,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && x86_pmu.lbr_has_tsx;
+ return !tsx_support;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -1609,9 +1609,6 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
- if (lbr_from_signext_quirk_needed())
- static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1702,7 +1699,11 @@ void intel_pmu_lbr_init(void)
switch (x86_pmu.intel_cap.lbr_format) {
case LBR_FORMAT_EIP_FLAGS2:
x86_pmu.lbr_has_tsx = 1;
- fallthrough;
+ x86_pmu.lbr_from_flags = 1;
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
+ break;
+
case LBR_FORMAT_EIP_FLAGS:
x86_pmu.lbr_from_flags = 1;
break;
--
2.35.1