The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 602cae04c4864bb3487dfe4c2126c8d9e7e1614a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Wed, 19 Dec 2018 17:53:50 +0100
Subject: [PATCH] perf/x86/intel: Delay memory deallocation until
x86_pmu_dead_cpu()
intel_pmu_cpu_prepare() allocated memory for ->shared_regs among other
members of struct cpu_hw_events. This memory is released in
intel_pmu_cpu_dying() which is wrong. The counterpart of the
intel_pmu_cpu_prepare() callback is x86_pmu_dead_cpu().
Otherwise if the CPU fails on the UP path between CPUHP_PERF_X86_PREPARE
and CPUHP_AP_PERF_X86_STARTING then it won't release the memory but
allocate new memory on the next attempt to online the CPU (leaking the
old memory).
Also, if the CPU down path fails between CPUHP_AP_PERF_X86_STARTING and
CPUHP_PERF_X86_PREPARE then the CPU will go back online but never
allocate the memory that was released in x86_pmu_dying_cpu().
Make the memory allocation/free symmetrical in regard to the CPU hotplug
notifier by moving the deallocation to intel_pmu_cpu_dead().
This started in commit:
a7e3ed1e47011 ("perf: Add support for supplementary event registers").
In principle the bug was introduced in v2.6.39 (!), but it will almost
certainly not backport cleanly across the big CPU hotplug rewrite between v4.7-v4.15...
[ bigeasy: Added patch description. ]
[ mingo: Added backporting guidance. ]
Reported-by: He Zhe <zhe.he(a)windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> # With developer hat on
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> # With maintainer hat on
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Cc: Jiri Olsa <jolsa(a)redhat.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: acme(a)kernel.org
Cc: bp(a)alien8.de
Cc: hpa(a)zytor.com
Cc: jolsa(a)kernel.org
Cc: kan.liang(a)linux.intel.com
Cc: namhyung(a)kernel.org
Cc: <stable(a)vger.kernel.org>
Fixes: a7e3ed1e47011 ("perf: Add support for supplementary event registers").
Link: https://lkml.kernel.org/r/20181219165350.6s3jvyxbibpvlhtq@linutronix.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f6..daafb893449b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
}
static void intel_pmu_cpu_dying(int cpu)
+{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
}
free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
};
static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 602cae04c4864bb3487dfe4c2126c8d9e7e1614a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Wed, 19 Dec 2018 17:53:50 +0100
Subject: [PATCH] perf/x86/intel: Delay memory deallocation until
x86_pmu_dead_cpu()
intel_pmu_cpu_prepare() allocated memory for ->shared_regs among other
members of struct cpu_hw_events. This memory is released in
intel_pmu_cpu_dying() which is wrong. The counterpart of the
intel_pmu_cpu_prepare() callback is x86_pmu_dead_cpu().
Otherwise if the CPU fails on the UP path between CPUHP_PERF_X86_PREPARE
and CPUHP_AP_PERF_X86_STARTING then it won't release the memory but
allocate new memory on the next attempt to online the CPU (leaking the
old memory).
Also, if the CPU down path fails between CPUHP_AP_PERF_X86_STARTING and
CPUHP_PERF_X86_PREPARE then the CPU will go back online but never
allocate the memory that was released in x86_pmu_dying_cpu().
Make the memory allocation/free symmetrical in regard to the CPU hotplug
notifier by moving the deallocation to intel_pmu_cpu_dead().
This started in commit:
a7e3ed1e47011 ("perf: Add support for supplementary event registers").
In principle the bug was introduced in v2.6.39 (!), but it will almost
certainly not backport cleanly across the big CPU hotplug rewrite between v4.7-v4.15...
[ bigeasy: Added patch description. ]
[ mingo: Added backporting guidance. ]
Reported-by: He Zhe <zhe.he(a)windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> # With developer hat on
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> # With maintainer hat on
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Cc: Jiri Olsa <jolsa(a)redhat.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: acme(a)kernel.org
Cc: bp(a)alien8.de
Cc: hpa(a)zytor.com
Cc: jolsa(a)kernel.org
Cc: kan.liang(a)linux.intel.com
Cc: namhyung(a)kernel.org
Cc: <stable(a)vger.kernel.org>
Fixes: a7e3ed1e47011 ("perf: Add support for supplementary event registers").
Link: https://lkml.kernel.org/r/20181219165350.6s3jvyxbibpvlhtq@linutronix.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f6..daafb893449b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
}
static void intel_pmu_cpu_dying(int cpu)
+{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
}
free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
};
static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 602cae04c4864bb3487dfe4c2126c8d9e7e1614a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Wed, 19 Dec 2018 17:53:50 +0100
Subject: [PATCH] perf/x86/intel: Delay memory deallocation until
x86_pmu_dead_cpu()
intel_pmu_cpu_prepare() allocated memory for ->shared_regs among other
members of struct cpu_hw_events. This memory is released in
intel_pmu_cpu_dying() which is wrong. The counterpart of the
intel_pmu_cpu_prepare() callback is x86_pmu_dead_cpu().
Otherwise if the CPU fails on the UP path between CPUHP_PERF_X86_PREPARE
and CPUHP_AP_PERF_X86_STARTING then it won't release the memory but
allocate new memory on the next attempt to online the CPU (leaking the
old memory).
Also, if the CPU down path fails between CPUHP_AP_PERF_X86_STARTING and
CPUHP_PERF_X86_PREPARE then the CPU will go back online but never
allocate the memory that was released in x86_pmu_dying_cpu().
Make the memory allocation/free symmetrical in regard to the CPU hotplug
notifier by moving the deallocation to intel_pmu_cpu_dead().
This started in commit:
a7e3ed1e47011 ("perf: Add support for supplementary event registers").
In principle the bug was introduced in v2.6.39 (!), but it will almost
certainly not backport cleanly across the big CPU hotplug rewrite between v4.7-v4.15...
[ bigeasy: Added patch description. ]
[ mingo: Added backporting guidance. ]
Reported-by: He Zhe <zhe.he(a)windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> # With developer hat on
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> # With maintainer hat on
Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme(a)redhat.com>
Cc: Jiri Olsa <jolsa(a)redhat.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: acme(a)kernel.org
Cc: bp(a)alien8.de
Cc: hpa(a)zytor.com
Cc: jolsa(a)kernel.org
Cc: kan.liang(a)linux.intel.com
Cc: namhyung(a)kernel.org
Cc: <stable(a)vger.kernel.org>
Fixes: a7e3ed1e47011 ("perf: Add support for supplementary event registers").
Link: https://lkml.kernel.org/r/20181219165350.6s3jvyxbibpvlhtq@linutronix.de
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f6..daafb893449b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
}
static void intel_pmu_cpu_dying(int cpu)
+{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
}
free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
};
static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From b284909abad48b07d3071a9fc9b5692b3e64914b Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe(a)redhat.com>
Date: Wed, 30 Jan 2019 07:13:58 -0600
Subject: [PATCH] cpu/hotplug: Fix "SMT disabled by BIOS" detection for KVM
With the following commit:
73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
... the hotplug code attempted to detect when SMT was disabled by BIOS,
in which case it reported SMT as permanently disabled. However, that
code broke a virt hotplug scenario, where the guest is booted with only
primary CPU threads, and a sibling is brought online later.
The problem is that there doesn't seem to be a way to reliably
distinguish between the HW "SMT disabled by BIOS" case and the virt
"sibling not yet brought online" case. So the above-mentioned commit
was a bit misguided, as it permanently disabled SMT for both cases,
preventing future virt sibling hotplugs.
Going back and reviewing the original problems which were attempted to
be solved by that commit, when SMT was disabled in BIOS:
1) /sys/devices/system/cpu/smt/control showed "on" instead of
"notsupported"; and
2) vmx_vm_init() was incorrectly showing the L1TF_MSG_SMT warning.
I'd propose that we instead consider #1 above to not actually be a
problem. Because, at least in the virt case, it's possible that SMT
wasn't disabled by BIOS and a sibling thread could be brought online
later. So it makes sense to just always default the smt control to "on"
to allow for that possibility (assuming cpuid indicates that the CPU
supports SMT).
The real problem is #2, which has a simple fix: change vmx_vm_init() to
query the actual current SMT state -- i.e., whether any siblings are
currently online -- instead of looking at the SMT "control" sysfs value.
So fix it by:
a) reverting the original "fix" and its followup fix:
73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
bc2d8d262cba ("cpu/hotplug: Fix SMT supported evaluation")
and
b) changing vmx_vm_init() to query the actual current SMT state --
instead of the sysfs control value -- to determine whether the L1TF
warning is needed. This also requires the 'sched_smt_present'
variable to exported, instead of 'cpu_smt_control'.
Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
Reported-by: Igor Mammedov <imammedo(a)redhat.com>
Signed-off-by: Josh Poimboeuf <jpoimboe(a)redhat.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Joe Mario <jmario(a)redhat.com>
Cc: Jiri Kosina <jikos(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: kvm(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/e3a85d585da28cc333ecbc1e78ee9216e6da9396.15487943…
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1de0f4170178..01874d54f4fd 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -71,7 +71,7 @@ void __init check_bugs(void)
* identify_boot_cpu() initialized SMT support information, let the
* core code know.
*/
- cpu_smt_check_topology_early();
+ cpu_smt_check_topology();
if (!IS_ENABLED(CONFIG_SMP)) {
pr_info("CPU: ");
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4341175339f3..95d618045001 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -26,6 +26,7 @@
#include <linux/mod_devicetable.h>
#include <linux/mm.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/trace_events.h>
@@ -6823,7 +6824,7 @@ static int vmx_vm_init(struct kvm *kvm)
* Warn upon starting the first VM in a potentially
* insecure environment.
*/
- if (cpu_smt_control == CPU_SMT_ENABLED)
+ if (sched_smt_active())
pr_warn_once(L1TF_MSG_SMT);
if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
pr_warn_once(L1TF_MSG_L1D);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 218df7f4d3e1..5041357d0297 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -180,12 +180,10 @@ enum cpuhp_smt_control {
#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
extern enum cpuhp_smt_control cpu_smt_control;
extern void cpu_smt_disable(bool force);
-extern void cpu_smt_check_topology_early(void);
extern void cpu_smt_check_topology(void);
#else
# define cpu_smt_control (CPU_SMT_ENABLED)
static inline void cpu_smt_disable(bool force) { }
-static inline void cpu_smt_check_topology_early(void) { }
static inline void cpu_smt_check_topology(void) { }
#endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index c0c7f64573ed..d1c6d152da89 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { }
#ifdef CONFIG_HOTPLUG_SMT
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
-EXPORT_SYMBOL_GPL(cpu_smt_control);
-
-static bool cpu_smt_available __read_mostly;
void __init cpu_smt_disable(bool force)
{
@@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force)
/*
* The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code before non boot CPUs
- * are brought up.
- */
-void __init cpu_smt_check_topology_early(void)
-{
- if (!topology_smt_supported())
- cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-}
-
-/*
- * If SMT was disabled by BIOS, detect it here, after the CPUs have been
- * brought online. This ensures the smt/l1tf sysfs entries are consistent
- * with reality. cpu_smt_available is set to true during the bringup of non
- * boot CPUs when a SMT sibling is detected. Note, this may overwrite
- * cpu_smt_control's previous setting.
+ * CPU identification. Called from architecture code.
*/
void __init cpu_smt_check_topology(void)
{
- if (!cpu_smt_available)
+ if (!topology_smt_supported())
cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
}
@@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable);
static inline bool cpu_smt_allowed(unsigned int cpu)
{
- if (topology_is_primary_thread(cpu))
+ if (cpu_smt_control == CPU_SMT_ENABLED)
return true;
- /*
- * If the CPU is not a 'primary' thread and the booted_once bit is
- * set then the processor has SMT support. Store this information
- * for the late check of SMT support in cpu_smt_check_topology().
- */
- if (per_cpu(cpuhp_state, cpu).booted_once)
- cpu_smt_available = true;
-
- if (cpu_smt_control == CPU_SMT_ENABLED)
+ if (topology_is_primary_thread(cpu))
return true;
/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50aa2aba69bd..310d0637fe4b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
#ifdef CONFIG_SCHED_SMT
DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+EXPORT_SYMBOL_GPL(sched_smt_present);
static inline void set_idle_cores(int cpu, int val)
{
diff --git a/kernel/smp.c b/kernel/smp.c
index 163c451af42e..f4cf1b0bb3b8 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -584,8 +584,6 @@ void __init smp_init(void)
num_nodes, (num_nodes > 1 ? "s" : ""),
num_cpus, (num_cpus > 1 ? "s" : ""));
- /* Final decision about SMT support */
- cpu_smt_check_topology();
/* Any cleanup work */
smp_cpus_done(setup_max_cpus);
}
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From cfa39381173d5f969daf43582c95ad679189cbc9 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Sat, 26 Jan 2019 01:54:33 +0100
Subject: [PATCH] kvm: fix kvm_ioctl_create_device() reference counting
(CVE-2019-6974)
kvm_ioctl_create_device() does the following:
1. creates a device that holds a reference to the VM object (with a borrowed
reference, the VM's refcount has not been bumped yet)
2. initializes the device
3. transfers the reference to the device to the caller's file descriptor table
4. calls kvm_get_kvm() to turn the borrowed reference to the VM into a real
reference
The ownership transfer in step 3 must not happen before the reference to the VM
becomes a proper, non-borrowed reference, which only happens in step 4.
After step 3, an attacker can close the file descriptor and drop the borrowed
reference, which can cause the refcount of the kvm object to drop to zero.
This means that we need to grab a reference for the device before
anon_inode_getfd(), otherwise the VM can disappear from under us.
Fixes: 852b6d57dc7f ("kvm: add device control API")
Cc: stable(a)kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5ecea812cb6a..585845203db8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3000,8 +3000,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
if (ops->init)
ops->init(dev);
+ kvm_get_kvm(kvm);
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
+ kvm_put_kvm(kvm);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
mutex_unlock(&kvm->lock);
@@ -3009,7 +3011,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return ret;
}
- kvm_get_kvm(kvm);
cd->fd = ret;
return 0;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From cfa39381173d5f969daf43582c95ad679189cbc9 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Sat, 26 Jan 2019 01:54:33 +0100
Subject: [PATCH] kvm: fix kvm_ioctl_create_device() reference counting
(CVE-2019-6974)
kvm_ioctl_create_device() does the following:
1. creates a device that holds a reference to the VM object (with a borrowed
reference, the VM's refcount has not been bumped yet)
2. initializes the device
3. transfers the reference to the device to the caller's file descriptor table
4. calls kvm_get_kvm() to turn the borrowed reference to the VM into a real
reference
The ownership transfer in step 3 must not happen before the reference to the VM
becomes a proper, non-borrowed reference, which only happens in step 4.
After step 3, an attacker can close the file descriptor and drop the borrowed
reference, which can cause the refcount of the kvm object to drop to zero.
This means that we need to grab a reference for the device before
anon_inode_getfd(), otherwise the VM can disappear from under us.
Fixes: 852b6d57dc7f ("kvm: add device control API")
Cc: stable(a)kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5ecea812cb6a..585845203db8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3000,8 +3000,10 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
if (ops->init)
ops->init(dev);
+ kvm_get_kvm(kvm);
ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
+ kvm_put_kvm(kvm);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
mutex_unlock(&kvm->lock);
@@ -3009,7 +3011,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return ret;
}
- kvm_get_kvm(kvm);
cd->fd = ret;
return 0;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From bb61b843ffd46978d7ca5095453e572714934eeb Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav(a)linux.ibm.com>
Date: Wed, 30 Jan 2019 17:56:51 +0530
Subject: [PATCH] scsi: cxlflash: Prevent deadlock when adapter probe fails
Presently when an error is encountered during probe of the cxlflash
adapter, a deadlock is seen with cpu thread stuck inside
cxlflash_remove(). Below is the trace of the deadlock as logged by
khungtaskd:
cxlflash 0006:00:00.0: cxlflash_probe: init_afu failed rc=-16
INFO: task kworker/80:1:890 blocked for more than 120 seconds.
Not tainted 5.0.0-rc4-capi2-kexec+ #2
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/80:1 D 0 890 2 0x00000808
Workqueue: events work_for_cpu_fn
Call Trace:
0x4d72136320 (unreliable)
__switch_to+0x2cc/0x460
__schedule+0x2bc/0xac0
schedule+0x40/0xb0
cxlflash_remove+0xec/0x640 [cxlflash]
cxlflash_probe+0x370/0x8f0 [cxlflash]
local_pci_probe+0x6c/0x140
work_for_cpu_fn+0x38/0x60
process_one_work+0x260/0x530
worker_thread+0x280/0x5d0
kthread+0x1a8/0x1b0
ret_from_kernel_thread+0x5c/0x80
INFO: task systemd-udevd:5160 blocked for more than 120 seconds.
The deadlock occurs as cxlflash_remove() is called from cxlflash_probe()
without setting 'cxlflash_cfg->state' to STATE_PROBED and the probe thread
starts to wait on 'cxlflash_cfg->reset_waitq'. Since the device was never
successfully probed the 'cxlflash_cfg->state' never changes from
STATE_PROBING hence the deadlock occurs.
We fix this deadlock by setting the variable 'cxlflash_cfg->state' to
STATE_PROBED in case an error occurs during cxlflash_probe() and just
before calling cxlflash_remove().
Cc: stable(a)vger.kernel.org
Fixes: c21e0bbfc485("cxlflash: Base support for IBM CXL Flash Adapter")
Signed-off-by: Vaibhav Jain <vaibhav(a)linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index bfa13e3b191c..c8bad2c093b8 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3687,6 +3687,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
host->max_cmd_len = CXLFLASH_MAX_CDB_LEN;
cfg = shost_priv(host);
+ cfg->state = STATE_PROBING;
cfg->host = host;
rc = alloc_mem(cfg);
if (rc) {
@@ -3775,6 +3776,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
return rc;
out_remove:
+ cfg->state = STATE_PROBED;
cxlflash_remove(pdev);
goto out;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From bb61b843ffd46978d7ca5095453e572714934eeb Mon Sep 17 00:00:00 2001
From: Vaibhav Jain <vaibhav(a)linux.ibm.com>
Date: Wed, 30 Jan 2019 17:56:51 +0530
Subject: [PATCH] scsi: cxlflash: Prevent deadlock when adapter probe fails
Presently when an error is encountered during probe of the cxlflash
adapter, a deadlock is seen with cpu thread stuck inside
cxlflash_remove(). Below is the trace of the deadlock as logged by
khungtaskd:
cxlflash 0006:00:00.0: cxlflash_probe: init_afu failed rc=-16
INFO: task kworker/80:1:890 blocked for more than 120 seconds.
Not tainted 5.0.0-rc4-capi2-kexec+ #2
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/80:1 D 0 890 2 0x00000808
Workqueue: events work_for_cpu_fn
Call Trace:
0x4d72136320 (unreliable)
__switch_to+0x2cc/0x460
__schedule+0x2bc/0xac0
schedule+0x40/0xb0
cxlflash_remove+0xec/0x640 [cxlflash]
cxlflash_probe+0x370/0x8f0 [cxlflash]
local_pci_probe+0x6c/0x140
work_for_cpu_fn+0x38/0x60
process_one_work+0x260/0x530
worker_thread+0x280/0x5d0
kthread+0x1a8/0x1b0
ret_from_kernel_thread+0x5c/0x80
INFO: task systemd-udevd:5160 blocked for more than 120 seconds.
The deadlock occurs as cxlflash_remove() is called from cxlflash_probe()
without setting 'cxlflash_cfg->state' to STATE_PROBED and the probe thread
starts to wait on 'cxlflash_cfg->reset_waitq'. Since the device was never
successfully probed the 'cxlflash_cfg->state' never changes from
STATE_PROBING hence the deadlock occurs.
We fix this deadlock by setting the variable 'cxlflash_cfg->state' to
STATE_PROBED in case an error occurs during cxlflash_probe() and just
before calling cxlflash_remove().
Cc: stable(a)vger.kernel.org
Fixes: c21e0bbfc485("cxlflash: Base support for IBM CXL Flash Adapter")
Signed-off-by: Vaibhav Jain <vaibhav(a)linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index bfa13e3b191c..c8bad2c093b8 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3687,6 +3687,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
host->max_cmd_len = CXLFLASH_MAX_CDB_LEN;
cfg = shost_priv(host);
+ cfg->state = STATE_PROBING;
cfg->host = host;
rc = alloc_mem(cfg);
if (rc) {
@@ -3775,6 +3776,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
return rc;
out_remove:
+ cfg->state = STATE_PROBED;
cxlflash_remove(pdev);
goto out;
}
The patch below does not apply to the 3.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From c418fd6c01fbc5516a2cd1eaf1df1ec86869028a Mon Sep 17 00:00:00 2001
From: Paul Elder <paul.elder(a)ideasonboard.com>
Date: Wed, 30 Jan 2019 08:13:21 -0600
Subject: [PATCH] usb: gadget: musb: fix short isoc packets with inventra dma
Handling short packets (length < max packet size) in the Inventra DMA
engine in the MUSB driver causes the MUSB DMA controller to hang. An
example of a problem that is caused by this problem is when streaming
video out of a UVC gadget, only the first video frame is transferred.
For short packets (mode-0 or mode-1 DMA), MUSB_TXCSR_TXPKTRDY must be
set manually by the driver. This was previously done in musb_g_tx
(musb_gadget.c), but incorrectly (all csr flags were cleared, and only
MUSB_TXCSR_MODE and MUSB_TXCSR_TXPKTRDY were set). Fixing that problem
allows some requests to be transferred correctly, but multiple requests
were often put together in one USB packet, and caused problems if the
packet size was not a multiple of 4. Instead, set MUSB_TXCSR_TXPKTRDY
in dma_controller_irq (musbhsdma.c), just like host mode transfers.
This topic was originally tackled by Nicolas Boichat [0] [1] and is
discussed further at [2] as part of his GSoC project [3].
[0] https://groups.google.com/forum/?hl=en#!topic/beagleboard-gsoc/k8Azwfp75CU
[1] https://gitorious.org/beagleboard-usbsniffer/beagleboard-usbsniffer-kernel/…
[2] http://beagleboard-usbsniffer.blogspot.com/2010/07/musb-isochronous-transfe…
[3] http://elinux.org/BeagleBoard/GSoC/USBSniffer
Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support")
Signed-off-by: Paul Elder <paul.elder(a)ideasonboard.com>
Signed-off-by: Bin Liu <b-liu(a)ti.com>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index eae8b1b1b45b..ffe462a657b1 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -452,13 +452,10 @@ void musb_g_tx(struct musb *musb, u8 epnum)
}
if (request) {
- u8 is_dma = 0;
- bool short_packet = false;
trace_musb_req_tx(req);
if (dma && (csr & MUSB_TXCSR_DMAENAB)) {
- is_dma = 1;
csr |= MUSB_TXCSR_P_WZC_BITS;
csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN |
MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET);
@@ -476,16 +473,8 @@ void musb_g_tx(struct musb *musb, u8 epnum)
*/
if ((request->zero && request->length)
&& (request->length % musb_ep->packet_sz == 0)
- && (request->actual == request->length))
- short_packet = true;
+ && (request->actual == request->length)) {
- if ((musb_dma_inventra(musb) || musb_dma_ux500(musb)) &&
- (is_dma && (!dma->desired_mode ||
- (request->actual &
- (musb_ep->packet_sz - 1)))))
- short_packet = true;
-
- if (short_packet) {
/*
* On DMA completion, FIFO may not be
* available yet...
diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c
index a688f7f87829..5fc6825745f2 100644
--- a/drivers/usb/musb/musbhsdma.c
+++ b/drivers/usb/musb/musbhsdma.c
@@ -346,12 +346,10 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data)
channel->status = MUSB_DMA_STATUS_FREE;
/* completed */
- if ((devctl & MUSB_DEVCTL_HM)
- && (musb_channel->transmit)
- && ((channel->desired_mode == 0)
- || (channel->actual_len &
- (musb_channel->max_packet_sz - 1)))
- ) {
+ if (musb_channel->transmit &&
+ (!channel->desired_mode ||
+ (channel->actual_len %
+ musb_channel->max_packet_sz))) {
u8 epnum = musb_channel->epnum;
int offset = musb->io.ep_offset(epnum,
MUSB_TXCSR);
@@ -363,11 +361,14 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data)
*/
musb_ep_select(mbase, epnum);
txcsr = musb_readw(mbase, offset);
- txcsr &= ~(MUSB_TXCSR_DMAENAB
+ if (channel->desired_mode == 1) {
+ txcsr &= ~(MUSB_TXCSR_DMAENAB
| MUSB_TXCSR_AUTOSET);
- musb_writew(mbase, offset, txcsr);
- /* Send out the packet */
- txcsr &= ~MUSB_TXCSR_DMAMODE;
+ musb_writew(mbase, offset, txcsr);
+ /* Send out the packet */
+ txcsr &= ~MUSB_TXCSR_DMAMODE;
+ txcsr |= MUSB_TXCSR_DMAENAB;
+ }
txcsr |= MUSB_TXCSR_TXPKTRDY;
musb_writew(mbase, offset, txcsr);
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From c418fd6c01fbc5516a2cd1eaf1df1ec86869028a Mon Sep 17 00:00:00 2001
From: Paul Elder <paul.elder(a)ideasonboard.com>
Date: Wed, 30 Jan 2019 08:13:21 -0600
Subject: [PATCH] usb: gadget: musb: fix short isoc packets with inventra dma
Handling short packets (length < max packet size) in the Inventra DMA
engine in the MUSB driver causes the MUSB DMA controller to hang. An
example of a problem that is caused by this problem is when streaming
video out of a UVC gadget, only the first video frame is transferred.
For short packets (mode-0 or mode-1 DMA), MUSB_TXCSR_TXPKTRDY must be
set manually by the driver. This was previously done in musb_g_tx
(musb_gadget.c), but incorrectly (all csr flags were cleared, and only
MUSB_TXCSR_MODE and MUSB_TXCSR_TXPKTRDY were set). Fixing that problem
allows some requests to be transferred correctly, but multiple requests
were often put together in one USB packet, and caused problems if the
packet size was not a multiple of 4. Instead, set MUSB_TXCSR_TXPKTRDY
in dma_controller_irq (musbhsdma.c), just like host mode transfers.
This topic was originally tackled by Nicolas Boichat [0] [1] and is
discussed further at [2] as part of his GSoC project [3].
[0] https://groups.google.com/forum/?hl=en#!topic/beagleboard-gsoc/k8Azwfp75CU
[1] https://gitorious.org/beagleboard-usbsniffer/beagleboard-usbsniffer-kernel/…
[2] http://beagleboard-usbsniffer.blogspot.com/2010/07/musb-isochronous-transfe…
[3] http://elinux.org/BeagleBoard/GSoC/USBSniffer
Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support")
Signed-off-by: Paul Elder <paul.elder(a)ideasonboard.com>
Signed-off-by: Bin Liu <b-liu(a)ti.com>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index eae8b1b1b45b..ffe462a657b1 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -452,13 +452,10 @@ void musb_g_tx(struct musb *musb, u8 epnum)
}
if (request) {
- u8 is_dma = 0;
- bool short_packet = false;
trace_musb_req_tx(req);
if (dma && (csr & MUSB_TXCSR_DMAENAB)) {
- is_dma = 1;
csr |= MUSB_TXCSR_P_WZC_BITS;
csr &= ~(MUSB_TXCSR_DMAENAB | MUSB_TXCSR_P_UNDERRUN |
MUSB_TXCSR_TXPKTRDY | MUSB_TXCSR_AUTOSET);
@@ -476,16 +473,8 @@ void musb_g_tx(struct musb *musb, u8 epnum)
*/
if ((request->zero && request->length)
&& (request->length % musb_ep->packet_sz == 0)
- && (request->actual == request->length))
- short_packet = true;
+ && (request->actual == request->length)) {
- if ((musb_dma_inventra(musb) || musb_dma_ux500(musb)) &&
- (is_dma && (!dma->desired_mode ||
- (request->actual &
- (musb_ep->packet_sz - 1)))))
- short_packet = true;
-
- if (short_packet) {
/*
* On DMA completion, FIFO may not be
* available yet...
diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c
index a688f7f87829..5fc6825745f2 100644
--- a/drivers/usb/musb/musbhsdma.c
+++ b/drivers/usb/musb/musbhsdma.c
@@ -346,12 +346,10 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data)
channel->status = MUSB_DMA_STATUS_FREE;
/* completed */
- if ((devctl & MUSB_DEVCTL_HM)
- && (musb_channel->transmit)
- && ((channel->desired_mode == 0)
- || (channel->actual_len &
- (musb_channel->max_packet_sz - 1)))
- ) {
+ if (musb_channel->transmit &&
+ (!channel->desired_mode ||
+ (channel->actual_len %
+ musb_channel->max_packet_sz))) {
u8 epnum = musb_channel->epnum;
int offset = musb->io.ep_offset(epnum,
MUSB_TXCSR);
@@ -363,11 +361,14 @@ static irqreturn_t dma_controller_irq(int irq, void *private_data)
*/
musb_ep_select(mbase, epnum);
txcsr = musb_readw(mbase, offset);
- txcsr &= ~(MUSB_TXCSR_DMAENAB
+ if (channel->desired_mode == 1) {
+ txcsr &= ~(MUSB_TXCSR_DMAENAB
| MUSB_TXCSR_AUTOSET);
- musb_writew(mbase, offset, txcsr);
- /* Send out the packet */
- txcsr &= ~MUSB_TXCSR_DMAMODE;
+ musb_writew(mbase, offset, txcsr);
+ /* Send out the packet */
+ txcsr &= ~MUSB_TXCSR_DMAMODE;
+ txcsr |= MUSB_TXCSR_DMAENAB;
+ }
txcsr |= MUSB_TXCSR_TXPKTRDY;
musb_writew(mbase, offset, txcsr);
}