--
Greetings,
I am contacting you based on the Investment/Loan opportunity for
companies in need of financing a project/business, We have developed a
new method of financing that doesn't take long to receive financing from
our clients.
If you are looking for funds to finance your project/Business or if
you are willing to work as our agent in your country to find clients in
need of financing and earn commissions, then get back to me for more
details.
Regards,
Ibrahim Tafa
ABIENCE INVESTMENT GROUP FZE, United Arab Emirates
--
Greetings,
I am contacting you based on the Investment/Loan opportunity for
companies in need of financing a project/business, We have developed a
new method of financing that doesn't take long to receive financing from
our clients.
If you are looking for funds to finance your project/Business or if
you are willing to work as our agent in your country to find clients in
need of financing and earn commissions, then get back to me for more
details.
Regards,
Ibrahim Tafa
ABIENCE INVESTMENT GROUP FZE, United Arab Emirates
From: Valentin Schneider <vschneid(a)redhat.com>
commit 05c6257433b7212f07a7e53479a8ab038fc1666a upstream.
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI
panic() doesn't work. The cause of that lies in the PREEMPT_RT definition
of mutex_trylock():
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
return 0;
This prevents an nmi_panic() from executing the main body of
__crash_kexec() which does the actual kexec into the kdump kernel. The
warning and return are explained by:
6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
[...]
The reasons for this are:
1) There is a potential deadlock in the slowpath
2) Another cpu which blocks on the rtmutex will boost the task
which allegedly locked the rtmutex, but that cannot work
because the hard/softirq context borrows the task context.
Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex
and replace it with an atomic variable. This is somewhat overzealous as
*some* callsites could keep using a mutex (e.g. the sysfs-facing ones
like crash_shrink_memory()), but this has the benefit of involving a
single unified lock and preventing any future NMI-related surprises.
Tested by triggering NMI panics via:
$ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
$ echo 1 > /proc/sys/kernel/unknown_nmi_panic
$ echo 1 > /proc/sys/kernel/panic
$ ipmitool power diag
Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com
Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
Signed-off-by: Valentin Schneider <vschneid(a)redhat.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: "Eric W . Biederman" <ebiederm(a)xmission.com>
Cc: Juri Lelli <jlelli(a)redhat.com>
Cc: Luis Claudio R. Goncalves <lgoncalv(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org # 5.15.x
Signed-off-by: Wen Yang <wenyang.linux(a)foxmail.com>
---
kernel/kexec.c | 11 ++++-------
kernel/kexec_core.c | 20 ++++++++++----------
kernel/kexec_file.c | 4 ++--
kernel/kexec_internal.h | 15 ++++++++++++++-
4 files changed, 30 insertions(+), 20 deletions(-)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b5e40f069768..cb8e6e6f983c 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
/*
* Because we write directly to the reserved memory region when loading
- * crash kernels we need a mutex here to prevent multiple crash kernels
- * from attempting to load simultaneously, and to prevent a crash kernel
- * from loading over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
+ * crash kernels we need a serialization here to prevent multiple crash
+ * kernels from attempting to load simultaneously.
*/
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (flags & KEXEC_ON_CRASH) {
@@ -165,7 +162,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
kimage_free(image);
out_unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index a101d2b77936..bdc2d952911c 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -46,7 +46,7 @@
#include <crypto/hash.h>
#include "kexec_internal.h"
-DEFINE_MUTEX(kexec_mutex);
+atomic_t __kexec_lock = ATOMIC_INIT(0);
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -944,7 +944,7 @@ int kexec_load_disabled;
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
- /* Take the kexec_mutex here to prevent sys_kexec_load
+ /* Take the kexec_lock here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
@@ -952,7 +952,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
- if (mutex_trylock(&kexec_mutex)) {
+ if (kexec_trylock()) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
@@ -961,7 +961,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -993,13 +993,13 @@ ssize_t crash_get_memory_size(void)
{
ssize_t size = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return size;
}
@@ -1019,7 +1019,7 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (kexec_crash_image) {
@@ -1058,7 +1058,7 @@ int crash_shrink_memory(unsigned long new_size)
insert_resource(&iomem_resource, ram_res);
unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
@@ -1130,7 +1130,7 @@ int kernel_kexec(void)
{
int error = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (!kexec_image) {
error = -EINVAL;
@@ -1206,7 +1206,7 @@ int kernel_kexec(void)
#endif
Unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return error;
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index f7a4fd4d243f..1fb7ff690577 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -343,7 +343,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
image = NULL;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
dest_image = &kexec_image;
@@ -415,7 +415,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres();
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
kimage_free(image);
return ret;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 48aaf2ac0d0d..74da1409cd14 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image);
int kimage_is_destination_range(struct kimage *image,
unsigned long start, unsigned long end);
-extern struct mutex kexec_mutex;
+/*
+ * Whatever is used to serialize accesses to the kexec_crash_image needs to be
+ * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
+ * "simple" atomic variable that is acquired with a cmpxchg().
+ */
+extern atomic_t __kexec_lock;
+static inline bool kexec_trylock(void)
+{
+ return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
+}
+static inline void kexec_unlock(void)
+{
+ atomic_set_release(&__kexec_lock, 0);
+}
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
--
2.37.2
From: Valentin Schneider <vschneid(a)redhat.com>
commit 7bb5da0d490b2d836c5218f5186ee588d2145310 upstream.
Patch series "kexec, panic: Making crash_kexec() NMI safe", v4.
This patch (of 2):
Most acquistions of kexec_mutex are done via mutex_trylock() - those were
a direct "translation" from:
8c5a1cf0ad3a ("kexec: use a mutex for locking rather than xchg()")
there have however been two additions since then that use mutex_lock():
crash_get_memory_size() and crash_shrink_memory().
A later commit will replace said mutex with an atomic variable, and
locking operations will become atomic_cmpxchg(). Rather than having those
mutex_lock() become while (atomic_cmpxchg(&lock, 0, 1)), turn them into
trylocks that can return -EBUSY on acquisition failure.
This does halve the printable size of the crash kernel, but that's still
neighbouring 2G for 32bit kernels which should be ample enough.
Link: https://lkml.kernel.org/r/20220630223258.4144112-1-vschneid@redhat.com
Link: https://lkml.kernel.org/r/20220630223258.4144112-2-vschneid@redhat.com
Signed-off-by: Valentin Schneider <vschneid(a)redhat.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: "Eric W . Biederman" <ebiederm(a)xmission.com>
Cc: Juri Lelli <jlelli(a)redhat.com>
Cc: Luis Claudio R. Goncalves <lgoncalv(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Baoquan He <bhe(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: <stable(a)vger.kernel.org> # v5.15+
Signed-off-by: Wen Yang <wenyang.linux(a)foxmail.com>
---
include/linux/kexec.h | 2 +-
kernel/kexec_core.c | 12 ++++++++----
kernel/ksysfs.c | 7 ++++++-
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index cf042d41c87b..88c289ce3039 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -390,8 +390,8 @@ extern note_buf_t __percpu *crash_notes;
extern bool kexec_in_progress;
int crash_shrink_memory(unsigned long new_size);
-size_t crash_get_memory_size(void);
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+ssize_t crash_get_memory_size(void);
void arch_kexec_protect_crashkres(void);
void arch_kexec_unprotect_crashkres(void);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 5a5d192a89ac..a101d2b77936 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -989,13 +989,16 @@ void crash_kexec(struct pt_regs *regs)
}
}
-size_t crash_get_memory_size(void)
+ssize_t crash_get_memory_size(void)
{
- size_t size = 0;
+ ssize_t size = 0;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
- mutex_lock(&kexec_mutex);
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
+
mutex_unlock(&kexec_mutex);
return size;
}
@@ -1016,7 +1019,8 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;
- mutex_lock(&kexec_mutex);
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
if (kexec_crash_image) {
ret = -ENOENT;
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 35859da8bd4f..e20c19e3ba49 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -106,7 +106,12 @@ KERNEL_ATTR_RO(kexec_crash_loaded);
static ssize_t kexec_crash_size_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%zu\n", crash_get_memory_size());
+ ssize_t size = crash_get_memory_size();
+
+ if (size < 0)
+ return size;
+
+ return sprintf(buf, "%zd\n", size);
}
static ssize_t kexec_crash_size_store(struct kobject *kobj,
struct kobj_attribute *attr,
--
2.37.2
From: Valentin Schneider <vschneid(a)redhat.com>
commit 05c6257433b7212f07a7e53479a8ab038fc1666a upstream.
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI
panic() doesn't work. The cause of that lies in the PREEMPT_RT definition
of mutex_trylock():
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
return 0;
This prevents an nmi_panic() from executing the main body of
__crash_kexec() which does the actual kexec into the kdump kernel. The
warning and return are explained by:
6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
[...]
The reasons for this are:
1) There is a potential deadlock in the slowpath
2) Another cpu which blocks on the rtmutex will boost the task
which allegedly locked the rtmutex, but that cannot work
because the hard/softirq context borrows the task context.
Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex
and replace it with an atomic variable. This is somewhat overzealous as
*some* callsites could keep using a mutex (e.g. the sysfs-facing ones
like crash_shrink_memory()), but this has the benefit of involving a
single unified lock and preventing any future NMI-related surprises.
Tested by triggering NMI panics via:
$ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
$ echo 1 > /proc/sys/kernel/unknown_nmi_panic
$ echo 1 > /proc/sys/kernel/panic
$ ipmitool power diag
Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com
Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
Signed-off-by: Valentin Schneider <vschneid(a)redhat.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: "Eric W . Biederman" <ebiederm(a)xmission.com>
Cc: Juri Lelli <jlelli(a)redhat.com>
Cc: Luis Claudio R. Goncalves <lgoncalv(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Wen Yang <wenyang.linux(a)foxmail.com>
---
kernel/kexec.c | 11 ++++-------
kernel/kexec_core.c | 20 ++++++++++----------
kernel/kexec_file.c | 4 ++--
kernel/kexec_internal.h | 15 ++++++++++++++-
4 files changed, 30 insertions(+), 20 deletions(-)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 9c7aef8f4bb6..f0f0c6555454 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -112,13 +112,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
/*
* Because we write directly to the reserved memory region when loading
- * crash kernels we need a mutex here to prevent multiple crash kernels
- * from attempting to load simultaneously, and to prevent a crash kernel
- * from loading over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
+ * crash kernels we need a serialization here to prevent multiple crash
+ * kernels from attempting to load simultaneously.
*/
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (flags & KEXEC_ON_CRASH) {
@@ -184,7 +181,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
kimage_free(image);
out_unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index e47870f30728..7a8104d48997 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -45,7 +45,7 @@
#include <crypto/sha.h>
#include "kexec_internal.h"
-DEFINE_MUTEX(kexec_mutex);
+atomic_t __kexec_lock = ATOMIC_INIT(0);
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -943,7 +943,7 @@ int kexec_load_disabled;
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
- /* Take the kexec_mutex here to prevent sys_kexec_load
+ /* Take the kexec_lock here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
@@ -951,7 +951,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
- if (mutex_trylock(&kexec_mutex)) {
+ if (kexec_trylock()) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
@@ -960,7 +960,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -993,13 +993,13 @@ ssize_t crash_get_memory_size(void)
{
ssize_t size = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return size;
}
@@ -1019,7 +1019,7 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (kexec_crash_image) {
@@ -1058,7 +1058,7 @@ int crash_shrink_memory(unsigned long new_size)
insert_resource(&iomem_resource, ram_res);
unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
@@ -1130,7 +1130,7 @@ int kernel_kexec(void)
{
int error = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (!kexec_image) {
error = -EINVAL;
@@ -1205,7 +1205,7 @@ int kernel_kexec(void)
#endif
Unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return error;
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index fff11916aba3..b9c857782ada 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -343,7 +343,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
image = NULL;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
dest_image = &kexec_image;
@@ -415,7 +415,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres();
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
kimage_free(image);
return ret;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 39d30ccf8d87..49d4e3ab9c96 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,7 +15,20 @@ int kimage_is_destination_range(struct kimage *image,
int machine_kexec_post_load(struct kimage *image);
-extern struct mutex kexec_mutex;
+/*
+ * Whatever is used to serialize accesses to the kexec_crash_image needs to be
+ * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
+ * "simple" atomic variable that is acquired with a cmpxchg().
+ */
+extern atomic_t __kexec_lock;
+static inline bool kexec_trylock(void)
+{
+ return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
+}
+static inline void kexec_unlock(void)
+{
+ atomic_set_release(&__kexec_lock, 0);
+}
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
--
2.37.2