Good day,
My name is Luis Fernandez.I would like to discuss something
important that will benefit both of us. I will send you more
details upon your response
Regards
Luis Fernandez
commit fad7cd3310db ("nbd: add the check to prevent overflow in
__nbd_ioctl()") raised an issue from the fallback helpers added in
commit f0907827a8a9 ("compiler.h: enable builtin overflow checkers and
add fallback code")
ERROR: modpost: "__divdi3" [drivers/block/nbd.ko] undefined!
As Stephen Rothwell notes:
The added check_mul_overflow() call is being passed 64 bit values.
COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW is not set for this build (see
include/linux/overflow.h).
Specifically, the helpers for checking whether the results of a
multiplication overflowed (__unsigned_mul_overflow,
__signed_add_overflow) use the division operator when
!COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW. This is problematic for 64b
operands on 32b hosts.
This was fixed upstream by
commit 76ae847497bc ("Documentation: raise minimum supported version of
GCC to 5.1")
which is not suitable to be backported to stable.
Further, __builtin_mul_overflow() would emit a libcall to a
compiler-rt-only symbol when compiling with clang < 14 for 32b targets.
ld.lld: error: undefined symbol: __mulodi4
In order to keep stable buildable with GCC 4.9 and clang < 14, modify
struct nbd_config to instead track the number of bits of the block size;
reconstructing the block size using runtime checked shifts that are not
problematic for those compilers and in a ways that can be backported to
stable.
In nbd_set_size, we do validate that the value of blksize must be a
power of two (POT) and is in the range of [512, PAGE_SIZE] (both
inclusive).
This does modify the debugfs interface.
Cc: stable(a)vger.kernel.org
Cc: Arnd Bergmann <arnd(a)kernel.org>
Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Link: https://github.com/ClangBuiltLinux/linux/issues/1438
Link: https://lore.kernel.org/all/20210909182525.372ee687@canb.auug.org.au/
Link: https://lore.kernel.org/stable/CAHk-=whiQBofgis_rkniz8GBP9wZtSZdcDEffgSLO62…
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Reported-by: Nathan Chancellor <nathan(a)kernel.org>
Reported-by: Stephen Rothwell <sfr(a)canb.auug.org.au>
Suggested-by: Kees Cook <keescook(a)chromium.org>
Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Suggested-by: Pavel Machek <pavel(a)ucw.cz>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
This patch is kind of a v3 for solving this problem.
v2: https://lore.kernel.org/stable/20210914002318.2298583-1-ndesaulniers@google…
v1: https://lore.kernel.org/stable/20210913203201.1844253-1-ndesaulniers@google…
But is quite different in approach. Instead of trying to fix up the
overflow routines in stable, we amend the code in nbd.c as per Linus
(against mainline) with fixes from Kees to use the named overflow
checker.
drivers/block/nbd.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5170a630778d..1183f7872b71 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -97,13 +97,18 @@ struct nbd_config {
atomic_t recv_threads;
wait_queue_head_t recv_wq;
- loff_t blksize;
+ unsigned int blksize_bits;
loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbg_dir;
#endif
};
+static inline unsigned int nbd_blksize(struct nbd_config *config)
+{
+ return 1u << config->blksize_bits;
+}
+
struct nbd_device {
struct blk_mq_tag_set tag_set;
@@ -146,7 +151,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
-#define NBD_DEF_BLKSIZE 1024
+#define NBD_DEF_BLKSIZE_BITS 10
static unsigned int nbds_max = 16;
static int max_part = 16;
@@ -317,12 +322,12 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
loff_t blksize)
{
if (!blksize)
- blksize = NBD_DEF_BLKSIZE;
+ blksize = 1u << NBD_DEF_BLKSIZE_BITS;
if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
return -EINVAL;
nbd->config->bytesize = bytesize;
- nbd->config->blksize = blksize;
+ nbd->config->blksize_bits = __ffs(blksize);
if (!nbd->task_recv)
return 0;
@@ -1337,7 +1342,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
- return nbd_set_size(nbd, config->bytesize, config->blksize);
+ return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
@@ -1406,11 +1411,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_BLKSIZE:
return nbd_set_size(nbd, config->bytesize, arg);
case NBD_SET_SIZE:
- return nbd_set_size(nbd, arg, config->blksize);
+ return nbd_set_size(nbd, arg, nbd_blksize(config));
case NBD_SET_SIZE_BLOCKS:
- if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+ if (check_shl_overflow(arg, config->blksize_bits, &bytesize))
return -EINVAL;
- return nbd_set_size(nbd, bytesize, config->blksize);
+ return nbd_set_size(nbd, bytesize, nbd_blksize(config));
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1476,7 +1481,7 @@ static struct nbd_config *nbd_alloc_config(void)
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
- config->blksize = NBD_DEF_BLKSIZE;
+ config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0);
try_module_get(THIS_MODULE);
return config;
@@ -1604,7 +1609,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
- debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
+ debugfs_create_u32("blocksize_bits", 0444, dir, &config->blksize_bits);
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
return 0;
@@ -1826,7 +1831,7 @@ nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
- u64 bsize = config->blksize;
+ u64 bsize = nbd_blksize(config);
u64 bytes = config->bytesize;
if (info->attrs[NBD_ATTR_SIZE_BYTES])
@@ -1835,7 +1840,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
- if (bytes != config->bytesize || bsize != config->blksize)
+ if (bytes != config->bytesize || bsize != nbd_blksize(config))
return nbd_set_size(nbd, bytes, bsize);
return 0;
}
base-commit: 4c17ca27923c16fd73bbb9ad033c7d749c3bcfcc
--
2.33.0.464.g1972c5931b-goog
From: Joerg Roedel <jroedel(a)suse.de>
The trampoline_pgd only maps the 0xfffffff000000000-0xffffffffffffffff
range of kernel memory (with 4-level paging). This range contains the
kernels text+data+bss mappings and the module mapping space, but not the
direct mapping and the vmalloc area.
This is enough to get an application processors out of real-mode, but
for code that switches back to real-mode the trampoline_pgd is missing
important parts of the address space. For example, consider this code
from arch/x86/kernel/reboot.c, function machine_real_restart() for a
64-bit kernel:
#ifdef CONFIG_X86_32
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
/* Exiting long mode will fail if CR4.PCIDE is set. */
if (boot_cpu_has(X86_FEATURE_PCID))
cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
#ifdef CONFIG_X86_32
asm volatile("jmpl *%0" : :
"rm" (real_mode_header->machine_real_restart_asm),
"a" (type));
#else
asm volatile("ljmpl *%0" : :
"m" (real_mode_header->machine_real_restart_asm),
"D" (type));
#endif
The code switches to the trampoline_pgd, which unmaps the direct mapping
and also the kernel stack. The call to cr4_clear_bits() will find no
stack and crash the machine. The real_mode_header pointer below points
into the direct mapping, and dereferencing it also causes a crash.
The reason this does not crash always is only that kernel mappings are
global and the CR3 switch does not flush those mappings. But if theses
mappings are not in the TLB already, the above code will crash before it
can jump to the real-mode stub.
Extend the trampoline_pgd to contain all kernel mappings to prevent
these crashes and to make code which runs on this page-table more
robust.
Cc: stable(a)vger.kernel.org
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/realmode/init.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 0cfe1046cec9..792cb9ca9b29 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -91,6 +91,7 @@ static void __init setup_real_mode(void)
#ifdef CONFIG_X86_64
u64 *trampoline_pgd;
u64 efer;
+ int i;
#endif
base = (unsigned char *)real_mode_header;
@@ -147,8 +148,17 @@ static void __init setup_real_mode(void)
trampoline_header->flags = 0;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
+
+ /*
+ * Map all of kernel memory into the trampoline PGD so that it includes
+ * the direct mapping and vmalloc space. This is needed to keep the
+ * stack and real_mode_header mapped when switching to this page table.
+ */
+ for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
+ trampoline_pgd[i] = init_top_pgt[i].pgd;
+
+ /* Map the real mode stub as virtual == physical */
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
- trampoline_pgd[511] = init_top_pgt[511].pgd;
#endif
sme_sev_setup_real_mode(trampoline_header);
--
2.33.0
When I was doing PTP_SYS_OFFSET_PRECISE ioctl in VM which has 128 vCPUs,
I got error returned occasionally. Then I checked the routine of
"getcrosststamp". I found in kvm_arch_ptp_get_crosststamp() of x86,
pvclock vcpu time info was got from hv_clock arrary which has only 64
elements. Hence this ioctl is executed on vCPU > 64, a wild/dangling
pointer will be got, which had casued the error.
To confirm this finding, I wrote a simple PTP_SYS_OFFSET_PRECISE ioctl
test and used "taskset -c n" to run the test, when it was executed on
vCPUs >= 64 it returned error.
This patchset exposes this_cpu_pvti() to get per cpu pvclock vcpu time
info of vCPUs >= 64 insdead of getting them from hv_clock arrary.
Zelin Deng (2):
x86/kvmclock: Move this_cpu_pvti into kvmclock.h
ptp: Fix ptp_kvm_getcrosststamp issue for x86 ptp_kvm
arch/x86/include/asm/kvmclock.h | 14 ++++++++++++++
arch/x86/kernel/kvmclock.c | 13 ++-----------
drivers/ptp/ptp_kvm_x86.c | 9 ++-------
3 files changed, 18 insertions(+), 18 deletions(-)
--
1.8.3.1
In tpm_del_char_device() make sure that chip->ops is still valid.
This check is needed since in case of a system shutdown
tpm_class_shutdown() has already been called and set chip->ops to NULL.
This leads to a NULL pointer access as soon as tpm_del_char_device()
tries to access chip->ops in case of TPM 2.
Fixes: dcbeab1946454 ("tpm: fix crash in tpm_tis deinitialization")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lino Sanfilippo <LinoSanfilippo(a)gmx.de>
---
drivers/char/tpm/tpm-chip.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index ddaeceb7e109..ed1fb5d82caf 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -474,13 +474,19 @@ static void tpm_del_char_device(struct tpm_chip *chip)
/* Make the driver uncallable. */
down_write(&chip->ops_sem);
- if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- if (!tpm_chip_start(chip)) {
- tpm2_shutdown(chip, TPM2_SU_CLEAR);
- tpm_chip_stop(chip);
+ /* Check if chip->ops is still valid since in case of a shutdown
+ * tpm_class_shutdown() has already sent the TPM2_Shutdown command
+ * and set chip->ops to NULL.
+ */
+ if (chip->ops) {
+ if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+ if (!tpm_chip_start(chip)) {
+ tpm2_shutdown(chip, TPM2_SU_CLEAR);
+ tpm_chip_stop(chip);
+ }
}
+ chip->ops = NULL;
}
- chip->ops = NULL;
up_write(&chip->ops_sem);
}
base-commit: a3fa7a101dcff93791d1b1bdb3affcad1410c8c1
--
2.33.0