The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 75ca6ad408f459f00b09a64f04c774559848c097 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh(a)linux.ibm.com>
Date: Sat, 5 Jun 2021 10:39:32 +0530
Subject: [PATCH] ext4: fix loff_t overflow in ext4_max_bitmap_size()
We should use unsigned long long rather than loff_t to avoid
overflow in ext4_max_bitmap_size() for comparison before returning.
w/o this patch sbi->s_bitmap_maxbytes was becoming a negative
value due to overflow of upper_limit (with has_huge_files as true)
Below is a quick test to trigger it on a 64KB pagesize system.
sudo mkfs.ext4 -b 65536 -O ^has_extents,^64bit /dev/loop2
sudo mount /dev/loop2 /mnt
sudo echo "hello" > /mnt/hello -> This will error out with
"echo: write error: File too large"
Signed-off-by: Ritesh Harjani <riteshh(a)linux.ibm.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
Link: https://lore.kernel.org/r/594f409e2c543e90fd836b78188dfa5c575065ba.16228675…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a52f1572daa5..9b5b2f63b470 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3030,17 +3030,17 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
*/
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
- loff_t res = EXT4_NDIR_BLOCKS;
+ unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
int meta_blocks;
- loff_t upper_limit;
- /* This is calculated to be the largest file size for a dense, block
+
+ /*
+ * This is calculated to be the largest file size for a dense, block
* mapped file such that the file's total number of 512-byte sectors,
* including data and all indirect blocks, does not exceed (2^48 - 1).
*
* __u32 i_blocks_lo and _u16 i_blocks_high represent the total
* number of 512-byte sectors of the file.
*/
-
if (!has_huge_files) {
/*
* !has_huge_files or implies that the inode i_block field
@@ -3083,7 +3083,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
- return res;
+ return (loff_t)res;
}
static ext4_fsblk_t descriptor_loc(struct super_block *sb,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 75ca6ad408f459f00b09a64f04c774559848c097 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh(a)linux.ibm.com>
Date: Sat, 5 Jun 2021 10:39:32 +0530
Subject: [PATCH] ext4: fix loff_t overflow in ext4_max_bitmap_size()
We should use unsigned long long rather than loff_t to avoid
overflow in ext4_max_bitmap_size() for comparison before returning.
w/o this patch sbi->s_bitmap_maxbytes was becoming a negative
value due to overflow of upper_limit (with has_huge_files as true)
Below is a quick test to trigger it on a 64KB pagesize system.
sudo mkfs.ext4 -b 65536 -O ^has_extents,^64bit /dev/loop2
sudo mount /dev/loop2 /mnt
sudo echo "hello" > /mnt/hello -> This will error out with
"echo: write error: File too large"
Signed-off-by: Ritesh Harjani <riteshh(a)linux.ibm.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
Link: https://lore.kernel.org/r/594f409e2c543e90fd836b78188dfa5c575065ba.16228675…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a52f1572daa5..9b5b2f63b470 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3030,17 +3030,17 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
*/
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
- loff_t res = EXT4_NDIR_BLOCKS;
+ unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
int meta_blocks;
- loff_t upper_limit;
- /* This is calculated to be the largest file size for a dense, block
+
+ /*
+ * This is calculated to be the largest file size for a dense, block
* mapped file such that the file's total number of 512-byte sectors,
* including data and all indirect blocks, does not exceed (2^48 - 1).
*
* __u32 i_blocks_lo and _u16 i_blocks_high represent the total
* number of 512-byte sectors of the file.
*/
-
if (!has_huge_files) {
/*
* !has_huge_files or implies that the inode i_block field
@@ -3083,7 +3083,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
- return res;
+ return (loff_t)res;
}
static ext4_fsblk_t descriptor_loc(struct super_block *sb,
The command ring pointer is located at [6:63] bits of the command
ring control register (CRCR). All the control bits like command stop,
abort are located at [0:3] bits. While aborting a command, we read the
CRCR and set the abort bit and write to the CRCR. The read will always
give command ring pointer as all zeros. So we essentially write only
the control bits. Since we split the 64 bit write into two 32 bit writes,
there is a possibility of xHC command ring stopped before the upper
dword (all zeros) is written. If that happens, xHC updates the upper
dword of its internal command ring pointer with all zeros. Next time,
when the command ring is restarted, we see xHC memory access failures.
Fix this issue by only writing to the lower dword of CRCR where all
control bits are located.
Signed-off-by: Pavankumar Kondeti <pkondeti(a)codeaurora.org>
---
drivers/usb/host/xhci-ring.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index e676749..3c8456b 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -366,16 +366,22 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
/* Must be called with xhci->lock held, releases and aquires lock back */
static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
{
- u64 temp_64;
+ u64 temp_32;
int ret;
xhci_dbg(xhci, "Abort command ring\n");
reinit_completion(&xhci->cmd_ring_stop_completion);
- temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
- xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
- &xhci->op_regs->cmd_ring);
+ /*
+ * The control bits like command stop, abort are located in lower
+ * dword of the command ring control register. Limit the write
+ * to the lower dword to avoid corrupting the command ring pointer
+ * in case if the command ring is stopped by the time upper dword
+ * is written.
+ */
+ temp_32 = readl(&xhci->op_regs->cmd_ring);
+ writel(temp_32 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring);
/* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the
* completion of the Command Abort operation. If CRR is not negated in 5
--
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
From: Pawel Laszczak <pawell(a)cadence.com>
commit b69ec50b3e55c4b2a85c8bc46763eaf33060584 upstream
For DEV_VER_V3 version there exist race condition between clearing
ep_sts.EP_STS_TRBERR and setting ep_cmd.EP_CMD_DRDY bit.
Setting EP_CMD_DRDY will be ignored by controller when
EP_STS_TRBERR is set. So, between these two instructions we have
a small time gap in which the EP_STS_TRBERR can be set. In such case
the transfer will not start after setting doorbell.
Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
cc: <stable(a)vger.kernel.org> # 5.4.x
Tested-by: Aswath Govindraju <a-govindraju(a)ti.com>
Reviewed-by: Aswath Govindraju <a-govindraju(a)ti.com>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
drivers/usb/cdns3/gadget.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c
index f624cc87cbab..5edde6a0e77a 100644
--- a/drivers/usb/cdns3/gadget.c
+++ b/drivers/usb/cdns3/gadget.c
@@ -807,6 +807,19 @@ static void cdns3_wa1_tray_restore_cycle_bit(struct cdns3_device *priv_dev,
cdns3_wa1_restore_cycle_bit(priv_ep);
}
+static void cdns3_rearm_drdy_if_needed(struct cdns3_endpoint *priv_ep)
+{
+ struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
+
+ if (priv_dev->dev_ver < DEV_VER_V3)
+ return;
+
+ if (readl(&priv_dev->regs->ep_sts) & EP_STS_TRBERR) {
+ writel(EP_STS_TRBERR, &priv_dev->regs->ep_sts);
+ writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd);
+ }
+}
+
/**
* cdns3_ep_run_transfer - start transfer on no-default endpoint hardware
* @priv_ep: endpoint object
@@ -1003,6 +1016,7 @@ int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep,
/*clearing TRBERR and EP_STS_DESCMIS before seting DRDY*/
writel(EP_STS_TRBERR | EP_STS_DESCMIS, &priv_dev->regs->ep_sts);
writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd);
+ cdns3_rearm_drdy_if_needed(priv_ep);
trace_cdns3_doorbell_epx(priv_ep->name,
readl(&priv_dev->regs->ep_traddr));
}
--
2.25.1
From: Pali Rohár <pali(a)kernel.org>
Change PCIe Max Payload Size setting in PCIe Device Control register to 512
bytes to align with PCIe Link Initialization sequence as defined in Marvell
Armada 3700 Functional Specification. According to the specification,
maximal Max Payload Size supported by this device is 512 bytes.
Without this kernel prints suspicious line:
pci 0000:01:00.0: Upstream bridge's Max Payload Size set to 256 (was 16384, max 512)
With this change it changes to:
pci 0000:01:00.0: Upstream bridge's Max Payload Size set to 256 (was 512, max 512)
Fixes: 8c39d710363c ("PCI: aardvark: Add Aardvark PCI host controller driver")
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Signed-off-by: Marek Behún <kabel(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/pci/controller/pci-aardvark.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 596ebcfcc82d..884510630bae 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -488,8 +488,9 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL);
reg &= ~PCI_EXP_DEVCTL_RELAX_EN;
reg &= ~PCI_EXP_DEVCTL_NOSNOOP_EN;
+ reg &= ~PCI_EXP_DEVCTL_PAYLOAD;
reg &= ~PCI_EXP_DEVCTL_READRQ;
- reg |= PCI_EXP_DEVCTL_PAYLOAD; /* Set max payload size */
+ reg |= PCI_EXP_DEVCTL_PAYLOAD_512B;
reg |= PCI_EXP_DEVCTL_READRQ_512B;
advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL);
--
2.32.0
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 41e76c6a3c83c85e849f10754b8632ea763d9be4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers(a)google.com>
Date: Mon, 20 Sep 2021 16:25:33 -0700
Subject: [PATCH] nbd: use shifts rather than multiplies
commit fad7cd3310db ("nbd: add the check to prevent overflow in
__nbd_ioctl()") raised an issue from the fallback helpers added in
commit f0907827a8a9 ("compiler.h: enable builtin overflow checkers and
add fallback code")
ERROR: modpost: "__divdi3" [drivers/block/nbd.ko] undefined!
As Stephen Rothwell notes:
The added check_mul_overflow() call is being passed 64 bit values.
COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW is not set for this build (see
include/linux/overflow.h).
Specifically, the helpers for checking whether the results of a
multiplication overflowed (__unsigned_mul_overflow,
__signed_add_overflow) use the division operator when
!COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW. This is problematic for 64b
operands on 32b hosts.
This was fixed upstream by
commit 76ae847497bc ("Documentation: raise minimum supported version of
GCC to 5.1")
which is not suitable to be backported to stable.
Further, __builtin_mul_overflow() would emit a libcall to a
compiler-rt-only symbol when compiling with clang < 14 for 32b targets.
ld.lld: error: undefined symbol: __mulodi4
In order to keep stable buildable with GCC 4.9 and clang < 14, modify
struct nbd_config to instead track the number of bits of the block size;
reconstructing the block size using runtime checked shifts that are not
problematic for those compilers and in a ways that can be backported to
stable.
In nbd_set_size, we do validate that the value of blksize must be a
power of two (POT) and is in the range of [512, PAGE_SIZE] (both
inclusive).
This does modify the debugfs interface.
Cc: stable(a)vger.kernel.org
Cc: Arnd Bergmann <arnd(a)kernel.org>
Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Link: https://github.com/ClangBuiltLinux/linux/issues/1438
Link: https://lore.kernel.org/all/20210909182525.372ee687@canb.auug.org.au/
Link: https://lore.kernel.org/stable/CAHk-=whiQBofgis_rkniz8GBP9wZtSZdcDEffgSLO62…
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Reported-by: Nathan Chancellor <nathan(a)kernel.org>
Reported-by: Stephen Rothwell <sfr(a)canb.auug.org.au>
Suggested-by: Kees Cook <keescook(a)chromium.org>
Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Suggested-by: Pavel Machek <pavel(a)ucw.cz>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Link: https://lore.kernel.org/r/20210920232533.4092046-1-ndesaulniers@google.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5170a630778d..1183f7872b71 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -97,13 +97,18 @@ struct nbd_config {
atomic_t recv_threads;
wait_queue_head_t recv_wq;
- loff_t blksize;
+ unsigned int blksize_bits;
loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbg_dir;
#endif
};
+static inline unsigned int nbd_blksize(struct nbd_config *config)
+{
+ return 1u << config->blksize_bits;
+}
+
struct nbd_device {
struct blk_mq_tag_set tag_set;
@@ -146,7 +151,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
-#define NBD_DEF_BLKSIZE 1024
+#define NBD_DEF_BLKSIZE_BITS 10
static unsigned int nbds_max = 16;
static int max_part = 16;
@@ -317,12 +322,12 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
loff_t blksize)
{
if (!blksize)
- blksize = NBD_DEF_BLKSIZE;
+ blksize = 1u << NBD_DEF_BLKSIZE_BITS;
if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
return -EINVAL;
nbd->config->bytesize = bytesize;
- nbd->config->blksize = blksize;
+ nbd->config->blksize_bits = __ffs(blksize);
if (!nbd->task_recv)
return 0;
@@ -1337,7 +1342,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
- return nbd_set_size(nbd, config->bytesize, config->blksize);
+ return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
@@ -1406,11 +1411,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_BLKSIZE:
return nbd_set_size(nbd, config->bytesize, arg);
case NBD_SET_SIZE:
- return nbd_set_size(nbd, arg, config->blksize);
+ return nbd_set_size(nbd, arg, nbd_blksize(config));
case NBD_SET_SIZE_BLOCKS:
- if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+ if (check_shl_overflow(arg, config->blksize_bits, &bytesize))
return -EINVAL;
- return nbd_set_size(nbd, bytesize, config->blksize);
+ return nbd_set_size(nbd, bytesize, nbd_blksize(config));
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1476,7 +1481,7 @@ static struct nbd_config *nbd_alloc_config(void)
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
- config->blksize = NBD_DEF_BLKSIZE;
+ config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0);
try_module_get(THIS_MODULE);
return config;
@@ -1604,7 +1609,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
- debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
+ debugfs_create_u32("blocksize_bits", 0444, dir, &config->blksize_bits);
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
return 0;
@@ -1826,7 +1831,7 @@ nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
- u64 bsize = config->blksize;
+ u64 bsize = nbd_blksize(config);
u64 bytes = config->bytesize;
if (info->attrs[NBD_ATTR_SIZE_BYTES])
@@ -1835,7 +1840,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
- if (bytes != config->bytesize || bsize != config->blksize)
+ if (bytes != config->bytesize || bsize != nbd_blksize(config))
return nbd_set_size(nbd, bytes, bsize);
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 41e76c6a3c83c85e849f10754b8632ea763d9be4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers(a)google.com>
Date: Mon, 20 Sep 2021 16:25:33 -0700
Subject: [PATCH] nbd: use shifts rather than multiplies
commit fad7cd3310db ("nbd: add the check to prevent overflow in
__nbd_ioctl()") raised an issue from the fallback helpers added in
commit f0907827a8a9 ("compiler.h: enable builtin overflow checkers and
add fallback code")
ERROR: modpost: "__divdi3" [drivers/block/nbd.ko] undefined!
As Stephen Rothwell notes:
The added check_mul_overflow() call is being passed 64 bit values.
COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW is not set for this build (see
include/linux/overflow.h).
Specifically, the helpers for checking whether the results of a
multiplication overflowed (__unsigned_mul_overflow,
__signed_add_overflow) use the division operator when
!COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW. This is problematic for 64b
operands on 32b hosts.
This was fixed upstream by
commit 76ae847497bc ("Documentation: raise minimum supported version of
GCC to 5.1")
which is not suitable to be backported to stable.
Further, __builtin_mul_overflow() would emit a libcall to a
compiler-rt-only symbol when compiling with clang < 14 for 32b targets.
ld.lld: error: undefined symbol: __mulodi4
In order to keep stable buildable with GCC 4.9 and clang < 14, modify
struct nbd_config to instead track the number of bits of the block size;
reconstructing the block size using runtime checked shifts that are not
problematic for those compilers and in a ways that can be backported to
stable.
In nbd_set_size, we do validate that the value of blksize must be a
power of two (POT) and is in the range of [512, PAGE_SIZE] (both
inclusive).
This does modify the debugfs interface.
Cc: stable(a)vger.kernel.org
Cc: Arnd Bergmann <arnd(a)kernel.org>
Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Link: https://github.com/ClangBuiltLinux/linux/issues/1438
Link: https://lore.kernel.org/all/20210909182525.372ee687@canb.auug.org.au/
Link: https://lore.kernel.org/stable/CAHk-=whiQBofgis_rkniz8GBP9wZtSZdcDEffgSLO62…
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Reported-by: Nathan Chancellor <nathan(a)kernel.org>
Reported-by: Stephen Rothwell <sfr(a)canb.auug.org.au>
Suggested-by: Kees Cook <keescook(a)chromium.org>
Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Suggested-by: Pavel Machek <pavel(a)ucw.cz>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Link: https://lore.kernel.org/r/20210920232533.4092046-1-ndesaulniers@google.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5170a630778d..1183f7872b71 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -97,13 +97,18 @@ struct nbd_config {
atomic_t recv_threads;
wait_queue_head_t recv_wq;
- loff_t blksize;
+ unsigned int blksize_bits;
loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbg_dir;
#endif
};
+static inline unsigned int nbd_blksize(struct nbd_config *config)
+{
+ return 1u << config->blksize_bits;
+}
+
struct nbd_device {
struct blk_mq_tag_set tag_set;
@@ -146,7 +151,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
-#define NBD_DEF_BLKSIZE 1024
+#define NBD_DEF_BLKSIZE_BITS 10
static unsigned int nbds_max = 16;
static int max_part = 16;
@@ -317,12 +322,12 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
loff_t blksize)
{
if (!blksize)
- blksize = NBD_DEF_BLKSIZE;
+ blksize = 1u << NBD_DEF_BLKSIZE_BITS;
if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
return -EINVAL;
nbd->config->bytesize = bytesize;
- nbd->config->blksize = blksize;
+ nbd->config->blksize_bits = __ffs(blksize);
if (!nbd->task_recv)
return 0;
@@ -1337,7 +1342,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
- return nbd_set_size(nbd, config->bytesize, config->blksize);
+ return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
@@ -1406,11 +1411,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_BLKSIZE:
return nbd_set_size(nbd, config->bytesize, arg);
case NBD_SET_SIZE:
- return nbd_set_size(nbd, arg, config->blksize);
+ return nbd_set_size(nbd, arg, nbd_blksize(config));
case NBD_SET_SIZE_BLOCKS:
- if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+ if (check_shl_overflow(arg, config->blksize_bits, &bytesize))
return -EINVAL;
- return nbd_set_size(nbd, bytesize, config->blksize);
+ return nbd_set_size(nbd, bytesize, nbd_blksize(config));
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1476,7 +1481,7 @@ static struct nbd_config *nbd_alloc_config(void)
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
- config->blksize = NBD_DEF_BLKSIZE;
+ config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0);
try_module_get(THIS_MODULE);
return config;
@@ -1604,7 +1609,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
- debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
+ debugfs_create_u32("blocksize_bits", 0444, dir, &config->blksize_bits);
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
return 0;
@@ -1826,7 +1831,7 @@ nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
- u64 bsize = config->blksize;
+ u64 bsize = nbd_blksize(config);
u64 bytes = config->bytesize;
if (info->attrs[NBD_ATTR_SIZE_BYTES])
@@ -1835,7 +1840,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
- if (bytes != config->bytesize || bsize != config->blksize)
+ if (bytes != config->bytesize || bsize != nbd_blksize(config))
return nbd_set_size(nbd, bytes, bsize);
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 305d568b72f17f674155a2a8275f865f207b3808 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg(a)ziepe.ca>
Date: Thu, 16 Sep 2021 15:34:46 -0300
Subject: [PATCH] RDMA/cma: Ensure rdma_addr_cancel() happens before issuing
more requests
The FSM can run in a circle allowing rdma_resolve_ip() to be called twice
on the same id_priv. While this cannot happen without going through the
work, it violates the invariant that the same address resolution
background request cannot be active twice.
CPU 1 CPU 2
rdma_resolve_addr():
RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler) #1
process_one_req(): for #1
addr_handler():
RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND
mutex_unlock(&id_priv->handler_mutex);
[.. handler still running ..]
rdma_resolve_addr():
RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler)
!! two requests are now on the req_list
rdma_destroy_id():
destroy_id_handler_unlock():
_destroy_id():
cma_cancel_operation():
rdma_addr_cancel()
// process_one_req() self removes it
spin_lock_bh(&lock);
cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) == true
! rdma_addr_cancel() returns after process_on_req #1 is done
kfree(id_priv)
process_one_req(): for #2
addr_handler():
mutex_lock(&id_priv->handler_mutex);
!! Use after free on id_priv
rdma_addr_cancel() expects there to be one req on the list and only
cancels the first one. The self-removal behavior of the work only happens
after the handler has returned. This yields a situations where the
req_list can have two reqs for the same "handle" but rdma_addr_cancel()
only cancels the first one.
The second req remains active beyond rdma_destroy_id() and will
use-after-free id_priv once it inevitably triggers.
Fix this by remembering if the id_priv has called rdma_resolve_ip() and
always cancel before calling it again. This ensures the req_list never
gets more than one item in it and doesn't cost anything in the normal flow
that never uses this strange error path.
Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com
Cc: stable(a)vger.kernel.org
Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager")
Reported-by: syzbot+dc3dfba010d7671e05f5(a)syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8862b0e572f0..704ce595542c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5c463da99845..f92f101ea981 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -91,6 +91,7 @@ struct rdma_id_private {
u8 afonly;
u8 timeout;
u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 305d568b72f17f674155a2a8275f865f207b3808 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg(a)ziepe.ca>
Date: Thu, 16 Sep 2021 15:34:46 -0300
Subject: [PATCH] RDMA/cma: Ensure rdma_addr_cancel() happens before issuing
more requests
The FSM can run in a circle allowing rdma_resolve_ip() to be called twice
on the same id_priv. While this cannot happen without going through the
work, it violates the invariant that the same address resolution
background request cannot be active twice.
CPU 1 CPU 2
rdma_resolve_addr():
RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler) #1
process_one_req(): for #1
addr_handler():
RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND
mutex_unlock(&id_priv->handler_mutex);
[.. handler still running ..]
rdma_resolve_addr():
RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler)
!! two requests are now on the req_list
rdma_destroy_id():
destroy_id_handler_unlock():
_destroy_id():
cma_cancel_operation():
rdma_addr_cancel()
// process_one_req() self removes it
spin_lock_bh(&lock);
cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) == true
! rdma_addr_cancel() returns after process_on_req #1 is done
kfree(id_priv)
process_one_req(): for #2
addr_handler():
mutex_lock(&id_priv->handler_mutex);
!! Use after free on id_priv
rdma_addr_cancel() expects there to be one req on the list and only
cancels the first one. The self-removal behavior of the work only happens
after the handler has returned. This yields a situations where the
req_list can have two reqs for the same "handle" but rdma_addr_cancel()
only cancels the first one.
The second req remains active beyond rdma_destroy_id() and will
use-after-free id_priv once it inevitably triggers.
Fix this by remembering if the id_priv has called rdma_resolve_ip() and
always cancel before calling it again. This ensures the req_list never
gets more than one item in it and doesn't cost anything in the normal flow
that never uses this strange error path.
Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com
Cc: stable(a)vger.kernel.org
Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager")
Reported-by: syzbot+dc3dfba010d7671e05f5(a)syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8862b0e572f0..704ce595542c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5c463da99845..f92f101ea981 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -91,6 +91,7 @@ struct rdma_id_private {
u8 afonly;
u8 timeout;
u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 305d568b72f17f674155a2a8275f865f207b3808 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg(a)ziepe.ca>
Date: Thu, 16 Sep 2021 15:34:46 -0300
Subject: [PATCH] RDMA/cma: Ensure rdma_addr_cancel() happens before issuing
more requests
The FSM can run in a circle allowing rdma_resolve_ip() to be called twice
on the same id_priv. While this cannot happen without going through the
work, it violates the invariant that the same address resolution
background request cannot be active twice.
CPU 1 CPU 2
rdma_resolve_addr():
RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler) #1
process_one_req(): for #1
addr_handler():
RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND
mutex_unlock(&id_priv->handler_mutex);
[.. handler still running ..]
rdma_resolve_addr():
RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler)
!! two requests are now on the req_list
rdma_destroy_id():
destroy_id_handler_unlock():
_destroy_id():
cma_cancel_operation():
rdma_addr_cancel()
// process_one_req() self removes it
spin_lock_bh(&lock);
cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) == true
! rdma_addr_cancel() returns after process_on_req #1 is done
kfree(id_priv)
process_one_req(): for #2
addr_handler():
mutex_lock(&id_priv->handler_mutex);
!! Use after free on id_priv
rdma_addr_cancel() expects there to be one req on the list and only
cancels the first one. The self-removal behavior of the work only happens
after the handler has returned. This yields a situations where the
req_list can have two reqs for the same "handle" but rdma_addr_cancel()
only cancels the first one.
The second req remains active beyond rdma_destroy_id() and will
use-after-free id_priv once it inevitably triggers.
Fix this by remembering if the id_priv has called rdma_resolve_ip() and
always cancel before calling it again. This ensures the req_list never
gets more than one item in it and doesn't cost anything in the normal flow
that never uses this strange error path.
Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com
Cc: stable(a)vger.kernel.org
Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager")
Reported-by: syzbot+dc3dfba010d7671e05f5(a)syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8862b0e572f0..704ce595542c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5c463da99845..f92f101ea981 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -91,6 +91,7 @@ struct rdma_id_private {
u8 afonly;
u8 timeout;
u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 305d568b72f17f674155a2a8275f865f207b3808 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg(a)ziepe.ca>
Date: Thu, 16 Sep 2021 15:34:46 -0300
Subject: [PATCH] RDMA/cma: Ensure rdma_addr_cancel() happens before issuing
more requests
The FSM can run in a circle allowing rdma_resolve_ip() to be called twice
on the same id_priv. While this cannot happen without going through the
work, it violates the invariant that the same address resolution
background request cannot be active twice.
CPU 1 CPU 2
rdma_resolve_addr():
RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler) #1
process_one_req(): for #1
addr_handler():
RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND
mutex_unlock(&id_priv->handler_mutex);
[.. handler still running ..]
rdma_resolve_addr():
RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler)
!! two requests are now on the req_list
rdma_destroy_id():
destroy_id_handler_unlock():
_destroy_id():
cma_cancel_operation():
rdma_addr_cancel()
// process_one_req() self removes it
spin_lock_bh(&lock);
cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) == true
! rdma_addr_cancel() returns after process_on_req #1 is done
kfree(id_priv)
process_one_req(): for #2
addr_handler():
mutex_lock(&id_priv->handler_mutex);
!! Use after free on id_priv
rdma_addr_cancel() expects there to be one req on the list and only
cancels the first one. The self-removal behavior of the work only happens
after the handler has returned. This yields a situations where the
req_list can have two reqs for the same "handle" but rdma_addr_cancel()
only cancels the first one.
The second req remains active beyond rdma_destroy_id() and will
use-after-free id_priv once it inevitably triggers.
Fix this by remembering if the id_priv has called rdma_resolve_ip() and
always cancel before calling it again. This ensures the req_list never
gets more than one item in it and doesn't cost anything in the normal flow
that never uses this strange error path.
Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com
Cc: stable(a)vger.kernel.org
Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager")
Reported-by: syzbot+dc3dfba010d7671e05f5(a)syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8862b0e572f0..704ce595542c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5c463da99845..f92f101ea981 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -91,6 +91,7 @@ struct rdma_id_private {
u8 afonly;
u8 timeout;
u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 305d568b72f17f674155a2a8275f865f207b3808 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg(a)ziepe.ca>
Date: Thu, 16 Sep 2021 15:34:46 -0300
Subject: [PATCH] RDMA/cma: Ensure rdma_addr_cancel() happens before issuing
more requests
The FSM can run in a circle allowing rdma_resolve_ip() to be called twice
on the same id_priv. While this cannot happen without going through the
work, it violates the invariant that the same address resolution
background request cannot be active twice.
CPU 1 CPU 2
rdma_resolve_addr():
RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler) #1
process_one_req(): for #1
addr_handler():
RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND
mutex_unlock(&id_priv->handler_mutex);
[.. handler still running ..]
rdma_resolve_addr():
RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler)
!! two requests are now on the req_list
rdma_destroy_id():
destroy_id_handler_unlock():
_destroy_id():
cma_cancel_operation():
rdma_addr_cancel()
// process_one_req() self removes it
spin_lock_bh(&lock);
cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) == true
! rdma_addr_cancel() returns after process_on_req #1 is done
kfree(id_priv)
process_one_req(): for #2
addr_handler():
mutex_lock(&id_priv->handler_mutex);
!! Use after free on id_priv
rdma_addr_cancel() expects there to be one req on the list and only
cancels the first one. The self-removal behavior of the work only happens
after the handler has returned. This yields a situations where the
req_list can have two reqs for the same "handle" but rdma_addr_cancel()
only cancels the first one.
The second req remains active beyond rdma_destroy_id() and will
use-after-free id_priv once it inevitably triggers.
Fix this by remembering if the id_priv has called rdma_resolve_ip() and
always cancel before calling it again. This ensures the req_list never
gets more than one item in it and doesn't cost anything in the normal flow
that never uses this strange error path.
Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com
Cc: stable(a)vger.kernel.org
Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager")
Reported-by: syzbot+dc3dfba010d7671e05f5(a)syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8862b0e572f0..704ce595542c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5c463da99845..f92f101ea981 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -91,6 +91,7 @@ struct rdma_id_private {
u8 afonly;
u8 timeout;
u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 305d568b72f17f674155a2a8275f865f207b3808 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg(a)ziepe.ca>
Date: Thu, 16 Sep 2021 15:34:46 -0300
Subject: [PATCH] RDMA/cma: Ensure rdma_addr_cancel() happens before issuing
more requests
The FSM can run in a circle allowing rdma_resolve_ip() to be called twice
on the same id_priv. While this cannot happen without going through the
work, it violates the invariant that the same address resolution
background request cannot be active twice.
CPU 1 CPU 2
rdma_resolve_addr():
RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler) #1
process_one_req(): for #1
addr_handler():
RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND
mutex_unlock(&id_priv->handler_mutex);
[.. handler still running ..]
rdma_resolve_addr():
RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY
rdma_resolve_ip(addr_handler)
!! two requests are now on the req_list
rdma_destroy_id():
destroy_id_handler_unlock():
_destroy_id():
cma_cancel_operation():
rdma_addr_cancel()
// process_one_req() self removes it
spin_lock_bh(&lock);
cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) == true
! rdma_addr_cancel() returns after process_on_req #1 is done
kfree(id_priv)
process_one_req(): for #2
addr_handler():
mutex_lock(&id_priv->handler_mutex);
!! Use after free on id_priv
rdma_addr_cancel() expects there to be one req on the list and only
cancels the first one. The self-removal behavior of the work only happens
after the handler has returned. This yields a situations where the
req_list can have two reqs for the same "handle" but rdma_addr_cancel()
only cancels the first one.
The second req remains active beyond rdma_destroy_id() and will
use-after-free id_priv once it inevitably triggers.
Fix this by remembering if the id_priv has called rdma_resolve_ip() and
always cancel before calling it again. This ensures the req_list never
gets more than one item in it and doesn't cost anything in the normal flow
that never uses this strange error path.
Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com
Cc: stable(a)vger.kernel.org
Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager")
Reported-by: syzbot+dc3dfba010d7671e05f5(a)syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8862b0e572f0..704ce595542c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1783,6 +1783,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -3425,6 +3433,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
&id->route.addr.dev_addr,
timeout_ms, addr_handler,
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5c463da99845..f92f101ea981 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -91,6 +91,7 @@ struct rdma_id_private {
u8 afonly;
u8 timeout;
u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
commit 8646e53633f314e4d746a988240d3b951a92f94a upstream.
Invoke rseq's NOTIFY_RESUME handler when processing the flag prior to
transferring to a KVM guest, which is roughly equivalent to an exit to
userspace and processes many of the same pending actions. While the task
cannot be in an rseq critical section as the KVM path is reachable only
by via ioctl(KVM_RUN), the side effects that apply to rseq outside of a
critical section still apply, e.g. the current CPU needs to be updated if
the task is migrated.
Clearing TIF_NOTIFY_RESUME without informing rseq can lead to segfaults
and other badness in userspace VMMs that use rseq in combination with KVM,
e.g. due to the CPU ID being stale after task migration.
Fixes: 72c3c0fe54a3 ("x86/kvm: Use generic xfer to guest work function")
Reported-by: Peter Foley <pefoley(a)google.com>
Bisected-by: Doug Evans <dje(a)google.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20210901203030.1292304-2-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
[sean: Resolve benign conflict due to unrelated access_ok() check in 5.10]
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
---
kernel/entry/kvm.c | 4 +++-
kernel/rseq.c | 13 ++++++++++---
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
index b6678a5e3cf6..2a3139dab109 100644
--- a/kernel/entry/kvm.c
+++ b/kernel/entry/kvm.c
@@ -16,8 +16,10 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
if (ti_work & _TIF_NEED_RESCHED)
schedule();
- if (ti_work & _TIF_NOTIFY_RESUME)
+ if (ti_work & _TIF_NOTIFY_RESUME) {
tracehook_notify_resume(NULL);
+ rseq_handle_notify_resume(NULL, NULL);
+ }
ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work);
if (ret)
diff --git a/kernel/rseq.c b/kernel/rseq.c
index a4f86a9d6937..0077713bf240 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -268,9 +268,16 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
return;
if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq))))
goto error;
- ret = rseq_ip_fixup(regs);
- if (unlikely(ret < 0))
- goto error;
+ /*
+ * regs is NULL if and only if the caller is in a syscall path. Skip
+ * fixup and leave rseq_cs as is so that rseq_sycall() will detect and
+ * kill a misbehaving userspace on debug kernels.
+ */
+ if (regs) {
+ ret = rseq_ip_fixup(regs);
+ if (unlikely(ret < 0))
+ goto error;
+ }
if (unlikely(rseq_update_cpu_id(t)))
goto error;
return;
--
2.33.0.685.g46640cef36-goog
The patch below does not apply to the 5.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e840f42a49925707fca90e6c7a4095118fdb8c4d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Sun, 19 Sep 2021 14:09:49 +0100
Subject: [PATCH] KVM: arm64: Fix PMU probe ordering
Russell reported that since 5.13, KVM's probing of the PMU has
started to fail on his HW. As it turns out, there is an implicit
ordering dependency between the architectural PMU probing code and
and KVM's own probing. If, due to probe ordering reasons, KVM probes
before the PMU driver, it will fail to detect the PMU and prevent it
from being advertised to guests as well as the VMM.
Obviously, this is one probing too many, and we should be able to
deal with any ordering.
Add a callback from the PMU code into KVM to advertise the registration
of a host CPU PMU, allowing for any probing order.
Fixes: 5421db1be3b1 ("KVM: arm64: Divorce the perf code from oprofile helpers")
Reported-by: "Russell King (Oracle)" <linux(a)armlinux.org.uk>
Tested-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/YUYRKVflRtUytzy5@shell.armlinux.org.uk
Cc: stable(a)vger.kernel.org
diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index f9bb3b14130e..c84fe24b2ea1 100644
--- a/arch/arm64/kvm/perf.c
+++ b/arch/arm64/kvm/perf.c
@@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
int kvm_perf_init(void)
{
- if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
- static_branch_enable(&kvm_arm_pmu_available);
-
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f5065f23b413..2af3c37445e0 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -740,7 +740,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
kvm_pmu_create_perf_event(vcpu, select_idx);
}
-int kvm_pmu_probe_pmuver(void)
+void kvm_host_pmu_init(struct arm_pmu *pmu)
+{
+ if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
+ !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
+ static_branch_enable(&kvm_arm_pmu_available);
+}
+
+static int kvm_pmu_probe_pmuver(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 3cbc3baf087f..295cc7952d0e 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -952,6 +952,8 @@ int armpmu_register(struct arm_pmu *pmu)
pmu->name, pmu->num_events,
has_nmi ? ", using NMIs" : "");
+ kvm_host_pmu_init(pmu);
+
return 0;
out_destroy:
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 864b9997efb2..90f21898aad8 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
-int kvm_pmu_probe_pmuver(void);
#else
struct kvm_pmu {
};
@@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
return 0;
}
-static inline int kvm_pmu_probe_pmuver(void) { return 0xf; }
-
#endif
#endif
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 505480217cf1..2512e2f9cd4e 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
#endif
+#ifdef CONFIG_KVM
+void kvm_host_pmu_init(struct arm_pmu *pmu);
+#else
+#define kvm_host_pmu_init(x) do { } while(0)
+#endif
+
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
struct arm_pmu *armpmu_alloc_atomic(void);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03a6e84069d1870f5b3d360e64cb330b66f76dee Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Mon, 20 Sep 2021 17:02:55 -0700
Subject: [PATCH] KVM: x86: Clear KVM's cached guest CR3 at RESET/INIT
Explicitly zero the guest's CR3 and mark it available+dirty at RESET/INIT.
Per Intel's SDM and AMD's APM, CR3 is zeroed at both RESET and INIT. For
RESET, this is a nop as vcpu is zero-allocated. For INIT, the bug has
likely escaped notice because no firmware/kernel puts its page tables root
at PA=0, let alone relies on INIT to get the desired CR3 for such page
tables.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20210921000303.400537-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06026f3d7ea2..8a83dd1b882e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10895,6 +10895,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
+ vcpu->arch.cr3 = 0;
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03a6e84069d1870f5b3d360e64cb330b66f76dee Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Mon, 20 Sep 2021 17:02:55 -0700
Subject: [PATCH] KVM: x86: Clear KVM's cached guest CR3 at RESET/INIT
Explicitly zero the guest's CR3 and mark it available+dirty at RESET/INIT.
Per Intel's SDM and AMD's APM, CR3 is zeroed at both RESET and INIT. For
RESET, this is a nop as vcpu is zero-allocated. For INIT, the bug has
likely escaped notice because no firmware/kernel puts its page tables root
at PA=0, let alone relies on INIT to get the desired CR3 for such page
tables.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20210921000303.400537-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06026f3d7ea2..8a83dd1b882e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10895,6 +10895,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
+ vcpu->arch.cr3 = 0;
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03a6e84069d1870f5b3d360e64cb330b66f76dee Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Mon, 20 Sep 2021 17:02:55 -0700
Subject: [PATCH] KVM: x86: Clear KVM's cached guest CR3 at RESET/INIT
Explicitly zero the guest's CR3 and mark it available+dirty at RESET/INIT.
Per Intel's SDM and AMD's APM, CR3 is zeroed at both RESET and INIT. For
RESET, this is a nop as vcpu is zero-allocated. For INIT, the bug has
likely escaped notice because no firmware/kernel puts its page tables root
at PA=0, let alone relies on INIT to get the desired CR3 for such page
tables.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20210921000303.400537-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06026f3d7ea2..8a83dd1b882e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10895,6 +10895,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
+ vcpu->arch.cr3 = 0;
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03a6e84069d1870f5b3d360e64cb330b66f76dee Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Mon, 20 Sep 2021 17:02:55 -0700
Subject: [PATCH] KVM: x86: Clear KVM's cached guest CR3 at RESET/INIT
Explicitly zero the guest's CR3 and mark it available+dirty at RESET/INIT.
Per Intel's SDM and AMD's APM, CR3 is zeroed at both RESET and INIT. For
RESET, this is a nop as vcpu is zero-allocated. For INIT, the bug has
likely escaped notice because no firmware/kernel puts its page tables root
at PA=0, let alone relies on INIT to get the desired CR3 for such page
tables.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20210921000303.400537-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06026f3d7ea2..8a83dd1b882e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10895,6 +10895,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
+ vcpu->arch.cr3 = 0;
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03a6e84069d1870f5b3d360e64cb330b66f76dee Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Mon, 20 Sep 2021 17:02:55 -0700
Subject: [PATCH] KVM: x86: Clear KVM's cached guest CR3 at RESET/INIT
Explicitly zero the guest's CR3 and mark it available+dirty at RESET/INIT.
Per Intel's SDM and AMD's APM, CR3 is zeroed at both RESET and INIT. For
RESET, this is a nop as vcpu is zero-allocated. For INIT, the bug has
likely escaped notice because no firmware/kernel puts its page tables root
at PA=0, let alone relies on INIT to get the desired CR3 for such page
tables.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20210921000303.400537-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06026f3d7ea2..8a83dd1b882e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10895,6 +10895,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
+ vcpu->arch.cr3 = 0;
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 03a6e84069d1870f5b3d360e64cb330b66f76dee Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Mon, 20 Sep 2021 17:02:55 -0700
Subject: [PATCH] KVM: x86: Clear KVM's cached guest CR3 at RESET/INIT
Explicitly zero the guest's CR3 and mark it available+dirty at RESET/INIT.
Per Intel's SDM and AMD's APM, CR3 is zeroed at both RESET and INIT. For
RESET, this is a nop as vcpu is zero-allocated. For INIT, the bug has
likely escaped notice because no firmware/kernel puts its page tables root
at PA=0, let alone relies on INIT to get the desired CR3 for such page
tables.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20210921000303.400537-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06026f3d7ea2..8a83dd1b882e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10895,6 +10895,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
+ vcpu->arch.cr3 = 0;
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
+
/*
* CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions
* of Intel's SDM list CD/NW as being set on INIT, but they contradict
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 773e89ab0056aaa2baa1ffd9f044551654410104 Mon Sep 17 00:00:00 2001
From: Zelin Deng <zelin.deng(a)linux.alibaba.com>
Date: Wed, 29 Sep 2021 13:13:49 +0800
Subject: [PATCH] ptp: Fix ptp_kvm_getcrosststamp issue for x86 ptp_kvm
hv_clock is preallocated to have only HVC_BOOT_ARRAY_SIZE (64) elements;
if the PTP_SYS_OFFSET_PRECISE ioctl is executed on vCPUs whose index is
64 of higher, retrieving the struct pvclock_vcpu_time_info pointer with
"src = &hv_clock[cpu].pvti" will result in an out-of-bounds access and
a wild pointer. Change it to "this_cpu_pvti()" which is guaranteed to
be valid.
Fixes: 95a3d4454bb1 ("Switch kvmclock data to a PER_CPU variable")
Signed-off-by: Zelin Deng <zelin.deng(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
Message-Id: <1632892429-101194-3-git-send-email-zelin.deng(a)linux.alibaba.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c
index 3dd519dfc473..d0096cd7096a 100644
--- a/drivers/ptp/ptp_kvm_x86.c
+++ b/drivers/ptp/ptp_kvm_x86.c
@@ -15,8 +15,6 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/ptp_kvm.h>
-struct pvclock_vsyscall_time_info *hv_clock;
-
static phys_addr_t clock_pair_gpa;
static struct kvm_clock_pairing clock_pair;
@@ -28,8 +26,7 @@ int kvm_arch_ptp_init(void)
return -ENODEV;
clock_pair_gpa = slow_virt_to_phys(&clock_pair);
- hv_clock = pvclock_get_pvti_cpu0_va();
- if (!hv_clock)
+ if (!pvclock_get_pvti_cpu0_va())
return -ENODEV;
ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,
@@ -64,10 +61,8 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec,
struct pvclock_vcpu_time_info *src;
unsigned int version;
long ret;
- int cpu;
- cpu = smp_processor_id();
- src = &hv_clock[cpu].pvti;
+ src = this_cpu_pvti();
do {
/*
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 773e89ab0056aaa2baa1ffd9f044551654410104 Mon Sep 17 00:00:00 2001
From: Zelin Deng <zelin.deng(a)linux.alibaba.com>
Date: Wed, 29 Sep 2021 13:13:49 +0800
Subject: [PATCH] ptp: Fix ptp_kvm_getcrosststamp issue for x86 ptp_kvm
hv_clock is preallocated to have only HVC_BOOT_ARRAY_SIZE (64) elements;
if the PTP_SYS_OFFSET_PRECISE ioctl is executed on vCPUs whose index is
64 of higher, retrieving the struct pvclock_vcpu_time_info pointer with
"src = &hv_clock[cpu].pvti" will result in an out-of-bounds access and
a wild pointer. Change it to "this_cpu_pvti()" which is guaranteed to
be valid.
Fixes: 95a3d4454bb1 ("Switch kvmclock data to a PER_CPU variable")
Signed-off-by: Zelin Deng <zelin.deng(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
Message-Id: <1632892429-101194-3-git-send-email-zelin.deng(a)linux.alibaba.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c
index 3dd519dfc473..d0096cd7096a 100644
--- a/drivers/ptp/ptp_kvm_x86.c
+++ b/drivers/ptp/ptp_kvm_x86.c
@@ -15,8 +15,6 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/ptp_kvm.h>
-struct pvclock_vsyscall_time_info *hv_clock;
-
static phys_addr_t clock_pair_gpa;
static struct kvm_clock_pairing clock_pair;
@@ -28,8 +26,7 @@ int kvm_arch_ptp_init(void)
return -ENODEV;
clock_pair_gpa = slow_virt_to_phys(&clock_pair);
- hv_clock = pvclock_get_pvti_cpu0_va();
- if (!hv_clock)
+ if (!pvclock_get_pvti_cpu0_va())
return -ENODEV;
ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,
@@ -64,10 +61,8 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec,
struct pvclock_vcpu_time_info *src;
unsigned int version;
long ret;
- int cpu;
- cpu = smp_processor_id();
- src = &hv_clock[cpu].pvti;
+ src = this_cpu_pvti();
do {
/*
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 773e89ab0056aaa2baa1ffd9f044551654410104 Mon Sep 17 00:00:00 2001
From: Zelin Deng <zelin.deng(a)linux.alibaba.com>
Date: Wed, 29 Sep 2021 13:13:49 +0800
Subject: [PATCH] ptp: Fix ptp_kvm_getcrosststamp issue for x86 ptp_kvm
hv_clock is preallocated to have only HVC_BOOT_ARRAY_SIZE (64) elements;
if the PTP_SYS_OFFSET_PRECISE ioctl is executed on vCPUs whose index is
64 of higher, retrieving the struct pvclock_vcpu_time_info pointer with
"src = &hv_clock[cpu].pvti" will result in an out-of-bounds access and
a wild pointer. Change it to "this_cpu_pvti()" which is guaranteed to
be valid.
Fixes: 95a3d4454bb1 ("Switch kvmclock data to a PER_CPU variable")
Signed-off-by: Zelin Deng <zelin.deng(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
Message-Id: <1632892429-101194-3-git-send-email-zelin.deng(a)linux.alibaba.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/drivers/ptp/ptp_kvm_x86.c b/drivers/ptp/ptp_kvm_x86.c
index 3dd519dfc473..d0096cd7096a 100644
--- a/drivers/ptp/ptp_kvm_x86.c
+++ b/drivers/ptp/ptp_kvm_x86.c
@@ -15,8 +15,6 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/ptp_kvm.h>
-struct pvclock_vsyscall_time_info *hv_clock;
-
static phys_addr_t clock_pair_gpa;
static struct kvm_clock_pairing clock_pair;
@@ -28,8 +26,7 @@ int kvm_arch_ptp_init(void)
return -ENODEV;
clock_pair_gpa = slow_virt_to_phys(&clock_pair);
- hv_clock = pvclock_get_pvti_cpu0_va();
- if (!hv_clock)
+ if (!pvclock_get_pvti_cpu0_va())
return -ENODEV;
ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,
@@ -64,10 +61,8 @@ int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec,
struct pvclock_vcpu_time_info *src;
unsigned int version;
long ret;
- int cpu;
- cpu = smp_processor_id();
- src = &hv_clock[cpu].pvti;
+ src = this_cpu_pvti();
do {
/*
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From f060db99374e80e853ac4916b49f0a903f65e9dc Mon Sep 17 00:00:00 2001
From: Jia He <justin.he(a)arm.com>
Date: Wed, 22 Sep 2021 23:29:19 +0800
Subject: [PATCH] ACPI: NFIT: Use fallback node id when numa info in NFIT table
is incorrect
When ACPI NFIT table is failing to populate correct numa information
on arm64, dax_kmem will get NUMA_NO_NODE from the NFIT driver.
Without this patch, pmem can't be probed as RAM devices on arm64 guest:
$ndctl create-namespace -fe namespace0.0 --mode=devdax --map=dev -s 1g -a 128M
kmem dax0.0: rejecting DAX region [mem 0x240400000-0x2bfffffff] with invalid node: -1
kmem: probe of dax0.0 failed with error -22
Suggested-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Jia He <justin.he(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Fixes: c221c0b0308f ("device-dax: "Hotplug" persistent memory for use like normal RAM")
Link: https://lore.kernel.org/r/20210922152919.6940-1-justin.he@arm.com
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index a3ef6cce644c..7dd80acf92c7 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3007,6 +3007,18 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
ndr_desc->target_node = NUMA_NO_NODE;
}
+ /* Fallback to address based numa information if node lookup failed */
+ if (ndr_desc->numa_node == NUMA_NO_NODE) {
+ ndr_desc->numa_node = memory_add_physaddr_to_nid(spa->address);
+ dev_info(acpi_desc->dev, "changing numa node from %d to %d for nfit region [%pa-%pa]",
+ NUMA_NO_NODE, ndr_desc->numa_node, &res.start, &res.end);
+ }
+ if (ndr_desc->target_node == NUMA_NO_NODE) {
+ ndr_desc->target_node = phys_to_target_node(spa->address);
+ dev_info(acpi_desc->dev, "changing target node from %d to %d for nfit region [%pa-%pa]",
+ NUMA_NO_NODE, ndr_desc->numa_node, &res.start, &res.end);
+ }
+
/*
* Persistence domain bits are hierarchical, if
* ACPI_NFIT_CAPABILITY_CACHE_FLUSH is set then
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2938b2978a70d4cc10777ee71c9e512ffe4e0f4b Mon Sep 17 00:00:00 2001
From: Paul Fertser <fercerpav(a)gmail.com>
Date: Fri, 24 Sep 2021 12:30:09 +0300
Subject: [PATCH] hwmon: (tmp421) handle I2C errors
Function i2c_smbus_read_byte_data() can return a negative error number
instead of the data read if I2C transaction failed for whatever reason.
Lack of error checking can lead to serious issues on production
hardware, e.g. errors treated as temperatures produce spurious critical
temperature-crossed-threshold errors in BMC logs for OCP server
hardware. The patch was tested with Mellanox OCP Mezzanine card
emulating TMP421 protocol for temperature sensing which sometimes leads
to I2C protocol error during early boot up stage.
Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav(a)gmail.com>
Link: https://lore.kernel.org/r/20210924093011.26083-1-fercerpav@gmail.com
[groeck: dropped unnecessary line breaks]
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index ede66ea6a730..8fd8c3a94dfe 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -119,38 +119,56 @@ static int temp_from_u16(u16 reg)
return (temp * 1000 + 128) / 256;
}
-static struct tmp421_data *tmp421_update_device(struct device *dev)
+static int tmp421_update_device(struct tmp421_data *data)
{
- struct tmp421_data *data = dev_get_drvdata(dev);
struct i2c_client *client = data->client;
+ int ret = 0;
int i;
mutex_lock(&data->update_lock);
if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
!data->valid) {
- data->config = i2c_smbus_read_byte_data(client,
- TMP421_CONFIG_REG_1);
+ ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1);
+ if (ret < 0)
+ goto exit;
+ data->config = ret;
for (i = 0; i < data->channels; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- TMP421_TEMP_MSB[i]) << 8;
- data->temp[i] |= i2c_smbus_read_byte_data(client,
- TMP421_TEMP_LSB[i]);
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_MSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] = ret << 8;
+
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_LSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] |= ret;
}
data->last_updated = jiffies;
data->valid = 1;
}
+exit:
mutex_unlock(&data->update_lock);
- return data;
+ if (ret < 0) {
+ data->valid = 0;
+ return ret;
+ }
+
+ return 0;
}
static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
- struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ struct tmp421_data *tmp421 = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = tmp421_update_device(tmp421);
+ if (ret)
+ return ret;
switch (attr) {
case hwmon_temp_input:
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2938b2978a70d4cc10777ee71c9e512ffe4e0f4b Mon Sep 17 00:00:00 2001
From: Paul Fertser <fercerpav(a)gmail.com>
Date: Fri, 24 Sep 2021 12:30:09 +0300
Subject: [PATCH] hwmon: (tmp421) handle I2C errors
Function i2c_smbus_read_byte_data() can return a negative error number
instead of the data read if I2C transaction failed for whatever reason.
Lack of error checking can lead to serious issues on production
hardware, e.g. errors treated as temperatures produce spurious critical
temperature-crossed-threshold errors in BMC logs for OCP server
hardware. The patch was tested with Mellanox OCP Mezzanine card
emulating TMP421 protocol for temperature sensing which sometimes leads
to I2C protocol error during early boot up stage.
Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav(a)gmail.com>
Link: https://lore.kernel.org/r/20210924093011.26083-1-fercerpav@gmail.com
[groeck: dropped unnecessary line breaks]
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index ede66ea6a730..8fd8c3a94dfe 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -119,38 +119,56 @@ static int temp_from_u16(u16 reg)
return (temp * 1000 + 128) / 256;
}
-static struct tmp421_data *tmp421_update_device(struct device *dev)
+static int tmp421_update_device(struct tmp421_data *data)
{
- struct tmp421_data *data = dev_get_drvdata(dev);
struct i2c_client *client = data->client;
+ int ret = 0;
int i;
mutex_lock(&data->update_lock);
if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
!data->valid) {
- data->config = i2c_smbus_read_byte_data(client,
- TMP421_CONFIG_REG_1);
+ ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1);
+ if (ret < 0)
+ goto exit;
+ data->config = ret;
for (i = 0; i < data->channels; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- TMP421_TEMP_MSB[i]) << 8;
- data->temp[i] |= i2c_smbus_read_byte_data(client,
- TMP421_TEMP_LSB[i]);
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_MSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] = ret << 8;
+
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_LSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] |= ret;
}
data->last_updated = jiffies;
data->valid = 1;
}
+exit:
mutex_unlock(&data->update_lock);
- return data;
+ if (ret < 0) {
+ data->valid = 0;
+ return ret;
+ }
+
+ return 0;
}
static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
- struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ struct tmp421_data *tmp421 = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = tmp421_update_device(tmp421);
+ if (ret)
+ return ret;
switch (attr) {
case hwmon_temp_input:
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2938b2978a70d4cc10777ee71c9e512ffe4e0f4b Mon Sep 17 00:00:00 2001
From: Paul Fertser <fercerpav(a)gmail.com>
Date: Fri, 24 Sep 2021 12:30:09 +0300
Subject: [PATCH] hwmon: (tmp421) handle I2C errors
Function i2c_smbus_read_byte_data() can return a negative error number
instead of the data read if I2C transaction failed for whatever reason.
Lack of error checking can lead to serious issues on production
hardware, e.g. errors treated as temperatures produce spurious critical
temperature-crossed-threshold errors in BMC logs for OCP server
hardware. The patch was tested with Mellanox OCP Mezzanine card
emulating TMP421 protocol for temperature sensing which sometimes leads
to I2C protocol error during early boot up stage.
Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav(a)gmail.com>
Link: https://lore.kernel.org/r/20210924093011.26083-1-fercerpav@gmail.com
[groeck: dropped unnecessary line breaks]
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index ede66ea6a730..8fd8c3a94dfe 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -119,38 +119,56 @@ static int temp_from_u16(u16 reg)
return (temp * 1000 + 128) / 256;
}
-static struct tmp421_data *tmp421_update_device(struct device *dev)
+static int tmp421_update_device(struct tmp421_data *data)
{
- struct tmp421_data *data = dev_get_drvdata(dev);
struct i2c_client *client = data->client;
+ int ret = 0;
int i;
mutex_lock(&data->update_lock);
if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
!data->valid) {
- data->config = i2c_smbus_read_byte_data(client,
- TMP421_CONFIG_REG_1);
+ ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1);
+ if (ret < 0)
+ goto exit;
+ data->config = ret;
for (i = 0; i < data->channels; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- TMP421_TEMP_MSB[i]) << 8;
- data->temp[i] |= i2c_smbus_read_byte_data(client,
- TMP421_TEMP_LSB[i]);
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_MSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] = ret << 8;
+
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_LSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] |= ret;
}
data->last_updated = jiffies;
data->valid = 1;
}
+exit:
mutex_unlock(&data->update_lock);
- return data;
+ if (ret < 0) {
+ data->valid = 0;
+ return ret;
+ }
+
+ return 0;
}
static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
- struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ struct tmp421_data *tmp421 = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = tmp421_update_device(tmp421);
+ if (ret)
+ return ret;
switch (attr) {
case hwmon_temp_input:
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2938b2978a70d4cc10777ee71c9e512ffe4e0f4b Mon Sep 17 00:00:00 2001
From: Paul Fertser <fercerpav(a)gmail.com>
Date: Fri, 24 Sep 2021 12:30:09 +0300
Subject: [PATCH] hwmon: (tmp421) handle I2C errors
Function i2c_smbus_read_byte_data() can return a negative error number
instead of the data read if I2C transaction failed for whatever reason.
Lack of error checking can lead to serious issues on production
hardware, e.g. errors treated as temperatures produce spurious critical
temperature-crossed-threshold errors in BMC logs for OCP server
hardware. The patch was tested with Mellanox OCP Mezzanine card
emulating TMP421 protocol for temperature sensing which sometimes leads
to I2C protocol error during early boot up stage.
Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav(a)gmail.com>
Link: https://lore.kernel.org/r/20210924093011.26083-1-fercerpav@gmail.com
[groeck: dropped unnecessary line breaks]
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index ede66ea6a730..8fd8c3a94dfe 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -119,38 +119,56 @@ static int temp_from_u16(u16 reg)
return (temp * 1000 + 128) / 256;
}
-static struct tmp421_data *tmp421_update_device(struct device *dev)
+static int tmp421_update_device(struct tmp421_data *data)
{
- struct tmp421_data *data = dev_get_drvdata(dev);
struct i2c_client *client = data->client;
+ int ret = 0;
int i;
mutex_lock(&data->update_lock);
if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
!data->valid) {
- data->config = i2c_smbus_read_byte_data(client,
- TMP421_CONFIG_REG_1);
+ ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1);
+ if (ret < 0)
+ goto exit;
+ data->config = ret;
for (i = 0; i < data->channels; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- TMP421_TEMP_MSB[i]) << 8;
- data->temp[i] |= i2c_smbus_read_byte_data(client,
- TMP421_TEMP_LSB[i]);
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_MSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] = ret << 8;
+
+ ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_LSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] |= ret;
}
data->last_updated = jiffies;
data->valid = 1;
}
+exit:
mutex_unlock(&data->update_lock);
- return data;
+ if (ret < 0) {
+ data->valid = 0;
+ return ret;
+ }
+
+ return 0;
}
static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
- struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ struct tmp421_data *tmp421 = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = tmp421_update_device(tmp421);
+ if (ret)
+ return ret;
switch (attr) {
case hwmon_temp_input:
On Galaxy S3 (i9300/i9305), which has the max17047 fuel gauge and no
current sense resistor (rsns), the RepSOC register does not provide an
accurate state of charge value. The reported value is wrong, and does
not change over time. VFSOC however, which uses the voltage fuel gauge
to determine the state of charge, always shows an accurate value.
For devices without current sense, VFSOC is already used for the
soc-alert (0x0003 is written to MiscCFG register), so with this change
the source of the alert and the PROP_CAPACITY value match.
Fixes: 359ab9f5b154 ("power_supply: Add MAX17042 Fuel Gauge Driver")
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
Suggested-by: Wolfgang Wiedmeyer <wolfgit(a)wiedmeyer.de>
Signed-off-by: Henrik Grimler <henrik(a)grimler.se>
---
Changes in v2:
Re-write commit message to highlight that VFSOC is already used for
alert, after Krzysztof's comments
---
drivers/power/supply/max17042_battery.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 8dffae76b6a3..5809ba997093 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -313,7 +313,10 @@ static int max17042_get_property(struct power_supply *psy,
val->intval = data * 625 / 8;
break;
case POWER_SUPPLY_PROP_CAPACITY:
- ret = regmap_read(map, MAX17042_RepSOC, &data);
+ if (chip->pdata->enable_current_sense)
+ ret = regmap_read(map, MAX17042_RepSOC, &data);
+ else
+ ret = regmap_read(map, MAX17042_VFSOC, &data);
if (ret < 0)
return ret;
base-commit: 5816b3e6577eaa676ceb00a848f0fd65fe2adc29
--
2.33.0
This simply adds proper support for panel backlights that can be controlled
via VESA's backlight control protocol, but which also require that we
enable and disable the backlight via PWM instead of via the DPCD interface.
We also enable this by default, in order to fix some people's backlights
that were broken by not having this enabled.
For reference, backlights that require this and use VESA's backlight
interface tend to be laptops with hybrid GPUs, but this very well may
change in the future.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/3680
Fixes: fe7d52bccab6 ("drm/i915/dp: Don't use DPCD backlights that need PWM enable/disable")
Cc: <stable(a)vger.kernel.org> # v5.12+
---
.../drm/i915/display/intel_dp_aux_backlight.c | 24 ++++++++++++++-----
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 569d17b4d00f..594fdc7453ca 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -293,6 +293,10 @@ intel_dp_aux_vesa_enable_backlight(const struct intel_crtc_state *crtc_state,
struct intel_panel *panel = &connector->panel;
struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
+ if (!panel->backlight.edp.vesa.info.aux_enable)
+ panel->backlight.pwm_funcs->enable(crtc_state, conn_state,
+ panel->backlight.pwm_level_max);
+
drm_edp_backlight_enable(&intel_dp->aux, &panel->backlight.edp.vesa.info, level);
}
@@ -304,6 +308,10 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state
struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info);
+
+ if (!panel->backlight.edp.vesa.info.aux_enable)
+ panel->backlight.pwm_funcs->disable(old_conn_state,
+ intel_backlight_invert_pwm_level(connector, 0));
}
static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe)
@@ -321,6 +329,15 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector,
if (ret < 0)
return ret;
+ if (!panel->backlight.edp.vesa.info.aux_enable) {
+ ret = panel->backlight.pwm_funcs->setup(connector, pipe);
+ if (ret < 0) {
+ drm_err(&i915->drm,
+ "Failed to setup PWM backlight controls for eDP backlight: %d\n",
+ ret);
+ return ret;
+ }
+ }
panel->backlight.max = panel->backlight.edp.vesa.info.max;
panel->backlight.min = 0;
if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) {
@@ -340,12 +357,7 @@ intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector)
struct intel_dp *intel_dp = intel_attached_dp(connector);
struct drm_i915_private *i915 = dp_to_i915(intel_dp);
- /* TODO: We currently only support AUX only backlight configurations, not backlights which
- * require a mix of PWM and AUX controls to work. In the mean time, these machines typically
- * work just fine using normal PWM controls anyway.
- */
- if ((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) &&
- drm_edp_backlight_supported(intel_dp->edp_dpcd)) {
+ if (drm_edp_backlight_supported(intel_dp->edp_dpcd)) {
drm_dbg_kms(&i915->drm, "AUX Backlight Control Supported!\n");
return true;
}
--
2.31.1
From: Pali Rohár <pali(a)kernel.org>
Add support for reporting PCI_EXP_LNKSTA_DLLLA bit in Link Control register
on emulated bridge via current LTSSM state. Also correctly indicate DLLLA
capability via PCI_EXP_LNKCAP_DLLLARC bit in Link Control Capability
register.
Fixes: 8a3ebd8de328 ("PCI: aardvark: Implement emulated root PCI bridge config space")
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Signed-off-by: Marek Behún <kabel(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/pci/controller/pci-aardvark.c | 29 ++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 9465b630cede..f429c89ba4a1 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -312,6 +312,20 @@ static inline bool advk_pcie_link_up(struct advk_pcie *pcie)
return ltssm_state >= LTSSM_L0 && ltssm_state < LTSSM_DISABLED;
}
+static inline bool advk_pcie_link_active(struct advk_pcie *pcie)
+{
+ /*
+ * According to PCIe Base specification 3.0, Table 4-14: Link
+ * Status Mapped to the LTSSM, and 4.2.6.3.6 Configuration.Idle
+ * is Link Up mapped to LTSSM Configuration.Idle, Recovery, L0,
+ * L0s, L1 and L2 states. And according to 3.2.1. Data Link
+ * Control and Management State Machine Rules is DL Up status
+ * reported in DL Active state.
+ */
+ u8 ltssm_state = advk_pcie_ltssm_state(pcie);
+ return ltssm_state >= LTSSM_CONFIG_IDLE && ltssm_state < LTSSM_DISABLED;
+}
+
static inline bool advk_pcie_link_training(struct advk_pcie *pcie)
{
/*
@@ -761,12 +775,26 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
return PCI_BRIDGE_EMUL_HANDLED;
}
+ case PCI_EXP_LNKCAP: {
+ u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
+ /*
+ * PCI_EXP_LNKCAP_DLLLARC bit is hardwired in aardvark HW to 0.
+ * But support for PCI_EXP_LNKSTA_DLLLA is emulated via ltssm
+ * state so explicitly enable PCI_EXP_LNKCAP_DLLLARC flag.
+ */
+ val |= PCI_EXP_LNKCAP_DLLLARC;
+ *value = val;
+ return PCI_BRIDGE_EMUL_HANDLED;
+ }
+
case PCI_EXP_LNKCTL: {
/* u32 contains both PCI_EXP_LNKCTL and PCI_EXP_LNKSTA */
u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg) &
~(PCI_EXP_LNKSTA_LT << 16);
if (advk_pcie_link_training(pcie))
val |= (PCI_EXP_LNKSTA_LT << 16);
+ if (advk_pcie_link_active(pcie))
+ val |= (PCI_EXP_LNKSTA_DLLLA << 16);
*value = val;
return PCI_BRIDGE_EMUL_HANDLED;
}
@@ -774,7 +802,6 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
case PCI_CAP_LIST_ID:
case PCI_EXP_DEVCAP:
case PCI_EXP_DEVCTL:
- case PCI_EXP_LNKCAP:
*value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
return PCI_BRIDGE_EMUL_HANDLED;
default:
--
2.32.0
From: Pali Rohár <pali(a)kernel.org>
Fix multiple link training issues in aardvark driver. The main reason of
these issues was misunderstanding of what certain registers do, since their
names and comments were misleading: before commit 96be36dbffac ("PCI:
aardvark: Replace custom macros by standard linux/pci_regs.h macros"), the
pci-aardvark.c driver used custom macros for accessing standard PCIe Root
Bridge registers, and misleading comments did not help to understand what
the code was really doing.
After doing more tests and experiments I've come to the conclusion that the
SPEED_GEN register in aardvark sets the PCIe revision / generation
compliance and forces maximal link speed. Both GEN3 and GEN2 values set the
read-only PCI_EXP_FLAGS_VERS bits (PCIe capabilities version of Root
Bridge) to value 2, while GEN1 value sets PCI_EXP_FLAGS_VERS to 1, which
matches with PCI Express specifications revisions 3, 2 and 1 respectively.
Changing SPEED_GEN also sets the read-only bits PCI_EXP_LNKCAP_SLS and
PCI_EXP_LNKCAP2_SLS to corresponding speed.
(Note that PCI Express rev 1 specification does not define PCI_EXP_LNKCAP2
and PCI_EXP_LNKCTL2 registers and when SPEED_GEN is set to GEN1 (which
also sets PCI_EXP_FLAGS_VERS set to 1), lspci cannot access
PCI_EXP_LNKCAP2 and PCI_EXP_LNKCTL2 registers.)
Changing PCIe link speed can be done via PCI_EXP_LNKCTL2_TLS bits of
PCI_EXP_LNKCTL2 register. Armada 3700 Functional Specifications says that
the default value of PCI_EXP_LNKCTL2_TLS is based on SPEED_GEN value, but
tests showed that the default value is always 8.0 GT/s, independently of
speed set by SPEED_GEN. So after setting SPEED_GEN, we must also set value
in PCI_EXP_LNKCTL2 register via PCI_EXP_LNKCTL2_TLS bits.
Triggering PCI_EXP_LNKCTL_RL bit immediately after setting LINK_TRAINING_EN
bit actually doesn't do anything. Tests have shown that a delay is needed
after enabling LINK_TRAINING_EN bit. As triggering PCI_EXP_LNKCTL_RL
currently does nothing, remove it.
Commit 43fc679ced18 ("PCI: aardvark: Improve link training") introduced
code which sets SPEED_GEN register based on negotiated link speed from
PCI_EXP_LNKSTA_CLS bits of PCI_EXP_LNKSTA register. This code was added to
fix detection of Compex WLE900VX (Atheros QCA9880) WiFi GEN1 PCIe cards, as
otherwise these cards were "invisible" on PCIe bus (probably because they
crashed). But apparently more people reported the same issues with these
cards also with other PCIe controllers [1] and I was able to reproduce this
issue also with other "noname" WiFi cards based on Atheros QCA9890 chip
(with the same PCI vendor/device ids as Atheros QCA9880). So this is not an
issue in aardvark but rather an issue in Atheros QCA98xx chips. Also, this
issue only exists if the kernel is compiled with PCIe ASPM support, and a
generic workaround for this is to change PCIe Bridge to 2.5 GT/s link speed
via PCI_EXP_LNKCTL2_TLS_2_5GT bits in PCI_EXP_LNKCTL2 register [2], before
triggering PCI_EXP_LNKCTL_RL bit. This workaround also works when SPEED_GEN
is set to value GEN2 (5 GT/s). So remove this hack completely in the
aardvark driver and always set SPEED_GEN to value from 'max-link-speed' DT
property. Fix for Atheros QCA98xx chips is handled separately by patch [2].
These two things (code for triggering PCI_EXP_LNKCTL_RL bit and changing
SPEED_GEN value) also explain why commit 6964494582f5 ("PCI: aardvark:
Train link immediately after enabling training") somehow fixed detection of
those problematic Compex cards with Atheros chips: if triggering link
retraining (via PCI_EXP_LNKCTL_RL bit) was done immediately after enabling
link training (via LINK_TRAINING_EN), it did nothing. If there was a
specific delay, aardvark HW already initialized PCIe link and therefore
triggering link retraining caused the above issue. Compex cards triggered
link down event and disappeared from the PCIe bus.
Commit f4c7d053d7f7 ("PCI: aardvark: Wait for endpoint to be ready before
training link") added 100ms sleep before calling 'Start link training'
command and explained that it is a requirement of PCI Express
specification. But the code after this 100ms sleep was not doing 'Start
link training', rather it triggered PCI_EXP_LNKCTL_RL bit via PCIe Root
Bridge to put link into Recovery state.
The required delay after fundamental reset is already done in function
advk_pcie_wait_for_link() which also checks whether PCIe link is up.
So after removing the code which triggers PCI_EXP_LNKCTL_RL bit on PCIe
Root Bridge, there is no need to wait 100ms again. Remove the extra
msleep() call and update comment about the delay required by the PCI
Express specification.
According to Marvell Armada 3700 Functional Specifications, Link training
should be enabled via aardvark register LINK_TRAINING_EN after selecting
PCIe generation and x1 lane. There is no need to disable it prior resetting
card via PERST# signal. This disabling code was introduced in commit
5169a9851daa ("PCI: aardvark: Issue PERST via GPIO") as a workaround for
some Atheros cards. It turns out that this also is Atheros specific issue
and affects any PCIe controller, not only aardvark. Moreover this Atheros
issue was triggered by juggling with PCI_EXP_LNKCTL_RL, LINK_TRAINING_EN
and SPEED_GEN bits interleaved with sleeps. Now, after removing triggering
PCI_EXP_LNKCTL_RL, there is no need to explicitly disable LINK_TRAINING_EN
bit. So remove this code too. The problematic Compex cards described in
previous git commits are correctly detected in advk_pcie_train_link()
function even after applying all these changes.
Note that with this patch, and also prior this patch, some NVMe disks which
support PCIe GEN3 with 8 GT/s speed are negotiated only at the lowest link
speed 2.5 GT/s, independently of SPEED_GEN value. After manually triggering
PCI_EXP_LNKCTL_RL bit (e.g. from userspace via setpci), these NVMe disks
change link speed to 5 GT/s when SPEED_GEN was configured to GEN2. This
issue first needs to be properly investigated. I will send a fix in the
future.
On the other hand, some other GEN2 PCIe cards with 5 GT/s speed are
autonomously by HW autonegotiated at full 5 GT/s speed without need of any
software interaction.
Armada 3700 Functional Specifications describes the following steps for
link training: set SPEED_GEN to GEN2, enable LINK_TRAINING_EN, poll until
link training is complete, trigger PCI_EXP_LNKCTL_RL, poll until signal
rate is 5 GT/s, poll until link training is complete, enable ASPM L0s.
The requirement for triggering PCI_EXP_LNKCTL_RL can be explained by the
need to achieve 5 GT/s speed (as changing link speed is done by throw to
recovery state entered by PCI_EXP_LNKCTL_RL) or maybe as a part of enabling
ASPM L0s (but in this case ASPM L0s should have been enabled prior
PCI_EXP_LNKCTL_RL).
It is unknown why the original pci-aardvark.c driver was triggering
PCI_EXP_LNKCTL_RL bit before waiting for the link to be up. This does not
align with neither PCIe base specifications nor with Armada 3700 Functional
Specification. (Note that in older versions of aardvark, this bit was
called incorrectly PCIE_CORE_LINK_TRAINING, so this may be the reason.)
It is also unknown why Armada 3700 Functional Specification says that it is
needed to trigger PCI_EXP_LNKCTL_RL for GEN2 mode, as according to PCIe
base specification 5 GT/s speed negotiation is supposed to be entirely
autonomous, even if initial speed is 2.5 GT/s.
[1] - https://lore.kernel.org/linux-pci/87h7l8axqp.fsf@toke.dk/
[2] - https://lore.kernel.org/linux-pci/20210326124326.21163-1-pali@kernel.org/
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Signed-off-by: Marek Behún <kabel(a)kernel.org>
Cc: stable(a)vger.kernel.org # f4c7d053d7f7 ("PCI: aardvark: Wait for endpoint to be ready before training link")
Cc: stable(a)vger.kernel.org # 6964494582f5 ("PCI: aardvark: Train link immediately after enabling training")
Cc: stable(a)vger.kernel.org # 43fc679ced18 ("PCI: aardvark: Improve link training")
Cc: stable(a)vger.kernel.org # 5169a9851daa ("PCI: aardvark: Issue PERST via GPIO")
Cc: stable(a)vger.kernel.org # 96be36dbffac ("PCI: aardvark: Replace custom macros by standard linux/pci_regs.h macros")
Cc: stable(a)vger.kernel.org # d0c6a3475b03 ("PCI: aardvark: Move PCIe reset card code to advk_pcie_train_link()")
Cc: stable(a)vger.kernel.org # 1d1cd163d0de ("PCI: aardvark: Update comment about disabling link training")
---
drivers/pci/controller/pci-aardvark.c | 117 ++++++++------------------
1 file changed, 34 insertions(+), 83 deletions(-)
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 74d1ec7eff16..5387d9cc3eba 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -258,11 +258,6 @@ static inline u32 advk_readl(struct advk_pcie *pcie, u64 reg)
return readl(pcie->base + reg);
}
-static inline u16 advk_read16(struct advk_pcie *pcie, u64 reg)
-{
- return advk_readl(pcie, (reg & ~0x3)) >> ((reg & 0x3) * 8);
-}
-
static int advk_pcie_link_up(struct advk_pcie *pcie)
{
u32 val, ltssm_state;
@@ -300,23 +295,9 @@ static void advk_pcie_wait_for_retrain(struct advk_pcie *pcie)
static void advk_pcie_issue_perst(struct advk_pcie *pcie)
{
- u32 reg;
-
if (!pcie->reset_gpio)
return;
- /*
- * As required by PCI Express spec (PCI Express Base Specification, REV.
- * 4.0 PCI Express, February 19 2014, 6.6.1 Conventional Reset) a delay
- * for at least 100ms after de-asserting PERST# signal is needed before
- * link training is enabled. So ensure that link training is disabled
- * prior de-asserting PERST# signal to fulfill that PCI Express spec
- * requirement.
- */
- reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
- reg &= ~LINK_TRAINING_EN;
- advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
-
/* 10ms delay is needed for some cards */
dev_info(&pcie->pdev->dev, "issuing PERST via reset GPIO for 10ms\n");
gpiod_set_value_cansleep(pcie->reset_gpio, 1);
@@ -324,53 +305,46 @@ static void advk_pcie_issue_perst(struct advk_pcie *pcie)
gpiod_set_value_cansleep(pcie->reset_gpio, 0);
}
-static int advk_pcie_train_at_gen(struct advk_pcie *pcie, int gen)
+static void advk_pcie_train_link(struct advk_pcie *pcie)
{
- int ret, neg_gen;
+ struct device *dev = &pcie->pdev->dev;
u32 reg;
+ int ret;
- /* Setup link speed */
+ /*
+ * Setup PCIe rev / gen compliance based on device tree property
+ * 'max-link-speed' which also forces maximal link speed.
+ */
reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
reg &= ~PCIE_GEN_SEL_MSK;
- if (gen == 3)
+ if (pcie->link_gen == 3)
reg |= SPEED_GEN_3;
- else if (gen == 2)
+ else if (pcie->link_gen == 2)
reg |= SPEED_GEN_2;
else
reg |= SPEED_GEN_1;
advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
/*
- * Enable link training. This is not needed in every call to this
- * function, just once suffices, but it does not break anything either.
+ * Set maximal link speed value also into PCIe Link Control 2 register.
+ * Armada 3700 Functional Specification says that default value is based
+ * on SPEED_GEN but tests showed that default value is always 8.0 GT/s.
*/
+ reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2);
+ reg &= ~PCI_EXP_LNKCTL2_TLS;
+ if (pcie->link_gen == 3)
+ reg |= PCI_EXP_LNKCTL2_TLS_8_0GT;
+ else if (pcie->link_gen == 2)
+ reg |= PCI_EXP_LNKCTL2_TLS_5_0GT;
+ else
+ reg |= PCI_EXP_LNKCTL2_TLS_2_5GT;
+ advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2);
+
+ /* Enable link training after selecting PCIe generation */
reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG);
reg |= LINK_TRAINING_EN;
advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG);
- /*
- * Start link training immediately after enabling it.
- * This solves problems for some buggy cards.
- */
- reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL);
- reg |= PCI_EXP_LNKCTL_RL;
- advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL);
-
- ret = advk_pcie_wait_for_link(pcie);
- if (ret)
- return ret;
-
- reg = advk_read16(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKSTA);
- neg_gen = reg & PCI_EXP_LNKSTA_CLS;
-
- return neg_gen;
-}
-
-static void advk_pcie_train_link(struct advk_pcie *pcie)
-{
- struct device *dev = &pcie->pdev->dev;
- int neg_gen = -1, gen;
-
/*
* Reset PCIe card via PERST# signal. Some cards are not detected
* during link training when they are in some non-initial state.
@@ -381,41 +355,18 @@ static void advk_pcie_train_link(struct advk_pcie *pcie)
* PERST# signal could have been asserted by pinctrl subsystem before
* probe() callback has been called or issued explicitly by reset gpio
* function advk_pcie_issue_perst(), making the endpoint going into
- * fundamental reset. As required by PCI Express spec a delay for at
- * least 100ms after such a reset before link training is needed.
- */
- msleep(PCI_PM_D3COLD_WAIT);
-
- /*
- * Try link training at link gen specified by device tree property
- * 'max-link-speed'. If this fails, iteratively train at lower gen.
- */
- for (gen = pcie->link_gen; gen > 0; --gen) {
- neg_gen = advk_pcie_train_at_gen(pcie, gen);
- if (neg_gen > 0)
- break;
- }
-
- if (neg_gen < 0)
- goto err;
-
- /*
- * After successful training if negotiated gen is lower than requested,
- * train again on negotiated gen. This solves some stability issues for
- * some buggy gen1 cards.
+ * fundamental reset. As required by PCI Express spec (PCI Express
+ * Base Specification, REV. 4.0 PCI Express, February 19 2014, 6.6.1
+ * Conventional Reset) a delay for at least 100ms after such a reset
+ * before sending a Configuration Request to the device is needed.
+ * So wait until PCIe link is up. Function advk_pcie_wait_for_link()
+ * waits for link at least 900ms.
*/
- if (neg_gen < gen) {
- gen = neg_gen;
- neg_gen = advk_pcie_train_at_gen(pcie, gen);
- }
-
- if (neg_gen == gen) {
- dev_info(dev, "link up at gen %i\n", gen);
- return;
- }
-
-err:
- dev_err(dev, "link never came up\n");
+ ret = advk_pcie_wait_for_link(pcie);
+ if (ret < 0)
+ dev_err(dev, "link never came up\n");
+ else
+ dev_info(dev, "link up\n");
}
/*
--
2.32.0
From: Pali Rohár <pali(a)kernel.org>
Commit 366697018c9a ("PCI: aardvark: Add PHY support") introduced
configuration of PCIe Reference clock via PCIE_CORE_REF_CLK_REG register,
but did it incorrectly.
PCIe Reference clock differential pair is routed from system board to
endpoint card, so on CPU side it has output direction. Therefore it is
required to enable transmitting and disable receiving.
Default configuration according to Armada 3700 Functional Specifications is
enabled receiver part and disabled transmitter.
We need this change because otherwise PCIe Reference clock is configured to
some undefined state when differential pair is used for both transmitting
and receiving.
Fix this by disabling receiver part.
Fixes: 366697018c9a ("PCI: aardvark: Add PHY support")
Signed-off-by: Pali Rohár <pali(a)kernel.org>
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Signed-off-by: Marek Behún <kabel(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/pci/controller/pci-aardvark.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index bb57ca6aed35..d5d6f92e5143 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -99,6 +99,7 @@
#define PCIE_CORE_CTRL2_MSI_ENABLE BIT(10)
#define PCIE_CORE_REF_CLK_REG (CONTROL_BASE_ADDR + 0x14)
#define PCIE_CORE_REF_CLK_TX_ENABLE BIT(1)
+#define PCIE_CORE_REF_CLK_RX_ENABLE BIT(2)
#define PCIE_MSG_LOG_REG (CONTROL_BASE_ADDR + 0x30)
#define PCIE_ISR0_REG (CONTROL_BASE_ADDR + 0x40)
#define PCIE_MSG_PM_PME_MASK BIT(7)
@@ -451,9 +452,15 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie)
u32 reg;
int i;
- /* Enable TX */
+ /*
+ * Configure PCIe Reference clock. Direction is from the PCIe
+ * controller to the endpoint card, so enable transmitting of
+ * Reference clock differential signal off-chip and disable
+ * receiving off-chip differential signal.
+ */
reg = advk_readl(pcie, PCIE_CORE_REF_CLK_REG);
reg |= PCIE_CORE_REF_CLK_TX_ENABLE;
+ reg &= ~PCIE_CORE_REF_CLK_RX_ENABLE;
advk_writel(pcie, reg, PCIE_CORE_REF_CLK_REG);
/* Set to Direct mode */
--
2.32.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 91c548bf23d6 - scsi: ufs: ufs-pci: Fix Intel LKF link stability
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
Targeted tests: NO
All kernel binaries, config files, and logs are available for download here:
https://arr-cki-prod-datawarehouse-public.s3.amazonaws.com/index.html?prefi…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - nvmeof-mp
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ ACPI table test
✅ ACPI enabled test
⚡⚡⚡ LTP - cve
⚡⚡⚡ LTP - sched
⚡⚡⚡ LTP - syscalls
⚡⚡⚡ LTP - can
⚡⚡⚡ LTP - commands
⚡⚡⚡ LTP - containers
⚡⚡⚡ LTP - dio
⚡⚡⚡ LTP - fs
⚡⚡⚡ LTP - fsx
⚡⚡⚡ LTP - math
⚡⚡⚡ LTP - hugetlb
⚡⚡⚡ LTP - mm
⚡⚡⚡ LTP - nptl
⚡⚡⚡ LTP - pty
⚡⚡⚡ LTP - ipc
⚡⚡⚡ LTP - tracing
⚡⚡⚡ LTP: openposix test suite
⚡⚡⚡ CIFS Connectathon
⚡⚡⚡ POSIX pjd-fstest suites
⚡⚡⚡ NFS Connectathon
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ jvm - jcstress tests
⚡⚡⚡ Memory: fork_mem
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking cki netfilter test
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: dm/common
⚡⚡⚡ storage: SCSI VPD
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ xarray-idr-radixtree-test
🚧 ⚡⚡⚡ i2c: i2cdetect sanity
🚧 ⚡⚡⚡ Firmware test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking: igmp conformance test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ lvm cache test
🚧 ⚡⚡⚡ lvm snapper test
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ Networking bridge: sanity - mlx5
⚡⚡⚡ Ethernet drivers sanity - mlx5
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 6:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - nvmeof-mp
Host 7:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
ppc64le:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ LTP - cve
✅ LTP - sched
⚡⚡⚡ LTP - syscalls
⚡⚡⚡ LTP - can
⚡⚡⚡ LTP - commands
⚡⚡⚡ LTP - containers
⚡⚡⚡ LTP - dio
⚡⚡⚡ LTP - fs
⚡⚡⚡ LTP - fsx
⚡⚡⚡ LTP - math
⚡⚡⚡ LTP - hugetlb
⚡⚡⚡ LTP - mm
⚡⚡⚡ LTP - nptl
⚡⚡⚡ LTP - pty
⚡⚡⚡ LTP - ipc
⚡⚡⚡ LTP - tracing
⚡⚡⚡ LTP: openposix test suite
⚡⚡⚡ CIFS Connectathon
⚡⚡⚡ POSIX pjd-fstest suites
⚡⚡⚡ NFS Connectathon
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ jvm - jcstress tests
⚡⚡⚡ Memory: fork_mem
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking cki netfilter test
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: dm/common
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ xarray-idr-radixtree-test
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ lvm cache test
🚧 ⚡⚡⚡ lvm snapper test
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Host 3:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - nvmeof-mp
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ Storage: lvm device-mapper test - upstream
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ Storage: lvm device-mapper test - upstream
Host 6:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
s390x:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ selinux-policy: serge-testsuite
✅ Storage blktests - blk
✅ Storage: swraid mdadm raid_module test
🚧 ✅ Podman system test - as root
🚧 ✅ Podman system test - as user
🚧 ✅ Storage blktests - nvme-tcp
🚧 ✅ stress: stress-ng - interrupt
🚧 ✅ stress: stress-ng - cpu
🚧 ✅ stress: stress-ng - cpu-cache
🚧 ✅ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 2:
✅ Boot test
✅ Reboot test
✅ LTP - cve
✅ LTP - sched
✅ LTP - syscalls
✅ LTP - can
✅ LTP - commands
✅ LTP - containers
✅ LTP - dio
✅ LTP - fs
✅ LTP - fsx
✅ LTP - math
✅ LTP - hugetlb
✅ LTP - mm
✅ LTP - nptl
✅ LTP - pty
✅ LTP - ipc
✅ LTP - tracing
✅ LTP: openposix test suite
✅ CIFS Connectathon
✅ POSIX pjd-fstest suites
✅ NFS Connectathon
✅ Loopdev Sanity
✅ jvm - jcstress tests
✅ Memory: fork_mem
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking cki netfilter test
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ storage: dm/common
✅ trace: ftrace/tracer
🚧 ❌ xarray-idr-radixtree-test
🚧 ✅ Memory function: kaslr
🚧 ✅ audit: audit testsuite test
🚧 ✅ lvm cache test
🚧 ✅ lvm snapper test
Host 3:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - nvmeof-mp
Host 4:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - srp
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ ACPI table test
✅ LTP - cve
✅ LTP - sched
⚡⚡⚡ LTP - syscalls
⚡⚡⚡ LTP - can
⚡⚡⚡ LTP - commands
⚡⚡⚡ LTP - containers
⚡⚡⚡ LTP - dio
⚡⚡⚡ LTP - fs
⚡⚡⚡ LTP - fsx
⚡⚡⚡ LTP - math
⚡⚡⚡ LTP - hugetlb
⚡⚡⚡ LTP - mm
⚡⚡⚡ LTP - nptl
⚡⚡⚡ LTP - pty
⚡⚡⚡ LTP - ipc
⚡⚡⚡ LTP - tracing
⚡⚡⚡ LTP: openposix test suite
⚡⚡⚡ CIFS Connectathon
⚡⚡⚡ POSIX pjd-fstest suites
⚡⚡⚡ NFS Connectathon
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ jvm - jcstress tests
⚡⚡⚡ Memory: fork_mem
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking cki netfilter test
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: dm/common
⚡⚡⚡ storage: SCSI VPD
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ xarray-idr-radixtree-test
🚧 ⚡⚡⚡ i2c: i2cdetect sanity
🚧 ⚡⚡⚡ Firmware test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking: igmp conformance test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ lvm cache test
🚧 ⚡⚡⚡ lvm snapper test
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ xfstests - nfsv4.2
⚡⚡⚡ xfstests - cifsv3.11
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ power-management: cpupower/sanity test
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ CPU: Idle Test
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ Storage: lvm device-mapper test - upstream
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - nvmeof-mp
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Host 6:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Targeted tests
--------------
Test runs for patches always include a set of base tests, plus some
tests chosen based on the file paths modified by the patch. The latter
are called "targeted tests". If no targeted tests are run, that means
no patch-specific tests are available. Please, consider contributing a
targeted test for related patches to increase test coverage. See
https://docs.engineering.redhat.com/x/_wEZB for more details.
The return type of ktime_divns() is s64. The timeout_to_jiffies() currently
assigns the result of this ktime_divns() to unsigned long, which on 32 bit
systems may overflow. Furthermore, the result of this function is sometimes
also passed to functions which expect signed long, dma_fence_wait_timeout()
is one such example.
Fix this by adjusting the type of remaining_jiffies to s64, so we do not
suffer overflow there, and return a value limited to range of 0..INT_MAX,
which is safe for all usecases of this timeout.
The above overflow can be triggered if userspace passes in too large timeout
value, larger than INT_MAX / HZ seconds. The kernel detects it and complains
about "schedule_timeout: wrong timeout value %lx" and generates a warning
backtrace.
Note that this fixes commit 6cedb8b377bb ("drm/msm: avoid using 'timespec'"),
because the previously used timespec_to_jiffies() function returned unsigned
long instead of s64:
static inline unsigned long timespec_to_jiffies(const struct timespec *value)
Fixes: 6cedb8b377bb ("drm/msm: avoid using 'timespec'")
Signed-off-by: Marek Vasut <marex(a)denx.de>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Jordan Crouse <jcrouse(a)codeaurora.org>
Cc: Rob Clark <robdclark(a)chromium.org>
Cc: stable(a)vger.kernel.org # 5.6+
---
NOTE: This is related to Mesa MR
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12886
---
drivers/gpu/drm/msm/msm_drv.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 0b2686b060c73..d96b254b8aa46 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -543,7 +543,7 @@ static inline int align_pitch(int width, int bpp)
static inline unsigned long timeout_to_jiffies(const ktime_t *timeout)
{
ktime_t now = ktime_get();
- unsigned long remaining_jiffies;
+ s64 remaining_jiffies;
if (ktime_compare(*timeout, now) < 0) {
remaining_jiffies = 0;
@@ -552,7 +552,7 @@ static inline unsigned long timeout_to_jiffies(const ktime_t *timeout)
remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ);
}
- return remaining_jiffies;
+ return clamp(remaining_jiffies, 0LL, (s64)INT_MAX);
}
#endif /* __MSM_DRV_H__ */
--
2.33.0
From: Joerg Roedel <jroedel(a)suse.de>
The trampoline_pgd only maps the 0xfffffff000000000-0xffffffffffffffff
range of kernel memory (with 4-level paging). This range contains the
kernels text+data+bss mappings and the module mapping space, but not the
direct mapping and the vmalloc area.
This is enough to get an application processors out of real-mode, but
for code that switches back to real-mode the trampoline_pgd is missing
important parts of the address space. For example, consider this code
from arch/x86/kernel/reboot.c, function machine_real_restart() for a
64-bit kernel:
#ifdef CONFIG_X86_32
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
/* Exiting long mode will fail if CR4.PCIDE is set. */
if (boot_cpu_has(X86_FEATURE_PCID))
cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
#ifdef CONFIG_X86_32
asm volatile("jmpl *%0" : :
"rm" (real_mode_header->machine_real_restart_asm),
"a" (type));
#else
asm volatile("ljmpl *%0" : :
"m" (real_mode_header->machine_real_restart_asm),
"D" (type));
#endif
The code switches to the trampoline_pgd, which unmaps the direct mapping
and also the kernel stack. The call to cr4_clear_bits() will find no
stack and crash the machine. The real_mode_header pointer below points
into the direct mapping, and dereferencing it also causes a crash.
The reason this does not crash always is only that kernel mappings are
global and the CR3 switch does not flush those mappings. But if theses
mappings are not in the TLB already, the above code will crash before it
can jump to the real-mode stub.
Extend the trampoline_pgd to contain all kernel mappings to prevent
these crashes and to make code which runs on this page-table more
robust.
Cc: stable(a)vger.kernel.org
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/realmode/init.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index b9802b18f504..77617cd624fe 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -95,6 +95,7 @@ static void __init setup_real_mode(void)
#ifdef CONFIG_X86_64
u64 *trampoline_pgd;
u64 efer;
+ int i;
#endif
base = (unsigned char *)real_mode_header;
@@ -151,8 +152,17 @@ static void __init setup_real_mode(void)
trampoline_header->flags = 0;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
+
+ /* Map the real mode stub as virtual == physical */
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
- trampoline_pgd[511] = init_top_pgt[511].pgd;
+
+ /*
+ * Include the entirety of the kernel mapping into the trampoline
+ * PGD. This way, all mappings present in the normal kernel page
+ * tables are usable while running on trampoline_pgd.
+ */
+ for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
+ trampoline_pgd[i] = init_top_pgt[i].pgd;
#endif
sme_sev_setup_real_mode(trampoline_header);
--
2.33.0
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 6e3cd95234dc1eda488f4f487c281bac8fef4d9b
Gitweb: https://git.kernel.org/tip/6e3cd95234dc1eda488f4f487c281bac8fef4d9b
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Thu, 30 Sep 2021 19:21:39 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Fri, 01 Oct 2021 13:38:13 +02:00
x86/hpet: Use another crystalball to evaluate HPET usability
On recent Intel systems the HPET stops working when the system reaches PC10
idle state.
The approach of adding PCI ids to the early quirks to disable HPET on
these systems is a whack a mole game which makes no sense.
Check for PC10 instead and force disable HPET if supported. The check is
overbroad as it does not take ACPI, intel_idle enablement and command
line parameters into account. That's fine as long as there is at least
PMTIMER available to calibrate the TSC frequency. The decision can be
overruled by adding "hpet=force" on the kernel command line.
Remove the related early PCI quirks for affected Ice Cake and Coffin Lake
systems as they are not longer required. That should also cover all
other systems, i.e. Tiger Rag and newer generations, which are most
likely affected by this as well.
Fixes: Yet another hardware trainwreck
Reported-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Rafael J. Wysocki <rafael(a)kernel.org>
Cc: stable(a)vger.kernel.org
Cc: Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
---
arch/x86/kernel/early-quirks.c | 6 +--
arch/x86/kernel/hpet.c | 81 +++++++++++++++++++++++++++++++++-
2 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 38837da..391a4e2 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -714,12 +714,6 @@ static struct chipset early_qrk[] __initdata = {
*/
{ PCI_VENDOR_ID_INTEL, 0x0f00,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
- { PCI_VENDOR_ID_INTEL, 0x3e20,
- PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
- { PCI_VENDOR_ID_INTEL, 0x3ec4,
- PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
- { PCI_VENDOR_ID_INTEL, 0x8a12,
- PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_BROADCOM, 0x4331,
PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
{}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 42fc41d..882213d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -10,6 +10,7 @@
#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/time.h>
+#include <asm/mwait.h>
#undef pr_fmt
#define pr_fmt(fmt) "hpet: " fmt
@@ -916,6 +917,83 @@ static bool __init hpet_counting(void)
return false;
}
+static bool __init mwait_pc10_supported(void)
+{
+ unsigned int eax, ebx, ecx, mwait_substates;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ if (!cpu_feature_enabled(X86_FEATURE_MWAIT))
+ return false;
+
+ if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ return false;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
+
+ return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) &&
+ (ecx & CPUID5_ECX_INTERRUPT_BREAK) &&
+ (mwait_substates & (0xF << 28));
+}
+
+/*
+ * Check whether the system supports PC10. If so force disable HPET as that
+ * stops counting in PC10. This check is overbroad as it does not take any
+ * of the following into account:
+ *
+ * - ACPI tables
+ * - Enablement of intel_idle
+ * - Command line arguments which limit intel_idle C-state support
+ *
+ * That's perfectly fine. HPET is a piece of hardware designed by committee
+ * and the only reasons why it is still in use on modern systems is the
+ * fact that it is impossible to reliably query TSC and CPU frequency via
+ * CPUID or firmware.
+ *
+ * If HPET is functional it is useful for calibrating TSC, but this can be
+ * done via PMTIMER as well which seems to be the last remaining timer on
+ * X86/INTEL platforms that has not been completely wreckaged by feature
+ * creep.
+ *
+ * In theory HPET support should be removed altogether, but there are older
+ * systems out there which depend on it because TSC and APIC timer are
+ * dysfunctional in deeper C-states.
+ *
+ * It's only 20 years now that hardware people have been asked to provide
+ * reliable and discoverable facilities which can be used for timekeeping
+ * and per CPU timer interrupts.
+ *
+ * The probability that this problem is going to be solved in the
+ * forseeable future is close to zero, so the kernel has to be cluttered
+ * with heuristics to keep up with the ever growing amount of hardware and
+ * firmware trainwrecks. Hopefully some day hardware people will understand
+ * that the approach of "This can be fixed in software" is not sustainable.
+ * Hope dies last...
+ */
+static bool __init hpet_is_pc10_damaged(void)
+{
+ unsigned long long pcfg;
+
+ /* Check whether PC10 substates are supported */
+ if (!mwait_pc10_supported())
+ return false;
+
+ /* Check whether PC10 is enabled in PKG C-state limit */
+ rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg);
+ if ((pcfg & 0xF) < 8)
+ return false;
+
+ if (hpet_force_user) {
+ pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n");
+ return false;
+ }
+
+ pr_info("HPET dysfunctional in PC10. Force disabled.\n");
+ boot_hpet_disable = true;
+ return true;
+}
+
/**
* hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*/
@@ -929,6 +1007,9 @@ int __init hpet_enable(void)
if (!is_hpet_capable())
return 0;
+ if (hpet_is_pc10_damaged())
+ return 0;
+
hpet_set_mapping();
if (!hpet_virt_address)
return 0;
My Lenovo T490s with i7-8665U had been marking TSC as unstable
since v5.13, resulting in very sluggish desktop experience...
Kernel logs show:
clocksource: timekeeping watchdog on CPU3: hpet read-back delay of 316000ns, attempt 4, marking unstable
tsc: Marking TSC unstable due to clocksource watchdog
TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
sched_clock: Marking unstable (14539801827657, -530891666)<-(14539319241737, -48307500)
clocksource: Checking clocksource tsc synchronization from CPU 3 to CPUs 0-2,6-7.
clocksource: Switched to clocksource hpet
I have a 8086:3e34 bridge, also known as "Host bridge: Intel
Corporation Coffee Lake HOST and DRAM Controller (rev 0c)".
Add it to the list.
We should perhaps consider applying this quirk more widely.
The Intel documentation does not list my device [1], but
linuxhw [2] does, and it seems to list a few more bridges
we do not currently cover (3e31, 3ecc, 3e35, 3e0f).
[1] https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/8th…
[2] https://github.com/linuxhw/DevicePopulation/blob/master/README.md
Cc: stable(a)vger.kernel.org # v5.13+
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
---
v2: - add the dmesg output
---
arch/x86/kernel/early-quirks.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 38837dad46e6..7d2de04f8750 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -716,6 +716,8 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_INTEL, 0x3e20,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+ { PCI_VENDOR_ID_INTEL, 0x3e34,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_INTEL, 0x3ec4,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_INTEL, 0x8a12,
--
2.31.1
The patch titled
Subject: ocfs2: mount fails with buffer overflow in strlen
has been added to the -mm tree. Its filename is
ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/ocfs2-mount-fails-with-buffer-ove…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/ocfs2-mount-fails-with-buffer-ove…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Valentin Vidic <vvidic(a)valentin-vidic.from.hr>
Subject: ocfs2: mount fails with buffer overflow in strlen
Starting with kernel 5.11 built with CONFIG_FORTIFY_SOURCE mouting an
ocfs2 filesystem with either o2cb or pcmk cluster stack fails with the
trace below. Problem seems to be that strings for cluster stack and
cluster name are not guaranteed to be null terminated in the disk
representation, while strlcpy assumes that the source string is always
null terminated. This causes a read outside of the source string
triggering the buffer overflow detection.
detected buffer overflow in strlen
------------[ cut here ]------------
kernel BUG at lib/string.c:1149!
invalid opcode: 0000 [#1] SMP PTI
CPU: 1 PID: 910 Comm: mount.ocfs2 Not tainted 5.14.0-1-amd64 #1
Debian 5.14.6-2
RIP: 0010:fortify_panic+0xf/0x11
...
Call Trace:
ocfs2_initialize_super.isra.0.cold+0xc/0x18 [ocfs2]
ocfs2_fill_super+0x359/0x19b0 [ocfs2]
mount_bdev+0x185/0x1b0
? ocfs2_remount+0x440/0x440 [ocfs2]
legacy_get_tree+0x27/0x40
vfs_get_tree+0x25/0xb0
path_mount+0x454/0xa20
__x64_sys_mount+0x103/0x140
do_syscall_64+0x3b/0xc0
entry_SYSCALL_64_after_hwframe+0x44/0xae
Link: https://lkml.kernel.org/r/20210929180654.32460-1-vvidic@valentin-vidic.from…
Signed-off-by: Valentin Vidic <vvidic(a)valentin-vidic.from.hr>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/super.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
--- a/fs/ocfs2/super.c~ocfs2-mount-fails-with-buffer-overflow-in-strlen
+++ a/fs/ocfs2/super.c
@@ -2167,11 +2167,17 @@ static int ocfs2_initialize_super(struct
}
if (ocfs2_clusterinfo_valid(osb)) {
+ /*
+ * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
+ * terminated, so make sure no overflow happens here by using
+ * memcpy. Destination strings will always be null terminated
+ * because osb is allocated using kzalloc.
+ */
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
- strlcpy(osb->osb_cluster_stack,
+ memcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
- OCFS2_STACK_LABEL_LEN + 1);
+ OCFS2_STACK_LABEL_LEN);
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
mlog(ML_ERROR,
"couldn't mount because of an invalid "
@@ -2180,9 +2186,9 @@ static int ocfs2_initialize_super(struct
status = -EINVAL;
goto bail;
}
- strlcpy(osb->osb_cluster_name,
+ memcpy(osb->osb_cluster_name,
OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
- OCFS2_CLUSTER_NAME_LEN + 1);
+ OCFS2_CLUSTER_NAME_LEN);
} else {
/* The empty string is identical with classic tools that
* don't know about s_cluster_info. */
_
Patches currently in -mm which might be from vvidic(a)valentin-vidic.from.hr are
ocfs2-mount-fails-with-buffer-overflow-in-strlen.patch
Good day,
My name is Luis Fernandez.I would like to discuss something
important that will benefit both of us. I will send you more
details upon your response
Regards
Luis Fernandez
The patch titled
Subject: ocfs2: Fix data corruption after conversion from inline format
has been added to the -mm tree. Its filename is
ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/ocfs2-fix-data-corruption-after-c…
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/ocfs2-fix-data-corruption-after-c…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Jan Kara <jack(a)suse.cz>
Subject: ocfs2: Fix data corruption after conversion from inline format
Commit 6dbf7bb55598 ("fs: Don't invalidate page buffers in
block_write_full_page()") uncovered a latent bug in ocfs2 conversion
from inline inode format to a normal inode format.
The code in
ocfs2_convert_inline_data_to_extents() attempts to zero out the whole
cluster allocated for file data by grabbing, zeroing, and dirtying all
pages covering this cluster. However these pages are beyond i_size, thus
writeback code generally ignores these dirty pages and no blocks were
ever actually zeroed on the disk.
This oversight was fixed by commit 693c241a5f6a ("ocfs2: No need to zero
pages past i_size.") for standard ocfs2 write path, inline conversion path
was apparently forgotten; the commit log also has a reasoning why the
zeroing actually is not needed.
After commit 6dbf7bb55598, things became worse as writeback code stopped
invalidating buffers on pages beyond i_size and thus these pages end up
with clean PageDirty bit but with buffers attached to these pages being
still dirty. So when a file is converted from inline format, then
writeback triggers, and then the file is grown so that these pages become
valid, the invalid dirtiness state is preserved, mark_buffer_dirty() does
nothing on these pages (buffers are already dirty) but page is never
written back because it is clean. So data written to these pages is lost
once pages are reclaimed.
Simple reproducer for the problem is:
xfs_io -f -c "pwrite 0 2000" -c "pwrite 2000 2000" -c "fsync" \
-c "pwrite 4000 2000" ocfs2_file
After unmounting and mounting the fs again, you can observe that end of
'ocfs2_file' has lost its contents.
Fix the problem by not doing the pointless zeroing during conversion
from inline format similarly as in the standard write path.
Link: https://lkml.kernel.org/r/20210930095405.21433-1-jack@suse.cz
Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()")
Signed-off-by: Jan Kara <jack(a)suse.cz>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Joseph Qi <jiangqi903(a)gmail.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/alloc.c | 46 +++++++++++----------------------------------
1 file changed, 12 insertions(+), 34 deletions(-)
--- a/fs/ocfs2/alloc.c~ocfs2-fix-data-corruption-after-conversion-from-inline-format
+++ a/fs/ocfs2/alloc.c
@@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
- int ret, i, has_data, num_pages = 0;
+ int ret,has_data, num_pages = 0;
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
@@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_alloc_context *data_ac = NULL;
- struct page **pages = NULL;
- loff_t end = osb->s_clustersize;
+ struct page *page = NULL;
struct ocfs2_extent_tree et;
int did_quota = 0;
has_data = i_size_read(inode) ? 1 : 0;
if (has_data) {
- pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
- sizeof(struct page *), GFP_NOFS);
- if (pages == NULL) {
- ret = -ENOMEM;
- mlog_errno(ret);
- return ret;
- }
-
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
- goto free_pages;
+ goto out;
}
}
@@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents
}
if (has_data) {
- unsigned int page_end;
+ unsigned int page_end = min_t(unsigned, PAGE_SIZE,
+ osb->s_clustersize);
u64 phys;
ret = dquot_alloc_space_nodirty(inode,
@@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents
*/
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
- /*
- * Non sparse file systems zero on extend, so no need
- * to do that now.
- */
- if (!ocfs2_sparse_alloc(osb) &&
- PAGE_SIZE < osb->s_clustersize)
- end = PAGE_SIZE;
-
- ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
+ ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
+ &num_pages);
if (ret) {
mlog_errno(ret);
need_free = 1;
@@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents
* This should populate the 1st page for us and mark
* it up to date.
*/
- ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
+ ret = ocfs2_read_inline_data(inode, page, di_bh);
if (ret) {
mlog_errno(ret);
need_free = 1;
goto out_unlock;
}
- page_end = PAGE_SIZE;
- if (PAGE_SIZE > osb->s_clustersize)
- page_end = osb->s_clustersize;
-
- for (i = 0; i < num_pages; i++)
- ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
- pages[i], i > 0, &phys);
+ ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
+ &phys);
}
spin_lock(&oi->ip_lock);
@@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents
}
out_unlock:
- if (pages)
- ocfs2_unlock_and_free_pages(pages, num_pages);
+ if (page)
+ ocfs2_unlock_and_free_pages(&page, num_pages);
out_commit:
if (ret < 0 && did_quota)
@@ -7205,8 +7185,6 @@ out_commit:
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
-free_pages:
- kfree(pages);
return ret;
}
_
Patches currently in -mm which might be from jack(a)suse.cz are
ocfs2-fix-data-corruption-after-conversion-from-inline-format.patch
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 70248e7b378b - Linux 5.14.9
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
Targeted tests: NO
All kernel binaries, config files, and logs are available for download here:
https://arr-cki-prod-datawarehouse-public.s3.amazonaws.com/index.html?prefi…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Host 2:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - nvmeof-mp
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ Networking bridge: sanity - mlx5
⚡⚡⚡ Ethernet drivers sanity - mlx5
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ ACPI table test
✅ ACPI enabled test
⚡⚡⚡ LTP - cve
⚡⚡⚡ LTP - sched
⚡⚡⚡ LTP - syscalls
⚡⚡⚡ LTP - can
⚡⚡⚡ LTP - commands
⚡⚡⚡ LTP - containers
⚡⚡⚡ LTP - dio
⚡⚡⚡ LTP - fs
⚡⚡⚡ LTP - fsx
⚡⚡⚡ LTP - math
⚡⚡⚡ LTP - hugetlb
⚡⚡⚡ LTP - mm
⚡⚡⚡ LTP - nptl
⚡⚡⚡ LTP - pty
⚡⚡⚡ LTP - ipc
⚡⚡⚡ LTP - tracing
⚡⚡⚡ CIFS Connectathon
⚡⚡⚡ POSIX pjd-fstest suites
⚡⚡⚡ NFS Connectathon
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ jvm - jcstress tests
⚡⚡⚡ Memory: fork_mem
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking cki netfilter test
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: dm/common
⚡⚡⚡ storage: SCSI VPD
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ xarray-idr-radixtree-test
🚧 ⚡⚡⚡ i2c: i2cdetect sanity
🚧 ⚡⚡⚡ Firmware test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking: igmp conformance test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ lvm cache test
🚧 ⚡⚡⚡ lvm snapper test
Host 6:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ Networking bridge: sanity - mlx5
⚡⚡⚡ Ethernet drivers sanity - mlx5
Host 7:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
ppc64le:
Host 1:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - srp
Host 2:
✅ Boot test
✅ Reboot test
✅ LTP - cve
✅ LTP - sched
✅ LTP - syscalls
✅ LTP - can
✅ LTP - commands
✅ LTP - containers
✅ LTP - dio
✅ LTP - fs
✅ LTP - fsx
✅ LTP - math
✅ LTP - hugetlb
✅ LTP - mm
✅ LTP - nptl
✅ LTP - pty
✅ LTP - ipc
✅ LTP - tracing
✅ CIFS Connectathon
✅ POSIX pjd-fstest suites
✅ NFS Connectathon
✅ Loopdev Sanity
✅ jvm - jcstress tests
✅ Memory: fork_mem
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking cki netfilter test
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: dm/common
✅ trace: ftrace/tracer
🚧 ✅ xarray-idr-radixtree-test
🚧 ✅ Memory function: kaslr
🚧 ✅ audit: audit testsuite test
🚧 ✅ lvm cache test
🚧 ✅ lvm snapper test
Host 3:
✅ Boot test
✅ Reboot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ IPMI driver test
✅ IPMItool loop stress test
✅ selinux-policy: serge-testsuite
✅ Storage blktests - blk
✅ Storage block - filesystem fio test
✅ Storage block - queue scheduler test
✅ storage: software RAID testing
✅ Storage: swraid mdadm raid_module test
🚧 ✅ Podman system test - as root
🚧 ✅ Podman system test - as user
🚧 ✅ xfstests - btrfs
🚧 ✅ Storage blktests - nvme-tcp
🚧 ✅ Storage: lvm device-mapper test - upstream
Host 4:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - nvmeof-mp
s390x:
Host 1:
✅ Boot test
✅ Reboot test
✅ selinux-policy: serge-testsuite
✅ Storage blktests - blk
✅ Storage: swraid mdadm raid_module test
🚧 ✅ Podman system test - as root
🚧 ✅ Podman system test - as user
🚧 ✅ Storage blktests - nvme-tcp
🚧 ✅ stress: stress-ng - interrupt
🚧 ✅ stress: stress-ng - cpu
🚧 ✅ stress: stress-ng - cpu-cache
🚧 ✅ stress: stress-ng - memory
🚧 ✅ stress: stress-ng - os
Host 2:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - srp
Host 3:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - nvmeof-mp
Host 4:
✅ Boot test
✅ Reboot test
✅ LTP - cve
✅ LTP - sched
✅ LTP - syscalls
✅ LTP - can
✅ LTP - commands
✅ LTP - containers
✅ LTP - dio
✅ LTP - fs
✅ LTP - fsx
✅ LTP - math
✅ LTP - hugetlb
✅ LTP - mm
✅ LTP - nptl
✅ LTP - pty
✅ LTP - ipc
✅ LTP - tracing
✅ CIFS Connectathon
✅ POSIX pjd-fstest suites
✅ NFS Connectathon
✅ Loopdev Sanity
✅ jvm - jcstress tests
✅ Memory: fork_mem
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking cki netfilter test
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ storage: dm/common
✅ trace: ftrace/tracer
🚧 ✅ xarray-idr-radixtree-test
🚧 ✅ Memory function: kaslr
🚧 ✅ audit: audit testsuite test
🚧 ✅ lvm cache test
🚧 ✅ lvm snapper test
x86_64:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Host 2:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - nvmeof-mp
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ xfstests - nfsv4.2
⚡⚡⚡ xfstests - cifsv3.11
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ power-management: cpupower/sanity test
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ CPU: Idle Test
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ Storage: lvm device-mapper test - upstream
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 4:
✅ Boot test
✅ Reboot test
✅ ACPI table test
✅ LTP - cve
✅ LTP - sched
✅ LTP - syscalls
✅ LTP - can
✅ LTP - commands
✅ LTP - containers
✅ LTP - dio
✅ LTP - fs
✅ LTP - fsx
✅ LTP - math
✅ LTP - hugetlb
✅ LTP - mm
✅ LTP - nptl
✅ LTP - pty
✅ LTP - ipc
✅ LTP - tracing
✅ CIFS Connectathon
✅ POSIX pjd-fstest suites
✅ NFS Connectathon
✅ Loopdev Sanity
✅ jvm - jcstress tests
✅ Memory: fork_mem
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking cki netfilter test
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: sanity smoke test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: dm/common
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ xarray-idr-radixtree-test
🚧 ✅ i2c: i2cdetect sanity
🚧 ✅ Firmware test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ Networking: igmp conformance test
🚧 ✅ audit: audit testsuite test
🚧 ✅ lvm cache test
🚧 ✅ lvm snapper test
Host 5:
✅ Boot test
✅ Reboot test
🚧 ✅ Storage blktests - srp
Host 6:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ xfstests - nfsv4.2
⚡⚡⚡ xfstests - cifsv3.11
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ power-management: cpupower/sanity test
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ CPU: Idle Test
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ Storage: lvm device-mapper test - upstream
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Targeted tests
--------------
Test runs for patches always include a set of base tests, plus some
tests chosen based on the file paths modified by the patch. The latter
are called "targeted tests". If no targeted tests are run, that means
no patch-specific tests are available. Please, consider contributing a
targeted test for related patches to increase test coverage. See
https://docs.engineering.redhat.com/x/_wEZB for more details.
>From 7417896fcc7aea645fa0b89f39fa55979251dca3 Mon Sep 17 00:00:00 2001
From: Patrick Ho <Patrick.Ho(a)netapp.com>
Date: Sat, 21 Aug 2021 02:56:26 -0400
Subject: [PATCH] nfsd: fix error handling of register_pernet_subsys() in
init_nfsd()
init_nfsd() should not unregister pernet subsys if the register fails
but should instead unwind from the last successful operation which is
register_filesystem().
Unregistering a failed register_pernet_subsys() call can result in
a kernel GPF as revealed by programmatically injecting an error in
register_pernet_subsys().
Verified the fix handled failure gracefully with no lingering nfsd
entry in /proc/filesystems. This change was introduced by the commit
bd5ae9288d64 ("nfsd: register pernet ops last, unregister first"),
the original error handling logic was correct.
Fixes: bd5ae9288d64 ("nfsd: register pernet ops last, unregister first")
Cc: stable(a)vger.kernel.org
Signed-off-by: Patrick Ho <Patrick.Ho(a)netapp.com>
---
fs/nfsd/nfsctl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c2c3d9077dc5..09ae1a0873d0 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1545,7 +1545,7 @@ static int __init init_nfsd(void)
goto out_free_all;
return 0;
out_free_all:
- unregister_pernet_subsys(&nfsd_net_ops);
+ unregister_filesystem(&nfsd_fs_type);
out_free_exports:
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
--
2.17.1
There is no "connector" child node available on every
platform, so the driver can't fail to probe when it's
missing.
Fixes: 57560ee95cb7 ("usb: typec: tipd: Don't block probing of consumer of "connector" nodes")
Reported-by: "Regupathy, Rajaram" <rajaram.regupathy(a)intel.com>
Cc: stable(a)vger.kernel.org # 5.14+
Signed-off-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
---
drivers/usb/typec/tipd/core.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 93e56291f0cf1..b30efdb9fa9ed 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -628,10 +628,6 @@ static int tps6598x_probe(struct i2c_client *client)
if (ret < 0)
return ret;
- fwnode = device_get_named_child_node(&client->dev, "connector");
- if (!fwnode)
- return -ENODEV;
-
/*
* This fwnode has a "compatible" property, but is never populated as a
* struct device. Instead we simply parse it to read the properties.
@@ -639,7 +635,9 @@ static int tps6598x_probe(struct i2c_client *client)
* with existing DT files, we work around this by deleting any
* fwnode_links to/from this fwnode.
*/
- fw_devlink_purge_absent_suppliers(fwnode);
+ fwnode = device_get_named_child_node(&client->dev, "connector");
+ if (fwnode)
+ fw_devlink_purge_absent_suppliers(fwnode);
tps->role_sw = fwnode_usb_role_switch_get(fwnode);
if (IS_ERR(tps->role_sw)) {
--
2.33.0
Commit 6dbf7bb55598 ("fs: Don't invalidate page buffers in
block_write_full_page()") uncovered a latent bug in ocfs2 conversion
from inline inode format to a normal inode format. The code in
ocfs2_convert_inline_data_to_extents() attempts to zero out the whole
cluster allocated for file data by grabbing, zeroing, and dirtying all
pages covering this cluster. However these pages are beyond i_size, thus
writeback code generally ignores these dirty pages and no blocks were
ever actually zeroed on the disk. This oversight was fixed by commit
693c241a5f6a ("ocfs2: No need to zero pages past i_size.") for standard
ocfs2 write path, inline conversion path was apparently forgotten; the
commit log also has a reasoning why the zeroing actually is not needed.
After commit 6dbf7bb55598, things became worse as writeback code stopped
invalidating buffers on pages beyond i_size and thus these pages end up
with clean PageDirty bit but with buffers attached to these pages being
still dirty. So when a file is converted from inline format, then
writeback triggers, and then the file is grown so that these pages
become valid, the invalid dirtiness state is preserved,
mark_buffer_dirty() does nothing on these pages (buffers are already
dirty) but page is never written back because it is clean. So data
written to these pages is lost once pages are reclaimed.
Simple reproducer for the problem is:
xfs_io -f -c "pwrite 0 2000" -c "pwrite 2000 2000" -c "fsync" \
-c "pwrite 4000 2000" ocfs2_file
After unmounting and mounting the fs again, you can observe that end of
'ocfs2_file' has lost its contents.
Fix the problem by not doing the pointless zeroing during conversion
from inline format similarly as in the standard write path.
CC: stable(a)vger.kernel.org
Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/ocfs2/alloc.c | 46 ++++++++++++----------------------------------
1 file changed, 12 insertions(+), 34 deletions(-)
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f1cc8258d34a..12aead0dabe2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
- int ret, i, has_data, num_pages = 0;
+ int ret,has_data, num_pages = 0;
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
@@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_alloc_context *data_ac = NULL;
- struct page **pages = NULL;
- loff_t end = osb->s_clustersize;
+ struct page *page = NULL;
struct ocfs2_extent_tree et;
int did_quota = 0;
has_data = i_size_read(inode) ? 1 : 0;
if (has_data) {
- pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
- sizeof(struct page *), GFP_NOFS);
- if (pages == NULL) {
- ret = -ENOMEM;
- mlog_errno(ret);
- return ret;
- }
-
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
- goto free_pages;
+ goto out;
}
}
@@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
if (has_data) {
- unsigned int page_end;
+ unsigned int page_end = min_t(unsigned, PAGE_SIZE,
+ osb->s_clustersize);
u64 phys;
ret = dquot_alloc_space_nodirty(inode,
@@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
*/
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
- /*
- * Non sparse file systems zero on extend, so no need
- * to do that now.
- */
- if (!ocfs2_sparse_alloc(osb) &&
- PAGE_SIZE < osb->s_clustersize)
- end = PAGE_SIZE;
-
- ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
+ ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
+ &num_pages);
if (ret) {
mlog_errno(ret);
need_free = 1;
@@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* This should populate the 1st page for us and mark
* it up to date.
*/
- ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
+ ret = ocfs2_read_inline_data(inode, page, di_bh);
if (ret) {
mlog_errno(ret);
need_free = 1;
goto out_unlock;
}
- page_end = PAGE_SIZE;
- if (PAGE_SIZE > osb->s_clustersize)
- page_end = osb->s_clustersize;
-
- for (i = 0; i < num_pages; i++)
- ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
- pages[i], i > 0, &phys);
+ ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
+ &phys);
}
spin_lock(&oi->ip_lock);
@@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_unlock:
- if (pages)
- ocfs2_unlock_and_free_pages(pages, num_pages);
+ if (page)
+ ocfs2_unlock_and_free_pages(&page, num_pages);
out_commit:
if (ret < 0 && did_quota)
@@ -7205,8 +7185,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
-free_pages:
- kfree(pages);
return ret;
}
--
2.26.2
Commit 5c912e679506 ("usb: cdc-wdm: fix build error when CONFIG_WWAN_CORE
is not set") fixed a build error when CONFIG_WWAN was set but
CONFIG_WWAN_CORE was not. Since then CONFIG_WWAN_CORE was removed and
joined with CONFIG_WWAN in commit 89212e160b81 ("net: wwan: Fix WWAN
config symbols").
Also, since CONFIG_WWAN has class tri-state instead of bool, we cannot
check if it is defined directly, but have to use IS_DEFINED() instead.
Fixes: 5c912e679506 ("usb: cdc-wdm: fix build error when CONFIG_WWAN_CORE is not set")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Rikard Falkeborn <rikard.falkeborn(a)gmail.com>
---
drivers/usb/class/cdc-wdm.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 35d5908b5478..03b25aaaa1dd 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -824,7 +824,7 @@ static struct usb_class_driver wdm_class = {
};
/* --- WWAN framework integration --- */
-#ifdef CONFIG_WWAN_CORE
+#if IS_ENABLED(CONFIG_WWAN)
static int wdm_wwan_port_start(struct wwan_port *port)
{
struct wdm_device *desc = wwan_port_get_drvdata(port);
@@ -963,11 +963,11 @@ static void wdm_wwan_rx(struct wdm_device *desc, int length)
/* inbuf has been copied, it is safe to check for outstanding data */
schedule_work(&desc->service_outs_intr);
}
-#else /* CONFIG_WWAN_CORE */
+#else /* CONFIG_WWAN */
static void wdm_wwan_init(struct wdm_device *desc) {}
static void wdm_wwan_deinit(struct wdm_device *desc) {}
static void wdm_wwan_rx(struct wdm_device *desc, int length) {}
-#endif /* CONFIG_WWAN_CORE */
+#endif /* CONFIG_WWAN */
/* --- error handling --- */
static void wdm_rxwork(struct work_struct *work)
--
2.33.0
A recent change that started reporting break events forgot to push the
event to the line discipline, which meant that a detected break would
not be reported until further characters had been receive (the port
could even have been closed and reopened in between).
Fixes: 08dff274edda ("cdc-acm: fix BREAK rx code path adding necessary calls")
Cc: stable(a)vger.kernel.org
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/usb/class/cdc-acm.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index c7a1736720e7..7b2e2420ecae 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -340,6 +340,9 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf)
acm->iocount.overrun++;
spin_unlock_irqrestore(&acm->read_lock, flags);
+ if (newctrl & ACM_CTRL_BRK)
+ tty_flip_buffer_push(&acm->port);
+
if (difference)
wake_up_all(&acm->wioctl);
--
2.32.0
A recent change that started reporting break events to the line
discipline caused the tty-buffer insertions to no longer be serialised
by inserting events also from the completion handler for the interrupt
endpoint.
Completion calls for distinct endpoints are not guaranteed to be
serialised. For example, in case a host-controller driver uses
bottom-half completion, the interrupt and bulk-in completion handlers
can end up running in parallel on two CPUs (high-and low-prio tasklets,
respectively) thereby breaking the tty layer's single producer
assumption.
Fix this by holding the read lock also when inserting characters from
the bulk endpoint.
Fixes: 08dff274edda ("cdc-acm: fix BREAK rx code path adding necessary calls")
Cc: stable(a)vger.kernel.org
Signed-off-by: Johan Hovold <johan(a)kernel.org>
---
drivers/usb/class/cdc-acm.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 4e2f1552f4b7..c7a1736720e7 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -475,11 +475,16 @@ static int acm_submit_read_urbs(struct acm *acm, gfp_t mem_flags)
static void acm_process_read_urb(struct acm *acm, struct urb *urb)
{
+ unsigned long flags;
+
if (!urb->actual_length)
return;
+ spin_lock_irqsave(&acm->read_lock, flags);
tty_insert_flip_string(&acm->port, urb->transfer_buffer,
urb->actual_length);
+ spin_unlock_irqrestore(&acm->read_lock, flags);
+
tty_flip_buffer_push(&acm->port);
}
--
2.32.0
Good day,
My name is Luis Fernandez.I would like to discuss something
important that will benefit both of us. I will send you more
details upon your response
Regards
Luis Fernandez
commit fad7cd3310db ("nbd: add the check to prevent overflow in
__nbd_ioctl()") raised an issue from the fallback helpers added in
commit f0907827a8a9 ("compiler.h: enable builtin overflow checkers and
add fallback code")
ERROR: modpost: "__divdi3" [drivers/block/nbd.ko] undefined!
As Stephen Rothwell notes:
The added check_mul_overflow() call is being passed 64 bit values.
COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW is not set for this build (see
include/linux/overflow.h).
Specifically, the helpers for checking whether the results of a
multiplication overflowed (__unsigned_mul_overflow,
__signed_add_overflow) use the division operator when
!COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW. This is problematic for 64b
operands on 32b hosts.
This was fixed upstream by
commit 76ae847497bc ("Documentation: raise minimum supported version of
GCC to 5.1")
which is not suitable to be backported to stable.
Further, __builtin_mul_overflow() would emit a libcall to a
compiler-rt-only symbol when compiling with clang < 14 for 32b targets.
ld.lld: error: undefined symbol: __mulodi4
In order to keep stable buildable with GCC 4.9 and clang < 14, modify
struct nbd_config to instead track the number of bits of the block size;
reconstructing the block size using runtime checked shifts that are not
problematic for those compilers and in a ways that can be backported to
stable.
In nbd_set_size, we do validate that the value of blksize must be a
power of two (POT) and is in the range of [512, PAGE_SIZE] (both
inclusive).
This does modify the debugfs interface.
Cc: stable(a)vger.kernel.org
Cc: Arnd Bergmann <arnd(a)kernel.org>
Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Link: https://github.com/ClangBuiltLinux/linux/issues/1438
Link: https://lore.kernel.org/all/20210909182525.372ee687@canb.auug.org.au/
Link: https://lore.kernel.org/stable/CAHk-=whiQBofgis_rkniz8GBP9wZtSZdcDEffgSLO62…
Reported-by: Naresh Kamboju <naresh.kamboju(a)linaro.org>
Reported-by: Nathan Chancellor <nathan(a)kernel.org>
Reported-by: Stephen Rothwell <sfr(a)canb.auug.org.au>
Suggested-by: Kees Cook <keescook(a)chromium.org>
Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Suggested-by: Pavel Machek <pavel(a)ucw.cz>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
This patch is kind of a v3 for solving this problem.
v2: https://lore.kernel.org/stable/20210914002318.2298583-1-ndesaulniers@google…
v1: https://lore.kernel.org/stable/20210913203201.1844253-1-ndesaulniers@google…
But is quite different in approach. Instead of trying to fix up the
overflow routines in stable, we amend the code in nbd.c as per Linus
(against mainline) with fixes from Kees to use the named overflow
checker.
drivers/block/nbd.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5170a630778d..1183f7872b71 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -97,13 +97,18 @@ struct nbd_config {
atomic_t recv_threads;
wait_queue_head_t recv_wq;
- loff_t blksize;
+ unsigned int blksize_bits;
loff_t bytesize;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbg_dir;
#endif
};
+static inline unsigned int nbd_blksize(struct nbd_config *config)
+{
+ return 1u << config->blksize_bits;
+}
+
struct nbd_device {
struct blk_mq_tag_set tag_set;
@@ -146,7 +151,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
-#define NBD_DEF_BLKSIZE 1024
+#define NBD_DEF_BLKSIZE_BITS 10
static unsigned int nbds_max = 16;
static int max_part = 16;
@@ -317,12 +322,12 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
loff_t blksize)
{
if (!blksize)
- blksize = NBD_DEF_BLKSIZE;
+ blksize = 1u << NBD_DEF_BLKSIZE_BITS;
if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
return -EINVAL;
nbd->config->bytesize = bytesize;
- nbd->config->blksize = blksize;
+ nbd->config->blksize_bits = __ffs(blksize);
if (!nbd->task_recv)
return 0;
@@ -1337,7 +1342,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
- return nbd_set_size(nbd, config->bytesize, config->blksize);
+ return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
}
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
@@ -1406,11 +1411,11 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_BLKSIZE:
return nbd_set_size(nbd, config->bytesize, arg);
case NBD_SET_SIZE:
- return nbd_set_size(nbd, arg, config->blksize);
+ return nbd_set_size(nbd, arg, nbd_blksize(config));
case NBD_SET_SIZE_BLOCKS:
- if (check_mul_overflow((loff_t)arg, config->blksize, &bytesize))
+ if (check_shl_overflow(arg, config->blksize_bits, &bytesize))
return -EINVAL;
- return nbd_set_size(nbd, bytesize, config->blksize);
+ return nbd_set_size(nbd, bytesize, nbd_blksize(config));
case NBD_SET_TIMEOUT:
nbd_set_cmd_timeout(nbd, arg);
return 0;
@@ -1476,7 +1481,7 @@ static struct nbd_config *nbd_alloc_config(void)
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
- config->blksize = NBD_DEF_BLKSIZE;
+ config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
atomic_set(&config->live_connections, 0);
try_module_get(THIS_MODULE);
return config;
@@ -1604,7 +1609,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_fops);
debugfs_create_u64("size_bytes", 0444, dir, &config->bytesize);
debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
- debugfs_create_u64("blocksize", 0444, dir, &config->blksize);
+ debugfs_create_u32("blocksize_bits", 0444, dir, &config->blksize_bits);
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_fops);
return 0;
@@ -1826,7 +1831,7 @@ nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
{
struct nbd_config *config = nbd->config;
- u64 bsize = config->blksize;
+ u64 bsize = nbd_blksize(config);
u64 bytes = config->bytesize;
if (info->attrs[NBD_ATTR_SIZE_BYTES])
@@ -1835,7 +1840,7 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES])
bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
- if (bytes != config->bytesize || bsize != config->blksize)
+ if (bytes != config->bytesize || bsize != nbd_blksize(config))
return nbd_set_size(nbd, bytes, bsize);
return 0;
}
base-commit: 4c17ca27923c16fd73bbb9ad033c7d749c3bcfcc
--
2.33.0.464.g1972c5931b-goog
From: Joerg Roedel <jroedel(a)suse.de>
The trampoline_pgd only maps the 0xfffffff000000000-0xffffffffffffffff
range of kernel memory (with 4-level paging). This range contains the
kernels text+data+bss mappings and the module mapping space, but not the
direct mapping and the vmalloc area.
This is enough to get an application processors out of real-mode, but
for code that switches back to real-mode the trampoline_pgd is missing
important parts of the address space. For example, consider this code
from arch/x86/kernel/reboot.c, function machine_real_restart() for a
64-bit kernel:
#ifdef CONFIG_X86_32
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
/* Exiting long mode will fail if CR4.PCIDE is set. */
if (boot_cpu_has(X86_FEATURE_PCID))
cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
#ifdef CONFIG_X86_32
asm volatile("jmpl *%0" : :
"rm" (real_mode_header->machine_real_restart_asm),
"a" (type));
#else
asm volatile("ljmpl *%0" : :
"m" (real_mode_header->machine_real_restart_asm),
"D" (type));
#endif
The code switches to the trampoline_pgd, which unmaps the direct mapping
and also the kernel stack. The call to cr4_clear_bits() will find no
stack and crash the machine. The real_mode_header pointer below points
into the direct mapping, and dereferencing it also causes a crash.
The reason this does not crash always is only that kernel mappings are
global and the CR3 switch does not flush those mappings. But if theses
mappings are not in the TLB already, the above code will crash before it
can jump to the real-mode stub.
Extend the trampoline_pgd to contain all kernel mappings to prevent
these crashes and to make code which runs on this page-table more
robust.
Cc: stable(a)vger.kernel.org
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
arch/x86/realmode/init.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 0cfe1046cec9..792cb9ca9b29 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -91,6 +91,7 @@ static void __init setup_real_mode(void)
#ifdef CONFIG_X86_64
u64 *trampoline_pgd;
u64 efer;
+ int i;
#endif
base = (unsigned char *)real_mode_header;
@@ -147,8 +148,17 @@ static void __init setup_real_mode(void)
trampoline_header->flags = 0;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
+
+ /*
+ * Map all of kernel memory into the trampoline PGD so that it includes
+ * the direct mapping and vmalloc space. This is needed to keep the
+ * stack and real_mode_header mapped when switching to this page table.
+ */
+ for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
+ trampoline_pgd[i] = init_top_pgt[i].pgd;
+
+ /* Map the real mode stub as virtual == physical */
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
- trampoline_pgd[511] = init_top_pgt[511].pgd;
#endif
sme_sev_setup_real_mode(trampoline_header);
--
2.33.0
When I was doing PTP_SYS_OFFSET_PRECISE ioctl in VM which has 128 vCPUs,
I got error returned occasionally. Then I checked the routine of
"getcrosststamp". I found in kvm_arch_ptp_get_crosststamp() of x86,
pvclock vcpu time info was got from hv_clock arrary which has only 64
elements. Hence this ioctl is executed on vCPU > 64, a wild/dangling
pointer will be got, which had casued the error.
To confirm this finding, I wrote a simple PTP_SYS_OFFSET_PRECISE ioctl
test and used "taskset -c n" to run the test, when it was executed on
vCPUs >= 64 it returned error.
This patchset exposes this_cpu_pvti() to get per cpu pvclock vcpu time
info of vCPUs >= 64 insdead of getting them from hv_clock arrary.
Zelin Deng (2):
x86/kvmclock: Move this_cpu_pvti into kvmclock.h
ptp: Fix ptp_kvm_getcrosststamp issue for x86 ptp_kvm
arch/x86/include/asm/kvmclock.h | 14 ++++++++++++++
arch/x86/kernel/kvmclock.c | 13 ++-----------
drivers/ptp/ptp_kvm_x86.c | 9 ++-------
3 files changed, 18 insertions(+), 18 deletions(-)
--
1.8.3.1
In tpm_del_char_device() make sure that chip->ops is still valid.
This check is needed since in case of a system shutdown
tpm_class_shutdown() has already been called and set chip->ops to NULL.
This leads to a NULL pointer access as soon as tpm_del_char_device()
tries to access chip->ops in case of TPM 2.
Fixes: dcbeab1946454 ("tpm: fix crash in tpm_tis deinitialization")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lino Sanfilippo <LinoSanfilippo(a)gmx.de>
---
drivers/char/tpm/tpm-chip.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index ddaeceb7e109..ed1fb5d82caf 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -474,13 +474,19 @@ static void tpm_del_char_device(struct tpm_chip *chip)
/* Make the driver uncallable. */
down_write(&chip->ops_sem);
- if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- if (!tpm_chip_start(chip)) {
- tpm2_shutdown(chip, TPM2_SU_CLEAR);
- tpm_chip_stop(chip);
+ /* Check if chip->ops is still valid since in case of a shutdown
+ * tpm_class_shutdown() has already sent the TPM2_Shutdown command
+ * and set chip->ops to NULL.
+ */
+ if (chip->ops) {
+ if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+ if (!tpm_chip_start(chip)) {
+ tpm2_shutdown(chip, TPM2_SU_CLEAR);
+ tpm_chip_stop(chip);
+ }
}
+ chip->ops = NULL;
}
- chip->ops = NULL;
up_write(&chip->ops_sem);
}
base-commit: a3fa7a101dcff93791d1b1bdb3affcad1410c8c1
--
2.33.0
In commit 43c2de1002d2, we moved most HW configuration to bind(), but we
didn't move the runtime PM management. Therefore, depending on initial
boot state, runtime-PM workqueue delays, and other timing factors, we
may disable our power domain in between the hardware configuration
(bind()) and when we enable the display. This can cause us to lose
hardware state and fail to configure our display. For example:
dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
panel-innolux-p079zca ff960000.mipi.0: failed to write command 0
or:
dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
panel-kingdisplay-kd097d04 ff960000.mipi.0: failed write init cmds: -110
We should match the runtime PM to the lifetime of the bind()/unbind()
cycle.
Tested on Acer Chrometab 10 (RK3399 Gru-Scarlet), with panel drivers
built either as modules or built-in.
Side notes: it seems one is more likely to see this problem when the
panel driver is built into the kernel. I've also seen this problem
bisect down to commits that simply changed Kconfig dependencies, because
it changed the order in which driver init functions were compiled into
the kernel, and therefore the ordering and timing of built-in device
probe.
Fixes: 43c2de1002d2 ("drm/rockchip: dsi: move all lane config except LCDC mux to bind()")
Link: https://lore.kernel.org/linux-rockchip/9aedfb528600ecf871885f7293ca4207c84d…
Reported-by: <aleksandr.o.makarov(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
---
(no changes since v2)
Changes in v2:
- Clean up pm-runtime state in error cases.
- Correct git hash for Fixes.
.../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 37 ++++++++++---------
1 file changed, 19 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index a2262bee5aa4..45676b23c019 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -773,10 +773,6 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
if (mux < 0)
return;
- pm_runtime_get_sync(dsi->dev);
- if (dsi->slave)
- pm_runtime_get_sync(dsi->slave->dev);
-
/*
* For the RK3399, the clk of grf must be enabled before writing grf
* register. And for RK3288 or other soc, this grf_clk must be NULL,
@@ -795,20 +791,10 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
clk_disable_unprepare(dsi->grf_clk);
}
-static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
-{
- struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder);
-
- if (dsi->slave)
- pm_runtime_put(dsi->slave->dev);
- pm_runtime_put(dsi->dev);
-}
-
static const struct drm_encoder_helper_funcs
dw_mipi_dsi_encoder_helper_funcs = {
.atomic_check = dw_mipi_dsi_encoder_atomic_check,
.enable = dw_mipi_dsi_encoder_enable,
- .disable = dw_mipi_dsi_encoder_disable,
};
static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
@@ -938,10 +924,14 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
put_device(second);
}
+ pm_runtime_get_sync(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_get_sync(dsi->slave->dev);
+
ret = clk_prepare_enable(dsi->pllref_clk);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret);
- return ret;
+ goto out_pm_runtime;
}
/*
@@ -953,7 +943,7 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
ret = clk_prepare_enable(dsi->grf_clk);
if (ret) {
DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret);
- return ret;
+ goto out_pm_runtime;
}
dw_mipi_dsi_rockchip_config(dsi);
@@ -965,16 +955,23 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to create drm encoder\n");
- return ret;
+ goto out_pm_runtime;
}
ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret);
- return ret;
+ goto out_pm_runtime;
}
return 0;
+
+out_pm_runtime:
+ pm_runtime_put(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_put(dsi->slave->dev);
+
+ return ret;
}
static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
@@ -989,6 +986,10 @@ static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
dw_mipi_dsi_unbind(dsi->dmd);
clk_disable_unprepare(dsi->pllref_clk);
+
+ pm_runtime_put(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_put(dsi->slave->dev);
}
static const struct component_ops dw_mipi_dsi_rockchip_ops = {
--
2.33.0.685.g46640cef36-goog
From: Thomas Gleixner <tglx(a)linutronix.de>
[ Upstream commit 2dcb96bacce36021c2f3eaae0cef607b5bb71ede ]
lock_sock_fast() and lock_sock_nested() contain lockdep annotations for the
sock::sk_lock.owned 'mutex'. sock::sk_lock.owned is not a regular mutex. It
is just lockdep wise equivalent. In fact it's an open coded trivial mutex
implementation with some interesting features.
sock::sk_lock.slock is a regular spinlock protecting the 'mutex'
representation sock::sk_lock.owned which is a plain boolean. If 'owned' is
true, then some other task holds the 'mutex', otherwise it is uncontended.
As this locking construct is obviously endangered by lock ordering issues as
any other locking primitive it got lockdep annotated via a dedicated
dependency map sock::sk_lock.dep_map which has to be updated at the lock
and unlock sites.
lock_sock_nested() is a straight forward 'mutex' lock operation:
might_sleep();
spin_lock_bh(sock::sk_lock.slock)
while (!try_lock(sock::sk_lock.owned)) {
spin_unlock_bh(sock::sk_lock.slock);
wait_for_release();
spin_lock_bh(sock::sk_lock.slock);
}
The lockdep annotation for sock::sk_lock.owned is for unknown reasons
_after_ the lock has been acquired, i.e. after the code block above and
after releasing sock::sk_lock.slock, but inside the bottom halves disabled
region:
spin_unlock(sock::sk_lock.slock);
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
local_bh_enable();
The placement after the unlock is obvious because otherwise the
mutex_acquire() would nest into the spin lock held region.
But that's from the lockdep perspective still the wrong place:
1) The mutex_acquire() is issued _after_ the successful acquisition which
is pointless because in a dead lock scenario this point is never
reached which means that if the deadlock is the first instance of
exposing the wrong lock order lockdep does not have a chance to detect
it.
2) It only works because lockdep is rather lax on the context from which
the mutex_acquire() is issued. Acquiring a mutex inside a bottom halves
and therefore non-preemptible region is obviously invalid, except for a
trylock which is clearly not the case here.
This 'works' stops working on RT enabled kernels where the bottom halves
serialization is done via a local lock, which exposes this misplacement
because the 'mutex' and the local lock nest the wrong way around and
lockdep complains rightfully about a lock inversion.
The placement is wrong since the initial commit a5b5bb9a053a ("[PATCH]
lockdep: annotate sk_locks") which introduced this.
Fix it by moving the mutex_acquire() in front of the actual lock
acquisition, which is what the regular mutex_lock() operation does as well.
lock_sock_fast() is not that straight forward. It looks at the first glance
like a convoluted trylock operation:
spin_lock_bh(sock::sk_lock.slock)
if (!sock::sk_lock.owned)
return false;
while (!try_lock(sock::sk_lock.owned)) {
spin_unlock_bh(sock::sk_lock.slock);
wait_for_release();
spin_lock_bh(sock::sk_lock.slock);
}
spin_unlock(sock::sk_lock.slock);
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
local_bh_enable();
return true;
But that's not the case: lock_sock_fast() is an interesting optimization
for short critical sections which can run with bottom halves disabled and
sock::sk_lock.slock held. This allows to shortcut the 'mutex' operation in
the non contended case by preventing other lockers to acquire
sock::sk_lock.owned because they are blocked on sock::sk_lock.slock, which
in turn avoids the overhead of doing the heavy processing in release_sock()
including waking up wait queue waiters.
In the contended case, i.e. when sock::sk_lock.owned == true the behavior
is the same as lock_sock_nested().
Semantically this shortcut means, that the task acquired the 'mutex' even
if it does not touch the sock::sk_lock.owned field in the non-contended
case. Not telling lockdep about this shortcut acquisition is hiding
potential lock ordering violations in the fast path.
As a consequence the same reasoning as for the above lock_sock_nested()
case vs. the placement of the lockdep annotation applies.
The current placement of the lockdep annotation was just copied from
the original lock_sock(), now renamed to lock_sock_nested(),
implementation.
Fix this by moving the mutex_acquire() in front of the actual lock
acquisition and adding the corresponding mutex_release() into
unlock_sock_fast(). Also document the fast path return case with a comment.
Reported-by: Sebastian Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: netdev(a)vger.kernel.org
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Jakub Kicinski <kuba(a)kernel.org>
Cc: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
include/net/sock.h | 1 +
net/core/sock.c | 37 +++++++++++++++++++++++--------------
2 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index f23cb259b0e2..e9ef47c0cfce 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1641,6 +1641,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
release_sock(sk);
__release(&sk->sk_lock.slock);
} else {
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
spin_unlock_bh(&sk->sk_lock.slock);
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index a3eea6e0b30a..54b8eeccbdf4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3158,17 +3158,15 @@ EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
@@ -3206,24 +3204,35 @@ EXPORT_SYMBOL(release_sock);
*/
bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (!sk->sk_lock.owned)
+ if (!sk->sk_lock.owned) {
/*
- * Note : We must disable BH
+ * Fast path return with bottom halves disabled and
+ * sock::sk_lock.slock held.
+ *
+ * The 'mutex' is not contended and holding
+ * sock::sk_lock.slock prevents all other lockers to
+ * proceed so the corresponding unlock_sock_fast() can
+ * avoid the slow path of release_sock() completely and
+ * just release slock.
+ *
+ * From a semantical POV this is equivalent to 'acquiring'
+ * the 'mutex', hence the corresponding lockdep
+ * mutex_release() has to happen in the fast path of
+ * unlock_sock_fast().
*/
return false;
+ }
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
__acquire(&sk->sk_lock.slock);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
return true;
}
EXPORT_SYMBOL(lock_sock_fast);
--
2.33.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: c91145f28005 - usb: gadget: f_uac2: Populate SS descriptors' wBytesPerInterval
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
Targeted tests: NO
All kernel binaries, config files, and logs are available for download here:
https://arr-cki-prod-datawarehouse-public.s3.amazonaws.com/index.html?prefi…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: make -j24 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - nvmeof-mp
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - srp
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ Networking bridge: sanity - mlx5
⚡⚡⚡ Ethernet drivers sanity - mlx5
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ ACPI table test
✅ ACPI enabled test
⚡⚡⚡ LTP - cve
⚡⚡⚡ LTP - sched
⚡⚡⚡ LTP - syscalls
⚡⚡⚡ LTP - can
⚡⚡⚡ LTP - commands
⚡⚡⚡ LTP - containers
⚡⚡⚡ LTP - dio
⚡⚡⚡ LTP - fs
⚡⚡⚡ LTP - fsx
⚡⚡⚡ LTP - math
⚡⚡⚡ LTP - hugetlb
⚡⚡⚡ LTP - mm
⚡⚡⚡ LTP - nptl
⚡⚡⚡ LTP - pty
⚡⚡⚡ LTP - ipc
⚡⚡⚡ LTP - tracing
⚡⚡⚡ CIFS Connectathon
⚡⚡⚡ POSIX pjd-fstest suites
⚡⚡⚡ NFS Connectathon
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ jvm - jcstress tests
⚡⚡⚡ Memory: fork_mem
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking cki netfilter test
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: dm/common
⚡⚡⚡ storage: SCSI VPD
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ xarray-idr-radixtree-test
🚧 ⚡⚡⚡ i2c: i2cdetect sanity
🚧 ⚡⚡⚡ Firmware test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking: igmp conformance test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ lvm cache test
🚧 ⚡⚡⚡ lvm snapper test
Host 6:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 7:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ Networking bridge: sanity - mlx5
⚡⚡⚡ Ethernet drivers sanity - mlx5
ppc64le:
Host 1:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - srp
Host 2:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - nvmeof-mp
Host 3:
✅ Boot test
✅ Reboot test
✅ LTP - cve
✅ LTP - sched
✅ LTP - syscalls
✅ LTP - can
✅ LTP - commands
✅ LTP - containers
✅ LTP - dio
✅ LTP - fs
✅ LTP - fsx
✅ LTP - math
✅ LTP - hugetlb
✅ LTP - mm
✅ LTP - nptl
✅ LTP - pty
✅ LTP - ipc
✅ LTP - tracing
✅ CIFS Connectathon
✅ POSIX pjd-fstest suites
✅ NFS Connectathon
✅ Loopdev Sanity
✅ jvm - jcstress tests
✅ Memory: fork_mem
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking socket: fuzz
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking cki netfilter test
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ pciutils: update pci ids test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: dm/common
✅ trace: ftrace/tracer
🚧 ✅ xarray-idr-radixtree-test
🚧 ✅ Memory function: kaslr
🚧 ✅ audit: audit testsuite test
🚧 ✅ lvm cache test
🚧 ✅ lvm snapper test
Host 4:
✅ Boot test
✅ Reboot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ IPMI driver test
✅ IPMItool loop stress test
✅ selinux-policy: serge-testsuite
✅ Storage blktests - blk
✅ Storage block - filesystem fio test
✅ Storage block - queue scheduler test
✅ storage: software RAID testing
✅ Storage: swraid mdadm raid_module test
🚧 ❌ Podman system test - as root
🚧 ✅ Podman system test - as user
🚧 ✅ xfstests - btrfs
🚧 ✅ Storage blktests - nvme-tcp
🚧 ✅ Storage: lvm device-mapper test - upstream
s390x:
Host 1:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - nvmeof-mp
Host 2:
✅ Boot test
✅ Reboot test
✅ LTP - cve
✅ LTP - sched
✅ LTP - syscalls
✅ LTP - can
✅ LTP - commands
✅ LTP - containers
✅ LTP - dio
✅ LTP - fs
✅ LTP - fsx
✅ LTP - math
✅ LTP - hugetlb
✅ LTP - mm
✅ LTP - nptl
✅ LTP - pty
✅ LTP - ipc
✅ LTP - tracing
✅ CIFS Connectathon
✅ POSIX pjd-fstest suites
✅ NFS Connectathon
✅ Loopdev Sanity
✅ jvm - jcstress tests
✅ Memory: fork_mem
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking cki netfilter test
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ Libkcapi AF_ALG test
✅ storage: dm/common
✅ trace: ftrace/tracer
🚧 ❌ xarray-idr-radixtree-test
🚧 ✅ Memory function: kaslr
🚧 ✅ audit: audit testsuite test
🚧 ✅ lvm cache test
🚧 ✅ lvm snapper test
Host 3:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - srp
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ selinux-policy: serge-testsuite
✅ Storage blktests - blk
✅ Storage: swraid mdadm raid_module test
🚧 ✅ Podman system test - as root
🚧 ✅ Podman system test - as user
🚧 ✅ Storage blktests - nvme-tcp
🚧 ✅ stress: stress-ng - interrupt
🚧 ✅ stress: stress-ng - cpu
🚧 ✅ stress: stress-ng - cpu-cache
🚧 ✅ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
x86_64:
Host 1:
✅ Boot test
✅ Reboot test
🚧 ❌ Storage blktests - srp
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Reboot test
✅ ACPI table test
✅ LTP - cve
✅ LTP - sched
⚡⚡⚡ LTP - syscalls
⚡⚡⚡ LTP - can
⚡⚡⚡ LTP - commands
⚡⚡⚡ LTP - containers
⚡⚡⚡ LTP - dio
⚡⚡⚡ LTP - fs
⚡⚡⚡ LTP - fsx
⚡⚡⚡ LTP - math
⚡⚡⚡ LTP - hugetlb
⚡⚡⚡ LTP - mm
⚡⚡⚡ LTP - nptl
⚡⚡⚡ LTP - pty
⚡⚡⚡ LTP - ipc
⚡⚡⚡ LTP - tracing
⚡⚡⚡ CIFS Connectathon
⚡⚡⚡ POSIX pjd-fstest suites
⚡⚡⚡ NFS Connectathon
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ jvm - jcstress tests
⚡⚡⚡ Memory: fork_mem
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking cki netfilter test
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ Libkcapi AF_ALG test
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ pciutils: update pci ids test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ storage: dm/common
⚡⚡⚡ storage: SCSI VPD
⚡⚡⚡ trace: ftrace/tracer
🚧 ⚡⚡⚡ xarray-idr-radixtree-test
🚧 ⚡⚡⚡ i2c: i2cdetect sanity
🚧 ⚡⚡⚡ Firmware test suite
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ Networking: igmp conformance test
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ lvm cache test
🚧 ⚡⚡⚡ lvm snapper test
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ xfstests - nfsv4.2
⚡⚡⚡ xfstests - cifsv3.11
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ power-management: cpupower/sanity test
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ CPU: Idle Test
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ Storage: lvm device-mapper test - upstream
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Host 4:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - nvmeof-mp
Host 5:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
🚧 ⚡⚡⚡ Storage blktests - nvmeof-mp
Host 6:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Reboot test
⚡⚡⚡ xfstests - ext4
⚡⚡⚡ xfstests - xfs
⚡⚡⚡ xfstests - nfsv4.2
⚡⚡⚡ xfstests - cifsv3.11
⚡⚡⚡ IPMI driver test
⚡⚡⚡ IPMItool loop stress test
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ power-management: cpupower/sanity test
⚡⚡⚡ Storage blktests - blk
⚡⚡⚡ Storage block - filesystem fio test
⚡⚡⚡ Storage block - queue scheduler test
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ Storage: swraid mdadm raid_module test
🚧 ⚡⚡⚡ Podman system test - as root
🚧 ⚡⚡⚡ Podman system test - as user
🚧 ⚡⚡⚡ CPU: Idle Test
🚧 ⚡⚡⚡ xfstests - btrfs
🚧 ⚡⚡⚡ Storage blktests - nvme-tcp
🚧 ⚡⚡⚡ Storage: lvm device-mapper test - upstream
🚧 ⚡⚡⚡ stress: stress-ng - interrupt
🚧 ⚡⚡⚡ stress: stress-ng - cpu
🚧 ⚡⚡⚡ stress: stress-ng - cpu-cache
🚧 ⚡⚡⚡ stress: stress-ng - memory
🚧 ⚡⚡⚡ stress: stress-ng - os
Test sources: https://gitlab.com/cki-project/kernel-tests
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
Targeted tests
--------------
Test runs for patches always include a set of base tests, plus some
tests chosen based on the file paths modified by the patch. The latter
are called "targeted tests". If no targeted tests are run, that means
no patch-specific tests are available. Please, consider contributing a
targeted test for related patches to increase test coverage. See
https://docs.engineering.redhat.com/x/_wEZB for more details.
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: d525914b5bd8 ("mtd: rawnand: xway: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Cc: Jan Hoffmann <jan(a)3e8.eu>
Cc: Kestrel seventyfour <kestrelseventyfour(a)gmail.com>
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/xway_nand.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 26751976e502..236fd8c5a958 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -148,9 +148,8 @@ static void xway_write_buf(struct nand_chip *chip, const u_char *buf, int len)
static int xway_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -219,6 +218,13 @@ static int xway_nand_probe(struct platform_device *pdev)
| NAND_CON_SE_P | NAND_CON_WP_P | NAND_CON_PRE_P
| cs_flag, EBU_NAND_CON);
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
/* Scan to find existence of the device */
err = nand_scan(&data->chip, 1);
if (err)
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: b36bf0a0fe5d ("mtd: rawnand: socrates: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/socrates_nand.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 70f8305c9b6e..fb39cc7ebce0 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -119,9 +119,8 @@ static int socrates_nand_device_ready(struct nand_chip *nand_chip)
static int socrates_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -175,6 +174,13 @@ static int socrates_nand_probe(struct platform_device *ofdev)
/* TODO: I have no idea what real delay is. */
nand_chip->legacy.chip_delay = 20; /* 20us command delay time */
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
dev_set_drvdata(&ofdev->dev, host);
res = nand_scan(nand_chip, 1);
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: 612e048e6aab ("mtd: rawnand: plat_nand: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/plat_nand.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 7711e1020c21..0ee08c42cc35 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -21,9 +21,8 @@ struct plat_nand_data {
static int plat_nand_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -94,6 +93,13 @@ static int plat_nand_probe(struct platform_device *pdev)
goto out;
}
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
/* Scan to find existence of the device */
err = nand_scan(&data->chip, pdata->chip.nr_chips);
if (err)
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: 8fc6f1f042b2 ("mtd: rawnand: pasemi: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/pasemi_nand.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index 789f33312c15..c176036453ed 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -75,9 +75,8 @@ static int pasemi_device_ready(struct nand_chip *chip)
static int pasemi_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -154,6 +153,13 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
/* Enable the following for a flash based bad block table */
chip->bbt_options = NAND_BBT_USE_FLASH;
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
/* Scan to find existence of the device */
err = nand_scan(chip, 1);
if (err)
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: 553508cec2e8 ("mtd: rawnand: orion: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/orion_nand.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 66211c9311d2..2c87c7d89205 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -85,9 +85,8 @@ static void orion_nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
static int orion_nand_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -190,6 +189,13 @@ static int __init orion_nand_probe(struct platform_device *pdev)
return ret;
}
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ nc->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
ret = nand_scan(nc, 1);
if (ret)
goto no_dev;
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: 6dd09f775b72 ("mtd: rawnand: mpc5121: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/mpc5121_nfc.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index bcd4a556c959..cb293c50acb8 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -605,9 +605,8 @@ static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd)
static int mpc5121_nfc_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -772,6 +771,13 @@ static int mpc5121_nfc_probe(struct platform_device *op)
goto error;
}
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
/* Detect NAND chips */
retval = nand_scan(chip, be32_to_cpup(chips_no));
if (retval) {
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: f6341f6448e0 ("mtd: rawnand: gpio: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/gpio.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index fb7a086de35e..fdf073d2e1b6 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -163,9 +163,8 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
static int gpio_nand_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -365,6 +364,13 @@ static int gpio_nand_probe(struct platform_device *pdev)
if (gpiomtd->nwp && !IS_ERR(gpiomtd->nwp))
gpiod_direction_output(gpiomtd->nwp, 1);
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
ret = nand_scan(chip, 1);
if (ret)
goto err_wp;
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: dbffc8ccdf3a ("mtd: rawnand: au1550: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/au1550nd.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 99116896cfd6..5aa3a06d740c 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -239,9 +239,8 @@ static int au1550nd_exec_op(struct nand_chip *this,
static int au1550nd_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -310,6 +309,13 @@ static int au1550nd_probe(struct platform_device *pdev)
if (pd->devwidth)
this->options |= NAND_BUSWIDTH_16;
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
ret = nand_scan(this, 1);
if (ret) {
dev_err(&pdev->dev, "NAND scan failed with %d\n", ret);
--
2.27.0
Following the introduction of the generic ECC engine infrastructure, it
was necessary to reorganize the code and move the ECC configuration in
the ->attach_chip() hook. Failing to do that properly lead to a first
series of fixes supposed to stabilize the situation. Unfortunately, this
only fixed the use of software ECC engines, preventing any other kind of
engine to be used, including on-die ones.
It is now time to (finally) fix the situation by ensuring that we still
provide a default (eg. software ECC) but will still support different
ECC engines such as on-die ECC engines if properly described in the
device tree.
There are no changes needed on the core side in order to do this, but we
just need to leverage the logic there which allows:
1- a subsystem default (set to Host engines in the raw NAND world)
2- a driver specific default (here set to software ECC engines)
3- any type of engine requested by the user (ie. described in the DT)
As the raw NAND subsystem has not yet been fully converted to the ECC
engine infrastructure, in order to provide a default ECC engine for this
driver we need to set chip->ecc.engine_type *before* calling
nand_scan(). During the initialization step, the core will consider this
entry as the default engine for this driver. This value may of course
be overloaded by the user if the usual DT properties are provided.
Fixes: 59d93473323a ("mtd: rawnand: ams-delta: Move the ECC initialization to ->attach_chip()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/ams-delta.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index ff1697f899ba..13de39aa3288 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -217,9 +217,8 @@ static int gpio_nand_setup_interface(struct nand_chip *this, int csline,
static int gpio_nand_attach_chip(struct nand_chip *chip)
{
- chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-
- if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+ if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT &&
+ chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
return 0;
@@ -370,6 +369,13 @@ static int gpio_nand_probe(struct platform_device *pdev)
/* Release write protection */
gpiod_set_value(priv->gpiod_nwp, 0);
+ /*
+ * This driver assumes that the default ECC engine should be TYPE_SOFT.
+ * Set ->engine_type before registering the NAND devices in order to
+ * provide a driver specific default value.
+ */
+ this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
/* Scan to find existence of the device */
err = nand_scan(this, 1);
if (err)
--
2.27.0
All 3 upstream commits apply cleanly:
* 5fcfb6d0bfcd ("hso: fix bailout in error case of probe") is a support
patch needed for context
* a6ecfb39ba9d ("usb: hso: fix error handling code of hso_create_net_device")
is the actual fix
* dcb713d53e2e ("usb: hso: remove the bailout parameter") is a follow up
cleanup commit
Dongliang Mu (2):
usb: hso: fix error handling code of hso_create_net_device
usb: hso: remove the bailout parameter
Oliver Neukum (1):
hso: fix bailout in error case of probe
drivers/net/usb/hso.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
--
2.25.1
All 3 upstream commits apply cleanly:
* 5fcfb6d0bfcd ("hso: fix bailout in error case of probe") is a support
patch needed for context
* a6ecfb39ba9d ("usb: hso: fix error handling code of hso_create_net_device")
is the actual fix
* dcb713d53e2e ("usb: hso: remove the bailout parameter") is a follow up
cleanup commit
Dongliang Mu (2):
usb: hso: fix error handling code of hso_create_net_device
usb: hso: remove the bailout parameter
Oliver Neukum (1):
hso: fix bailout in error case of probe
drivers/net/usb/hso.c | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
--
2.25.1
The driver was always parsing "s5m8767,pmic-buck-default-dvs-idx", not
"s5m8767,pmic-buck234-default-dvs-idx".
Cc: <stable(a)vger.kernel.org>
Fixes: 26aec009f6b6 ("regulator: add device tree support for s5m8767")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
---
Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
index d9cff1614f7a..6cd83d920155 100644
--- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
+++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
@@ -39,7 +39,7 @@ Optional properties of the main device node (the parent!):
Additional properties required if either of the optional properties are used:
- - s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from
+ - s5m8767,pmic-buck-default-dvs-idx: Default voltage setting selected from
the possible 8 options selectable by the dvs gpios. The value of this
property should be between 0 and 7. If not specified or if out of range, the
default value of this property is set to 0.
--
2.30.2
The driver and its bindings, before commit 04f9f068a619 ("regulator:
s5m8767: Modify parsing method of the voltage table of buck2/3/4") were
requiring to provide at least one safe/default voltage for DVS registers
if DVS GPIO is not being enabled.
IOW, if s5m8767,pmic-buck2-uses-gpio-dvs is missing, the
s5m8767,pmic-buck2-dvs-voltage should still be present and contain one
voltage.
This requirement was coming from driver behavior matching this condition
(none of DVS GPIO is enabled): it was always initializing the DVS
selector pins to 0 and keeping the DVS enable setting at reset value
(enabled). Therefore if none of DVS GPIO is enabled in devicetree,
driver was configuring the first DVS voltage for buck[234].
Mentioned commit 04f9f068a619 ("regulator: s5m8767: Modify parsing
method of the voltage table of buck2/3/4") broke it because DVS voltage
won't be parsed from devicetree if DVS GPIO is not enabled. After the
change, driver will configure bucks to use the register reset value as
voltage which might have unpleasant effects.
Fix this by relaxing the bindings constrain: if DVS GPIO is not enabled
in devicetree (therefore DVS voltage is also not parsed), explicitly
disable it.
Cc: <stable(a)vger.kernel.org>
Fixes: 04f9f068a619 ("regulator: s5m8767: Modify parsing method of the voltage table of buck2/3/4")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)canonical.com>
---
.../bindings/regulator/samsung,s5m8767.txt | 21 +++++++------------
drivers/regulator/s5m8767.c | 21 ++++++++-----------
2 files changed, 17 insertions(+), 25 deletions(-)
diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
index 093edda0c8df..d9cff1614f7a 100644
--- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
+++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt
@@ -13,6 +13,14 @@ common regulator binding documented in:
Required properties of the main device node (the parent!):
+ - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used
+ for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines.
+
+ [1] If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+ property is specified, then all the eight voltage values for the
+ 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified.
+
+Optional properties of the main device node (the parent!):
- s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
units for buck2 when changing voltage using gpio dvs. Refer to [1] below
for additional information.
@@ -25,19 +33,6 @@ Required properties of the main device node (the parent!):
units for buck4 when changing voltage using gpio dvs. Refer to [1] below
for additional information.
- - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used
- for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines.
-
- [1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
- property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage'
- property should specify atleast one voltage level (which would be a
- safe operating voltage).
-
- If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
- property is specified, then all the eight voltage values for the
- 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified.
-
-Optional properties of the main device node (the parent!):
- s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs.
- s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs.
- s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs.
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index 7c111bbdc2af..35269f998210 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -850,18 +850,15 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
/* DS4 GPIO */
gpio_direction_output(pdata->buck_ds[2], 0x0);
- if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs ||
- pdata->buck4_gpiodvs) {
- regmap_update_bits(s5m8767->iodev->regmap_pmic,
- S5M8767_REG_BUCK2CTRL, 1 << 1,
- (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1));
- regmap_update_bits(s5m8767->iodev->regmap_pmic,
- S5M8767_REG_BUCK3CTRL, 1 << 1,
- (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1));
- regmap_update_bits(s5m8767->iodev->regmap_pmic,
- S5M8767_REG_BUCK4CTRL, 1 << 1,
- (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1));
- }
+ regmap_update_bits(s5m8767->iodev->regmap_pmic,
+ S5M8767_REG_BUCK2CTRL, 1 << 1,
+ (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1));
+ regmap_update_bits(s5m8767->iodev->regmap_pmic,
+ S5M8767_REG_BUCK3CTRL, 1 << 1,
+ (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1));
+ regmap_update_bits(s5m8767->iodev->regmap_pmic,
+ S5M8767_REG_BUCK4CTRL, 1 << 1,
+ (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1));
/* Initialize GPIO DVS registers */
for (i = 0; i < 8; i++) {
--
2.30.2
This reverts commit b0b524f079a23e440dd22b04e369368dde847533.
Commit b0b524f079a2 ("brcmfmac: use ISO3166 country code and 0 rev
as fallback") changes country setup to directly use ISO3166 country
codes if no more specific code is configured. This was done under
the assumption that brcmfmac firmwares can handle such simple
direct mapping from country codes to firmware ccode values.
Unfortunately this is not true for all chipset/firmware combinations.
E.g. BCM4359/9 devices stop working as access point with this change,
so revert the offending commit to avoid the regression.
Signed-off-by: Soeren Moch <smoch(a)web.de>
Cc: stable(a)vger.kernel.org # 5.14.x
--
Cc: Shawn Guo <shawn.guo(a)linaro.org>
Cc: Arend van Spriel <aspriel(a)gmail.com>
Cc: Franky Lin <franky.lin(a)broadcom.com>
Cc: Hante Meuleman <hante.meuleman(a)broadcom.com>
Cc: Chi-hsien Lin <chi-hsien.lin(a)infineon.com>
Cc: Wright Feng <wright.feng(a)infineon.com>
Cc: Chung-hsien Hsu <chung-hsien.hsu(a)infineon.com>
Cc: Kalle Valo <kvalo(a)codeaurora.org>
Cc: linux-wireless(a)vger.kernel.org
Cc: brcm80211-dev-list.pdl(a)broadcom.com
Cc: SHA-cyfmac-dev-list(a)infineon.com
Cc: netdev(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
---
.../broadcom/brcm80211/brcmfmac/cfg80211.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index f7b96cd69242..9db12ffd2ff8 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -7463,23 +7463,18 @@ static s32 brcmf_translate_country_code(struct brcmf_pub *drvr, char alpha2[2],
s32 found_index;
int i;
+ country_codes = drvr->settings->country_codes;
+ if (!country_codes) {
+ brcmf_dbg(TRACE, "No country codes configured for device\n");
+ return -EINVAL;
+ }
+
if ((alpha2[0] == ccreq->country_abbrev[0]) &&
(alpha2[1] == ccreq->country_abbrev[1])) {
brcmf_dbg(TRACE, "Country code already set\n");
return -EAGAIN;
}
- country_codes = drvr->settings->country_codes;
- if (!country_codes) {
- brcmf_dbg(TRACE, "No country codes configured for device, using ISO3166 code and 0 rev\n");
- memset(ccreq, 0, sizeof(*ccreq));
- ccreq->country_abbrev[0] = alpha2[0];
- ccreq->country_abbrev[1] = alpha2[1];
- ccreq->ccode[0] = alpha2[0];
- ccreq->ccode[1] = alpha2[1];
- return 0;
- }
-
found_index = -1;
for (i = 0; i < country_codes->table_size; i++) {
cc = &country_codes->table[i];
--
2.25.1
From: Jan Beulich <jbeulich(a)suse.com>
[ Upstream commit 3ede7f84c7c21f93c5eac611d60eba3f2c765e0f ]
When re-entering the main loop of xenvif_tx_check_gop() a 2nd time, the
special considerations for the head of the SKB no longer apply. Don't
mistakenly report ERROR to the frontend for the first entry in the list,
even if - from all I can tell - this shouldn't matter much as the overall
transmit will need to be considered failed anyway.
Signed-off-by: Jan Beulich <jbeulich(a)suse.com>
Reviewed-by: Paul Durrant <paul(a)xen.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/xen-netback/netback.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index c8c6afc0ab51..15c73ebe5efc 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -994,7 +994,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
* the header's copy failed, and they are
* sharing a slot, send an error
*/
- if (i == 0 && sharedslot)
+ if (i == 0 && !first_shinfo && sharedslot)
xenvif_idx_release(queue, pending_idx,
XEN_NETIF_RSP_ERROR);
else
--
2.33.0
From: Vladimir Oltean <vladimir.oltean(a)nxp.com>
[ Upstream commit cf9579976f724ad517cc15b7caadea728c7e245c ]
MDIO-attached devices might have interrupts and other things that might
need quiesced when we kexec into a new kernel. Things are even more
creepy when those interrupt lines are shared, and in that case it is
absolutely mandatory to disable all interrupt sources.
Moreover, MDIO devices might be DSA switches, and DSA needs its own
shutdown method to unlink from the DSA master, which is a new
requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link
interfaces with the DSA master to get rid of lockdep warnings").
So introduce a ->shutdown method in the MDIO device driver structure.
Signed-off-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/phy/mdio_device.c | 11 +++++++++++
include/linux/mdio.h | 3 +++
2 files changed, 14 insertions(+)
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index 9c88e6749b9a..34600b0061bb 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -135,6 +135,16 @@ static int mdio_remove(struct device *dev)
return 0;
}
+static void mdio_shutdown(struct device *dev)
+{
+ struct mdio_device *mdiodev = to_mdio_device(dev);
+ struct device_driver *drv = mdiodev->dev.driver;
+ struct mdio_driver *mdiodrv = to_mdio_driver(drv);
+
+ if (mdiodrv->shutdown)
+ mdiodrv->shutdown(mdiodev);
+}
+
/**
* mdio_driver_register - register an mdio_driver with the MDIO layer
* @new_driver: new mdio_driver to register
@@ -149,6 +159,7 @@ int mdio_driver_register(struct mdio_driver *drv)
mdiodrv->driver.bus = &mdio_bus_type;
mdiodrv->driver.probe = mdio_probe;
mdiodrv->driver.remove = mdio_remove;
+ mdiodrv->driver.shutdown = mdio_shutdown;
retval = driver_register(&mdiodrv->driver);
if (retval) {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index bf9d1d750693..78b3cf50566f 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -61,6 +61,9 @@ struct mdio_driver {
/* Clears up any memory if needed */
void (*remove)(struct mdio_device *mdiodev);
+
+ /* Quiesces the device on system shutdown, turns off interrupts etc */
+ void (*shutdown)(struct mdio_device *mdiodev);
};
#define to_mdio_driver(d) \
container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv)
--
2.33.0
From: Vladimir Oltean <vladimir.oltean(a)nxp.com>
[ Upstream commit cf9579976f724ad517cc15b7caadea728c7e245c ]
MDIO-attached devices might have interrupts and other things that might
need quiesced when we kexec into a new kernel. Things are even more
creepy when those interrupt lines are shared, and in that case it is
absolutely mandatory to disable all interrupt sources.
Moreover, MDIO devices might be DSA switches, and DSA needs its own
shutdown method to unlink from the DSA master, which is a new
requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link
interfaces with the DSA master to get rid of lockdep warnings").
So introduce a ->shutdown method in the MDIO device driver structure.
Signed-off-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/phy/mdio_device.c | 11 +++++++++++
include/linux/mdio.h | 3 +++
2 files changed, 14 insertions(+)
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index e24f28924af8..5265180251ea 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -146,6 +146,16 @@ static int mdio_remove(struct device *dev)
return 0;
}
+static void mdio_shutdown(struct device *dev)
+{
+ struct mdio_device *mdiodev = to_mdio_device(dev);
+ struct device_driver *drv = mdiodev->dev.driver;
+ struct mdio_driver *mdiodrv = to_mdio_driver(drv);
+
+ if (mdiodrv->shutdown)
+ mdiodrv->shutdown(mdiodev);
+}
+
/**
* mdio_driver_register - register an mdio_driver with the MDIO layer
* @new_driver: new mdio_driver to register
@@ -160,6 +170,7 @@ int mdio_driver_register(struct mdio_driver *drv)
mdiodrv->driver.bus = &mdio_bus_type;
mdiodrv->driver.probe = mdio_probe;
mdiodrv->driver.remove = mdio_remove;
+ mdiodrv->driver.shutdown = mdio_shutdown;
retval = driver_register(&mdiodrv->driver);
if (retval) {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index ca08ab16ecdc..780c4859ce2d 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -63,6 +63,9 @@ struct mdio_driver {
/* Clears up any memory if needed */
void (*remove)(struct mdio_device *mdiodev);
+
+ /* Quiesces the device on system shutdown, turns off interrupts etc */
+ void (*shutdown)(struct mdio_device *mdiodev);
};
#define to_mdio_driver(d) \
container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv)
--
2.33.0
From: Vladimir Oltean <vladimir.oltean(a)nxp.com>
[ Upstream commit cf9579976f724ad517cc15b7caadea728c7e245c ]
MDIO-attached devices might have interrupts and other things that might
need quiesced when we kexec into a new kernel. Things are even more
creepy when those interrupt lines are shared, and in that case it is
absolutely mandatory to disable all interrupt sources.
Moreover, MDIO devices might be DSA switches, and DSA needs its own
shutdown method to unlink from the DSA master, which is a new
requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link
interfaces with the DSA master to get rid of lockdep warnings").
So introduce a ->shutdown method in the MDIO device driver structure.
Signed-off-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/phy/mdio_device.c | 11 +++++++++++
include/linux/mdio.h | 3 +++
2 files changed, 14 insertions(+)
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index c924700cf37b..922f0f8973b6 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -176,6 +176,16 @@ static int mdio_remove(struct device *dev)
return 0;
}
+static void mdio_shutdown(struct device *dev)
+{
+ struct mdio_device *mdiodev = to_mdio_device(dev);
+ struct device_driver *drv = mdiodev->dev.driver;
+ struct mdio_driver *mdiodrv = to_mdio_driver(drv);
+
+ if (mdiodrv->shutdown)
+ mdiodrv->shutdown(mdiodev);
+}
+
/**
* mdio_driver_register - register an mdio_driver with the MDIO layer
* @new_driver: new mdio_driver to register
@@ -190,6 +200,7 @@ int mdio_driver_register(struct mdio_driver *drv)
mdiodrv->driver.bus = &mdio_bus_type;
mdiodrv->driver.probe = mdio_probe;
mdiodrv->driver.remove = mdio_remove;
+ mdiodrv->driver.shutdown = mdio_shutdown;
retval = driver_register(&mdiodrv->driver);
if (retval) {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index bfa7114167d7..85325e110a79 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -66,6 +66,9 @@ struct mdio_driver {
/* Clears up any memory if needed */
void (*remove)(struct mdio_device *mdiodev);
+
+ /* Quiesces the device on system shutdown, turns off interrupts etc */
+ void (*shutdown)(struct mdio_device *mdiodev);
};
#define to_mdio_driver(d) \
container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv)
--
2.33.0
From: Vladimir Oltean <vladimir.oltean(a)nxp.com>
[ Upstream commit cf9579976f724ad517cc15b7caadea728c7e245c ]
MDIO-attached devices might have interrupts and other things that might
need quiesced when we kexec into a new kernel. Things are even more
creepy when those interrupt lines are shared, and in that case it is
absolutely mandatory to disable all interrupt sources.
Moreover, MDIO devices might be DSA switches, and DSA needs its own
shutdown method to unlink from the DSA master, which is a new
requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link
interfaces with the DSA master to get rid of lockdep warnings").
So introduce a ->shutdown method in the MDIO device driver structure.
Signed-off-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/net/phy/mdio_device.c | 11 +++++++++++
include/linux/mdio.h | 3 +++
2 files changed, 14 insertions(+)
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index c1d345c3cab3..b2dd293fc87e 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -180,6 +180,16 @@ static int mdio_remove(struct device *dev)
return 0;
}
+static void mdio_shutdown(struct device *dev)
+{
+ struct mdio_device *mdiodev = to_mdio_device(dev);
+ struct device_driver *drv = mdiodev->dev.driver;
+ struct mdio_driver *mdiodrv = to_mdio_driver(drv);
+
+ if (mdiodrv->shutdown)
+ mdiodrv->shutdown(mdiodev);
+}
+
/**
* mdio_driver_register - register an mdio_driver with the MDIO layer
* @new_driver: new mdio_driver to register
@@ -194,6 +204,7 @@ int mdio_driver_register(struct mdio_driver *drv)
mdiodrv->driver.bus = &mdio_bus_type;
mdiodrv->driver.probe = mdio_probe;
mdiodrv->driver.remove = mdio_remove;
+ mdiodrv->driver.shutdown = mdio_shutdown;
retval = driver_register(&mdiodrv->driver);
if (retval) {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index a7604248777b..0f1f784de80e 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -64,6 +64,9 @@ struct mdio_driver {
/* Clears up any memory if needed */
void (*remove)(struct mdio_device *mdiodev);
+
+ /* Quiesces the device on system shutdown, turns off interrupts etc */
+ void (*shutdown)(struct mdio_device *mdiodev);
};
#define to_mdio_driver(d) \
container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv)
--
2.33.0
From: Tobias Schramm <t.schramm(a)manjaro.org>
[ Upstream commit 5457773ef99f25fcc4b238ac76b68e28273250f4 ]
Previously zero length transfers submitted to the Rokchip SPI driver would
time out in the SPI layer. This happens because the SPI peripheral does
not trigger a transfer completion interrupt for zero length transfers.
Fix that by completing zero length transfers immediately at start of
transfer.
Signed-off-by: Tobias Schramm <t.schramm(a)manjaro.org>
Link: https://lore.kernel.org/r/20210827050357.165409-1-t.schramm@manjaro.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/spi/spi-rockchip.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 0aab37cd64e7..624273d0e727 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -582,6 +582,12 @@ static int rockchip_spi_transfer_one(
int ret;
bool use_dma;
+ /* Zero length transfers won't trigger an interrupt on completion */
+ if (!xfer->len) {
+ spi_finalize_current_transfer(ctlr);
+ return 1;
+ }
+
WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY));
--
2.33.0
From: Tobias Schramm <t.schramm(a)manjaro.org>
[ Upstream commit 5457773ef99f25fcc4b238ac76b68e28273250f4 ]
Previously zero length transfers submitted to the Rokchip SPI driver would
time out in the SPI layer. This happens because the SPI peripheral does
not trigger a transfer completion interrupt for zero length transfers.
Fix that by completing zero length transfers immediately at start of
transfer.
Signed-off-by: Tobias Schramm <t.schramm(a)manjaro.org>
Link: https://lore.kernel.org/r/20210827050357.165409-1-t.schramm@manjaro.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/spi/spi-rockchip.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 540861ca2ba3..553b6b9d0222 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -600,6 +600,12 @@ static int rockchip_spi_transfer_one(
int ret;
bool use_dma;
+ /* Zero length transfers won't trigger an interrupt on completion */
+ if (!xfer->len) {
+ spi_finalize_current_transfer(ctlr);
+ return 1;
+ }
+
WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY));
--
2.33.0
In commit 43c2de1002d2, we moved most HW configuration to bind(), but we
didn't move the runtime PM management. Therefore, depending on initial
boot state, runtime-PM workqueue delays, and other timing factors, we
may disable our power domain in between the hardware configuration
(bind()) and when we enable the display. This can cause us to lose
hardware state and fail to configure our display. For example:
dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
panel-innolux-p079zca ff960000.mipi.0: failed to write command 0
or:
dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
panel-kingdisplay-kd097d04 ff960000.mipi.0: failed write init cmds: -110
We should match the runtime PM to the lifetime of the bind()/unbind()
cycle.
Tested on Acer Chrometab 10 (RK3399 Gru-Scarlet), with panel drivers
built either as modules or built-in.
Side notes: it seems one is more likely to see this problem when the
panel driver is built into the kernel. I've also seen this problem
bisect down to commits that simply changed Kconfig dependencies, because
it changed the order in which driver init functions were compiled into
the kernel, and therefore the ordering and timing of built-in device
probe.
Fixes: 43c2de1002d2 ("drm/rockchip: dsi: move all lane config except LCDC mux to bind()")
Link: https://lore.kernel.org/linux-rockchip/9aedfb528600ecf871885f7293ca4207c84d…
Reported-by: <aleksandr.o.makarov(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado(a)collabora.com>
---
Changes in v2:
- Clean up pm-runtime state in error cases.
- Correct git hash for Fixes.
.../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 37 ++++++++++---------
1 file changed, 19 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index a2262bee5aa4..45676b23c019 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -773,10 +773,6 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
if (mux < 0)
return;
- pm_runtime_get_sync(dsi->dev);
- if (dsi->slave)
- pm_runtime_get_sync(dsi->slave->dev);
-
/*
* For the RK3399, the clk of grf must be enabled before writing grf
* register. And for RK3288 or other soc, this grf_clk must be NULL,
@@ -795,20 +791,10 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
clk_disable_unprepare(dsi->grf_clk);
}
-static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
-{
- struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder);
-
- if (dsi->slave)
- pm_runtime_put(dsi->slave->dev);
- pm_runtime_put(dsi->dev);
-}
-
static const struct drm_encoder_helper_funcs
dw_mipi_dsi_encoder_helper_funcs = {
.atomic_check = dw_mipi_dsi_encoder_atomic_check,
.enable = dw_mipi_dsi_encoder_enable,
- .disable = dw_mipi_dsi_encoder_disable,
};
static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
@@ -938,10 +924,14 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
put_device(second);
}
+ pm_runtime_get_sync(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_get_sync(dsi->slave->dev);
+
ret = clk_prepare_enable(dsi->pllref_clk);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret);
- return ret;
+ goto out_pm_runtime;
}
/*
@@ -953,7 +943,7 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
ret = clk_prepare_enable(dsi->grf_clk);
if (ret) {
DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret);
- return ret;
+ goto out_pm_runtime;
}
dw_mipi_dsi_rockchip_config(dsi);
@@ -965,16 +955,23 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to create drm encoder\n");
- return ret;
+ goto out_pm_runtime;
}
ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret);
- return ret;
+ goto out_pm_runtime;
}
return 0;
+
+out_pm_runtime:
+ pm_runtime_put(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_put(dsi->slave->dev);
+
+ return ret;
}
static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
@@ -989,6 +986,10 @@ static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
dw_mipi_dsi_unbind(dsi->dmd);
clk_disable_unprepare(dsi->pllref_clk);
+
+ pm_runtime_put(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_put(dsi->slave->dev);
}
static const struct component_ops dw_mipi_dsi_rockchip_ops = {
--
2.33.0.685.g46640cef36-goog
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5297cfa6bdf93e3889f78f9b482e2a595a376083 Mon Sep 17 00:00:00 2001
From: Sai Krishna Potthuri <lakshmi.sai.krishna.potthuri(a)xilinx.com>
Date: Wed, 18 Aug 2021 12:53:14 +0530
Subject: [PATCH] EDAC/synopsys: Fix wrong value type assignment for edac_mode
dimm->edac_mode contains values of type enum edac_type - not the
corresponding capability flags. Fix that.
Issue caught by Coverity check "enumerated type mixed with another
type."
[ bp: Rewrite commit message, add tags. ]
Fixes: ae9b56e3996d ("EDAC, synps: Add EDAC support for zynq ddr ecc controller")
Signed-off-by: Sai Krishna Potthuri <lakshmi.sai.krishna.potthuri(a)xilinx.com>
Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Link: https://lkml.kernel.org/r/20210818072315.15149-1-shubhrajyoti.datta@xilinx.…
diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 7e7146b22c16..7d08627e738b 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -782,7 +782,7 @@ static void init_csrows(struct mem_ctl_info *mci)
for (j = 0; j < csi->nr_channels; j++) {
dimm = csi->channels[j]->dimm;
- dimm->edac_mode = EDAC_FLAG_SECDED;
+ dimm->edac_mode = EDAC_SECDED;
dimm->mtype = p_data->get_mtype(priv->baseaddr);
dimm->nr_pages = (size >> PAGE_SHIFT) / csi->nr_channels;
dimm->grain = SYNPS_EDAC_ERR_GRAIN;
This patch series is present in v5.14 and fixes warnings seen at insmod
with FTRACE and MODULE_PLTS enabled on ARM/Linux.
Changes in v3:
- resolved build error with allmodconfig enabling CONFIG_OLD_MCOUNT
Changes in v2:
- included build fix without DYNAMIC_FTRACE
- preserved Author's original name in 4.9 submission
Alex Sverdlin (4):
ARM: 9077/1: PLT: Move struct plt_entries definition to header
ARM: 9078/1: Add warn suppress parameter to arm_gen_branch_link()
ARM: 9079/1: ftrace: Add MODULE_PLTS support
ARM: 9098/1: ftrace: MODULE_PLT: Fix build problem without
DYNAMIC_FTRACE
arch/arm/include/asm/ftrace.h | 3 +++
arch/arm/include/asm/insn.h | 8 +++---
arch/arm/include/asm/module.h | 10 +++++++
arch/arm/kernel/ftrace.c | 50 ++++++++++++++++++++++++++++-------
arch/arm/kernel/insn.c | 19 ++++++-------
arch/arm/kernel/module-plts.c | 49 ++++++++++++++++++++++++++--------
6 files changed, 105 insertions(+), 34 deletions(-)
--
2.25.1
This patch series is present in v5.14 and fixes warnings seen at insmod
with FTRACE and MODULE_PLTS enabled on ARM/Linux.
Changes in v3:
- resolved build error with allmodconfig enabling CONFIG_OLD_MCOUNT
Changes in v2:
- included build fix without DYNAMIC_FTRACE
- preserved Author's original name in 4.9 submission
Alex Sverdlin (4):
ARM: 9077/1: PLT: Move struct plt_entries definition to header
ARM: 9078/1: Add warn suppress parameter to arm_gen_branch_link()
ARM: 9079/1: ftrace: Add MODULE_PLTS support
ARM: 9098/1: ftrace: MODULE_PLT: Fix build problem without
DYNAMIC_FTRACE
arch/arm/include/asm/ftrace.h | 3 +++
arch/arm/include/asm/insn.h | 8 +++---
arch/arm/include/asm/module.h | 10 +++++++
arch/arm/kernel/ftrace.c | 50 ++++++++++++++++++++++++++++-------
arch/arm/kernel/insn.c | 19 ++++++-------
arch/arm/kernel/module-plts.c | 49 ++++++++++++++++++++++++++--------
6 files changed, 105 insertions(+), 34 deletions(-)
--
2.25.1
This simply adds proper support for panel backlights that can be controlled
via VESA's backlight control protocol, but which also require that we
enable and disable the backlight via PWM instead of via the DPCD interface.
We also enable this by default, in order to fix some people's backlights
that were broken by not having this enabled.
For reference, backlights that require this and use VESA's backlight
interface tend to be laptops with hybrid GPUs, but this very well may
change in the future.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/3680
Fixes: fe7d52bccab6 ("drm/i915/dp: Don't use DPCD backlights that need PWM enable/disable")
Cc: <stable(a)vger.kernel.org> # v5.12+
---
.../drm/i915/display/intel_dp_aux_backlight.c | 24 ++++++++++++++-----
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 569d17b4d00f..594fdc7453ca 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -293,6 +293,10 @@ intel_dp_aux_vesa_enable_backlight(const struct intel_crtc_state *crtc_state,
struct intel_panel *panel = &connector->panel;
struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
+ if (!panel->backlight.edp.vesa.info.aux_enable)
+ panel->backlight.pwm_funcs->enable(crtc_state, conn_state,
+ panel->backlight.pwm_level_max);
+
drm_edp_backlight_enable(&intel_dp->aux, &panel->backlight.edp.vesa.info, level);
}
@@ -304,6 +308,10 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state
struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info);
+
+ if (!panel->backlight.edp.vesa.info.aux_enable)
+ panel->backlight.pwm_funcs->disable(old_conn_state,
+ intel_backlight_invert_pwm_level(connector, 0));
}
static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe)
@@ -321,6 +329,15 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector,
if (ret < 0)
return ret;
+ if (!panel->backlight.edp.vesa.info.aux_enable) {
+ ret = panel->backlight.pwm_funcs->setup(connector, pipe);
+ if (ret < 0) {
+ drm_err(&i915->drm,
+ "Failed to setup PWM backlight controls for eDP backlight: %d\n",
+ ret);
+ return ret;
+ }
+ }
panel->backlight.max = panel->backlight.edp.vesa.info.max;
panel->backlight.min = 0;
if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) {
@@ -340,12 +357,7 @@ intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector)
struct intel_dp *intel_dp = intel_attached_dp(connector);
struct drm_i915_private *i915 = dp_to_i915(intel_dp);
- /* TODO: We currently only support AUX only backlight configurations, not backlights which
- * require a mix of PWM and AUX controls to work. In the mean time, these machines typically
- * work just fine using normal PWM controls anyway.
- */
- if ((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) &&
- drm_edp_backlight_supported(intel_dp->edp_dpcd)) {
+ if (drm_edp_backlight_supported(intel_dp->edp_dpcd)) {
drm_dbg_kms(&i915->drm, "AUX Backlight Control Supported!\n");
return true;
}
--
2.31.1
On GFX9+, format modifiers are always enabled and ensure the
frame-buffers can be scanned out at ADDFB2 time.
On GFX8-, format modifiers are not supported and no other check
is performed. This means ADDFB2 IOCTLs will succeed even if the
tiling isn't supported for scan-out, and will result in garbage
displayed on screen [1].
Fix this by adding a check for tiling flags for GFX8 and older.
The check is taken from radeonsi in Mesa (see how is_displayable
is populated in gfx6_compute_surface).
Changes in v2: use drm_WARN_ONCE instead of drm_WARN (Michel)
[1]: https://github.com/swaywm/wlroots/issues/3185
Signed-off-by: Simon Ser <contact(a)emersion.fr>
Cc: stable(a)vger.kernel.org
Acked-by: Michel Dänzer <mdaenzer(a)redhat.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Harry Wentland <hwentlan(a)amd.com>
Cc: Nicholas Kazlauskas <Nicholas.Kazlauskas(a)amd.com>
Cc: Bas Nieuwenhuizen <bas(a)basnieuwenhuizen.nl>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 31 +++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 58bfc7f00d76..5faf3ef28080 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -837,6 +837,28 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
return 0;
}
+/* Mirrors the is_displayable check in radeonsi's gfx6_compute_surface */
+static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
+{
+ u64 micro_tile_mode;
+
+ /* Zero swizzle mode means linear */
+ if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0)
+ return 0;
+
+ micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
+ switch (micro_tile_mode) {
+ case 0: /* DISPLAY */
+ case 3: /* RENDER */
+ return 0;
+ default:
+ drm_dbg_kms(afb->base.dev,
+ "Micro tile mode %llu not supported for scanout\n",
+ micro_tile_mode);
+ return -EINVAL;
+ }
+}
+
static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
unsigned int *width, unsigned int *height)
{
@@ -1103,6 +1125,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
const struct drm_mode_fb_cmd2 *mode_cmd,
struct drm_gem_object *obj)
{
+ struct amdgpu_device *adev = drm_to_adev(dev);
int ret, i;
/*
@@ -1122,6 +1145,14 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
if (ret)
return ret;
+ if (!dev->mode_config.allow_fb_modifiers) {
+ drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
+ "GFX9+ requires FB check based on format modifier\n");
+ ret = check_tiling_flags_gfx6(rfb);
+ if (ret)
+ return ret;
+ }
+
if (dev->mode_config.allow_fb_modifiers &&
!(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
ret = convert_tiling_flags_to_modifier(rfb);
--
2.33.0
In commit 59eb7193bef2, we moved most HW configuration to bind(), but we
didn't move the runtime PM management. Therefore, depending on initial
boot state, runtime-PM workqueue delays, and other timing factors, we
may disable our power domain in between the hardware configuration
(bind()) and when we enable the display. This can cause us to lose
hardware state and fail to configure our display. For example:
dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
panel-innolux-p079zca ff960000.mipi.0: failed to write command 0
or:
dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
panel-kingdisplay-kd097d04 ff960000.mipi.0: failed write init cmds: -110
We should match the runtime PM to the lifetime of the bind()/unbind()
cycle.
Tested on Acer Chrometab 10 (RK3399 Gru-Scarlet), with panel drivers
built either as modules or built-in.
Side notes: it seems one is more likely to see this problem when the
panel driver is built into the kernel. I've also seen this problem
bisect down to commits that simply changed Kconfig dependencies, because
it changed the order in which driver init functions were compiled into
the kernel, and therefore the ordering and timing of built-in device
probe.
Fixes: 59eb7193bef2 ("drm/rockchip: dsi: move all lane config except LCDC mux to bind()")
Link: https://lore.kernel.org/linux-rockchip/9aedfb528600ecf871885f7293ca4207c84d…
Reported-by: <aleksandr.o.makarov(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
---
.../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 22 +++++++------------
1 file changed, 8 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index a2262bee5aa4..4340a99edb97 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -773,10 +773,6 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
if (mux < 0)
return;
- pm_runtime_get_sync(dsi->dev);
- if (dsi->slave)
- pm_runtime_get_sync(dsi->slave->dev);
-
/*
* For the RK3399, the clk of grf must be enabled before writing grf
* register. And for RK3288 or other soc, this grf_clk must be NULL,
@@ -795,20 +791,10 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
clk_disable_unprepare(dsi->grf_clk);
}
-static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
-{
- struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder);
-
- if (dsi->slave)
- pm_runtime_put(dsi->slave->dev);
- pm_runtime_put(dsi->dev);
-}
-
static const struct drm_encoder_helper_funcs
dw_mipi_dsi_encoder_helper_funcs = {
.atomic_check = dw_mipi_dsi_encoder_atomic_check,
.enable = dw_mipi_dsi_encoder_enable,
- .disable = dw_mipi_dsi_encoder_disable,
};
static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
@@ -938,6 +924,10 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
put_device(second);
}
+ pm_runtime_get_sync(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_get_sync(dsi->slave->dev);
+
ret = clk_prepare_enable(dsi->pllref_clk);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret);
@@ -989,6 +979,10 @@ static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
dw_mipi_dsi_unbind(dsi->dmd);
clk_disable_unprepare(dsi->pllref_clk);
+
+ pm_runtime_put(dsi->dev);
+ if (dsi->slave)
+ pm_runtime_put(dsi->slave->dev);
}
static const struct component_ops dw_mipi_dsi_rockchip_ops = {
--
2.33.0.685.g46640cef36-goog
When updating the host's mask for its MSR_IA32_TSX_CTRL user return entry,
clear the mask in the found uret MSR instead of vmx->guest_uret_msrs[i].
Modifying guest_uret_msrs directly is completely broken as 'i' does not
point at the MSR_IA32_TSX_CTRL entry. In fact, it's guaranteed to be an
out-of-bounds accesses as is always set to kvm_nr_uret_msrs in a prior
loop. By sheer dumb luck, the fallout is limited to "only" failing to
preserve the host's TSX_CTRL_CPUID_CLEAR. The out-of-bounds access is
benign as it's guaranteed to clear a bit in a guest MSR value, which are
always zero at vCPU creation on both x86-64 and i386.
Cc: stable(a)vger.kernel.org
Fixes: 8ea8b8d6f869 ("KVM: VMX: Use common x86's uret MSR list as the one true list")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan(a)intel.com>
Reviewed-by: Sean Christopherson <seanjc(a)google.com>
---
arch/x86/kvm/vmx/vmx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0c2c0d5ae873..cbf3d33432b9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6833,7 +6833,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
*/
tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
if (tsx_ctrl)
- vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+ tsx_ctrl->mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
}
err = alloc_loaded_vmcs(&vmx->vmcs01);
--
2.25.1
On GFX9+, format modifiers are always enabled and ensure the
frame-buffers can be scanned out at ADDFB2 time.
On GFX8-, format modifiers are not supported and no other check
is performed. This means ADDFB2 IOCTLs will succeed even if the
tiling isn't supported for scan-out, and will result in garbage
displayed on screen [1].
Fix this by adding a check for tiling flags for GFX8 and older.
The check is taken from radeonsi in Mesa (see how is_displayable
is populated in gfx6_compute_surface).
[1]: https://github.com/swaywm/wlroots/issues/3185
Signed-off-by: Simon Ser <contact(a)emersion.fr>
Cc: stable(a)vger.kernel.org
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Harry Wentland <hwentlan(a)amd.com>
Cc: Nicholas Kazlauskas <Nicholas.Kazlauskas(a)amd.com>
Cc: Michel Dänzer <michel(a)daenzer.net>
Cc: Bas Nieuwenhuizen <bas(a)basnieuwenhuizen.nl>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 31 +++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 58bfc7f00d76..dfe434a56a8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -837,6 +837,28 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
return 0;
}
+/* Mirrors the is_displayable check in radeonsi's gfx6_compute_surface */
+static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
+{
+ u64 micro_tile_mode;
+
+ /* Zero swizzle mode means linear */
+ if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0)
+ return 0;
+
+ micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
+ switch (micro_tile_mode) {
+ case 0: /* DISPLAY */
+ case 3: /* RENDER */
+ return 0;
+ default:
+ drm_dbg_kms(afb->base.dev,
+ "Micro tile mode %llu not supported for scanout\n",
+ micro_tile_mode);
+ return -EINVAL;
+ }
+}
+
static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
unsigned int *width, unsigned int *height)
{
@@ -1103,6 +1125,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
const struct drm_mode_fb_cmd2 *mode_cmd,
struct drm_gem_object *obj)
{
+ struct amdgpu_device *adev = drm_to_adev(dev);
int ret, i;
/*
@@ -1122,6 +1145,14 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
if (ret)
return ret;
+ if (!dev->mode_config.allow_fb_modifiers) {
+ drm_WARN(dev, adev->family >= AMDGPU_FAMILY_AI,
+ "GFX9+ requires FB check based on format modifier\n");
+ ret = check_tiling_flags_gfx6(rfb);
+ if (ret)
+ return ret;
+ }
+
if (dev->mode_config.allow_fb_modifiers &&
!(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
ret = convert_tiling_flags_to_modifier(rfb);
base-commit: cdccf1ffe1a37f5eb84a2d2096abe1dd6dbeeec3
--
2.33.0
From: Johannes Berg <johannes.berg(a)intel.com>
Even with the previous commit 27af8e2c90fb
("leds: trigger: fix potential deadlock with libata")
to this file, we still get lockdep unhappy, and Boqun
explained the report here:
https://lore.kernel.org/r/YNA+d1X4UkoQ7g8a@boqun-archlinux
Effectively, this means that the read_lock_irqsave() isn't
enough here because another CPU might be trying to do a
write lock, and thus block the readers.
This is all pretty messy, but it doesn't seem right that
the LEDs framework imposes some locking requirements on
users, in particular we'd have to make the spinlock in the
iwlwifi driver always disable IRQs, even if we don't need
that for any other reason, just to avoid this deadlock.
Since writes to the led_cdevs list are rare (and are done
by userspace), just switch the list to RCU. This costs a
synchronize_rcu() at removal time so we can ensure things
are correct, but that seems like a small price to pay for
getting lock-free iterations and no deadlocks (nor any
locking requirements imposed on users.)
Cc: stable(a)vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
---
drivers/leds/led-triggers.c | 41 +++++++++++++++++++------------------
include/linux/leds.h | 2 +-
2 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 4e7b78a84149..072491d3e17b 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -157,7 +157,6 @@ EXPORT_SYMBOL_GPL(led_trigger_read);
/* Caller must ensure led_cdev->trigger_lock held */
int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
{
- unsigned long flags;
char *event = NULL;
char *envp[2];
const char *name;
@@ -171,10 +170,13 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
/* Remove any existing trigger */
if (led_cdev->trigger) {
- write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
- list_del(&led_cdev->trig_list);
- write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock,
- flags);
+ spin_lock(&led_cdev->trigger->leddev_list_lock);
+ list_del_rcu(&led_cdev->trig_list);
+ spin_unlock(&led_cdev->trigger->leddev_list_lock);
+
+ /* ensure it's no longer visible on the led_cdevs list */
+ synchronize_rcu();
+
cancel_work_sync(&led_cdev->set_brightness_work);
led_stop_software_blink(led_cdev);
if (led_cdev->trigger->deactivate)
@@ -186,9 +188,9 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
led_set_brightness(led_cdev, LED_OFF);
}
if (trig) {
- write_lock_irqsave(&trig->leddev_list_lock, flags);
- list_add_tail(&led_cdev->trig_list, &trig->led_cdevs);
- write_unlock_irqrestore(&trig->leddev_list_lock, flags);
+ spin_lock(&trig->leddev_list_lock);
+ list_add_tail_rcu(&led_cdev->trig_list, &trig->led_cdevs);
+ spin_unlock(&trig->leddev_list_lock);
led_cdev->trigger = trig;
if (trig->activate)
@@ -223,9 +225,10 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
trig->deactivate(led_cdev);
err_activate:
- write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
- list_del(&led_cdev->trig_list);
- write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, flags);
+ spin_lock(&led_cdev->trigger->leddev_list_lock);
+ list_del_rcu(&led_cdev->trig_list);
+ spin_unlock(&led_cdev->trigger->leddev_list_lock);
+ synchronize_rcu();
led_cdev->trigger = NULL;
led_cdev->trigger_data = NULL;
led_set_brightness(led_cdev, LED_OFF);
@@ -285,7 +288,7 @@ int led_trigger_register(struct led_trigger *trig)
struct led_classdev *led_cdev;
struct led_trigger *_trig;
- rwlock_init(&trig->leddev_list_lock);
+ spin_lock_init(&trig->leddev_list_lock);
INIT_LIST_HEAD(&trig->led_cdevs);
down_write(&triggers_list_lock);
@@ -378,15 +381,14 @@ void led_trigger_event(struct led_trigger *trig,
enum led_brightness brightness)
{
struct led_classdev *led_cdev;
- unsigned long flags;
if (!trig)
return;
- read_lock_irqsave(&trig->leddev_list_lock, flags);
- list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list)
led_set_brightness(led_cdev, brightness);
- read_unlock_irqrestore(&trig->leddev_list_lock, flags);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(led_trigger_event);
@@ -397,20 +399,19 @@ static void led_trigger_blink_setup(struct led_trigger *trig,
int invert)
{
struct led_classdev *led_cdev;
- unsigned long flags;
if (!trig)
return;
- read_lock_irqsave(&trig->leddev_list_lock, flags);
- list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) {
if (oneshot)
led_blink_set_oneshot(led_cdev, delay_on, delay_off,
invert);
else
led_blink_set(led_cdev, delay_on, delay_off);
}
- read_unlock_irqrestore(&trig->leddev_list_lock, flags);
+ rcu_read_unlock();
}
void led_trigger_blink(struct led_trigger *trig,
diff --git a/include/linux/leds.h b/include/linux/leds.h
index a0b730be40ad..ba4861ec73d3 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -360,7 +360,7 @@ struct led_trigger {
struct led_hw_trigger_type *trigger_type;
/* LEDs under control by this trigger (for simple triggers) */
- rwlock_t leddev_list_lock;
+ spinlock_t leddev_list_lock;
struct list_head led_cdevs;
/* Link to next registered trigger */
--
2.31.1
This patch series is present in v5.15 and fixes the problem that the
timeout value calculated by tcp_model_timeout() is not accurate.
Pseudo-Shortlog of commits:
Eric Dumazet <edumazet(a)google.com>
tcp: address problems caused by EDT misshaps
Yuchung Cheng <ycheng(a)google.com>
tcp: always set retrans_stamp on recovery
Yuchung Cheng <ycheng(a)google.com>
tcp: create a helper to model exponential backoff
Eric Dumazet <edumazet(a)google.com>
tcp: adjust rto_base in retransmits_timed_out()
net/ipv4/tcp_input.c | 16 ++++++-----
net/ipv4/tcp_output.c | 9 +++----
net/ipv4/tcp_timer.c | 63 ++++++++++++++++++++-----------------------
3 files changed, 43 insertions(+), 45 deletions(-)
--
2.19.1