From: Peng Fan peng.fan@nxp.com
[ Upstream commit 876e0b26ccd211ca92607d83c87cc1f097784c6d ]
Address the sparse check warning:
drivers/remoteproc/remoteproc_coredump.c:169:53:
sparse: warning: incorrect type in argument 2 (different address spaces)
sparse:    expected void const volatile [noderef] __iomem *src
sparse:    got void *[assigned] ptr
Reported-by: kernel test robot lkp@intel.com
Signed-off-by: Peng Fan peng.fan@nxp.com
Link: https://lore.kernel.org/r/20211110032101.517487-1-peng.fan@oss.nxp.com
Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/remoteproc/remoteproc_coredump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c index c892f433a323e..4b093420d98aa 100644 --- a/drivers/remoteproc/remoteproc_coredump.c +++ b/drivers/remoteproc/remoteproc_coredump.c @@ -166,7 +166,7 @@ static void rproc_copy_segment(struct rproc *rproc, void *dest, memset(dest, 0xff, size); } else { if (is_iomem) - memcpy_fromio(dest, ptr, size); + memcpy_fromio(dest, (void const __iomem *)ptr, size); else memcpy(dest, ptr, size); }
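For context on why the cast silences sparse, here is a minimal standalone sketch (not the driver code); the __iomem/__force definitions mirror the kernel's historical numeric address-space annotations under __CHECKER__, and fromio_stub() merely stands in for memcpy_fromio():

#include <stddef.h>
#include <string.h>

#ifdef __CHECKER__                              /* defined when sparse runs */
# define __iomem __attribute__((noderef, address_space(2)))
# define __force __attribute__((force))
#else
# define __iomem
# define __force
#endif

/* Stand-in for memcpy_fromio(): argument 2 must carry the __iomem space. */
static void fromio_stub(void *dst, const volatile void __iomem *src, size_t n)
{
        /* A real implementation would use MMIO accessors; plain memcpy keeps
         * the sketch self-contained. __force casts away the address space. */
        memcpy(dst, (const void __force *)src, n);
}

static void copy_segment(void *dest, void *ptr, size_t size, int is_iomem)
{
        if (is_iomem)
                /* Without this cast sparse warns "incorrect type in argument 2
                 * (different address spaces)": a plain void * has no __iomem
                 * address space attached to it. */
                fromio_stub(dest, (const void __iomem *)ptr, size);
        else
                memcpy(dest, ptr, size);
}

int main(void)
{
        char src[8] = "abcdefg", dst[8];

        copy_segment(dst, src, sizeof(src), 0);
        return dst[0] != 'a';
}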
From: Fabien Dessenne fabien.dessenne@foss.st.com
[ Upstream commit 60630924bb5af8751adcecc896e7763c3783ca89 ]
Set the clock during probe and keep its control during suspend / resume operations. This fixes an issue when CONFIG_PM is not set and where the clock is never enabled.
Make use of devm_ functions to simplify the code.
Signed-off-by: Fabien Dessenne fabien.dessenne@foss.st.com
Signed-off-by: Bjorn Andersson bjorn.andersson@linaro.org
Link: https://lore.kernel.org/r/20211011135836.1045437-1-fabien.dessenne@foss.st.c...
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/hwspinlock/stm32_hwspinlock.c | 58 +++++++++++++++++----------
 1 file changed, 37 insertions(+), 21 deletions(-)
diff --git a/drivers/hwspinlock/stm32_hwspinlock.c b/drivers/hwspinlock/stm32_hwspinlock.c index 3ad0ce0da4d98..5bd11a7fab65d 100644 --- a/drivers/hwspinlock/stm32_hwspinlock.c +++ b/drivers/hwspinlock/stm32_hwspinlock.c @@ -54,8 +54,23 @@ static const struct hwspinlock_ops stm32_hwspinlock_ops = { .relax = stm32_hwspinlock_relax, };
+static void stm32_hwspinlock_disable_clk(void *data) +{ + struct platform_device *pdev = data; + struct stm32_hwspinlock *hw = platform_get_drvdata(pdev); + struct device *dev = &pdev->dev; + + pm_runtime_get_sync(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); + + clk_disable_unprepare(hw->clk); +} + static int stm32_hwspinlock_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; struct stm32_hwspinlock *hw; void __iomem *io_base; size_t array_size; @@ -66,41 +81,43 @@ static int stm32_hwspinlock_probe(struct platform_device *pdev) return PTR_ERR(io_base);
array_size = STM32_MUTEX_NUM_LOCKS * sizeof(struct hwspinlock); - hw = devm_kzalloc(&pdev->dev, sizeof(*hw) + array_size, GFP_KERNEL); + hw = devm_kzalloc(dev, sizeof(*hw) + array_size, GFP_KERNEL); if (!hw) return -ENOMEM;
- hw->clk = devm_clk_get(&pdev->dev, "hsem"); + hw->clk = devm_clk_get(dev, "hsem"); if (IS_ERR(hw->clk)) return PTR_ERR(hw->clk);
- for (i = 0; i < STM32_MUTEX_NUM_LOCKS; i++) - hw->bank.lock[i].priv = io_base + i * sizeof(u32); + ret = clk_prepare_enable(hw->clk); + if (ret) { + dev_err(dev, "Failed to prepare_enable clock\n"); + return ret; + }
platform_set_drvdata(pdev, hw); - pm_runtime_enable(&pdev->dev);
- ret = hwspin_lock_register(&hw->bank, &pdev->dev, &stm32_hwspinlock_ops, - 0, STM32_MUTEX_NUM_LOCKS); + pm_runtime_get_noresume(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + pm_runtime_put(dev);
- if (ret) - pm_runtime_disable(&pdev->dev); + ret = devm_add_action_or_reset(dev, stm32_hwspinlock_disable_clk, pdev); + if (ret) { + dev_err(dev, "Failed to register action\n"); + return ret; + }
- return ret; -} + for (i = 0; i < STM32_MUTEX_NUM_LOCKS; i++) + hw->bank.lock[i].priv = io_base + i * sizeof(u32);
-static int stm32_hwspinlock_remove(struct platform_device *pdev) -{ - struct stm32_hwspinlock *hw = platform_get_drvdata(pdev); - int ret; + ret = devm_hwspin_lock_register(dev, &hw->bank, &stm32_hwspinlock_ops, + 0, STM32_MUTEX_NUM_LOCKS);
- ret = hwspin_lock_unregister(&hw->bank); if (ret) - dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret); - - pm_runtime_disable(&pdev->dev); + dev_err(dev, "Failed to register hwspinlock\n");
- return 0; + return ret; }
static int __maybe_unused stm32_hwspinlock_runtime_suspend(struct device *dev) @@ -135,7 +152,6 @@ MODULE_DEVICE_TABLE(of, stm32_hwpinlock_ids);
static struct platform_driver stm32_hwspinlock_driver = { .probe = stm32_hwspinlock_probe, - .remove = stm32_hwspinlock_remove, .driver = { .name = "stm32_hwspinlock", .of_match_table = stm32_hwpinlock_ids,
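As a side note, the devm_add_action_or_reset() pattern used above generalizes to any probe-time resource; a kernel-style sketch with made-up foo_* names (not the stm32 driver itself) looks like this:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/platform_device.h>

/* Undo clk_prepare_enable(); runs automatically when the device unbinds. */
static void foo_disable_clk(void *data)
{
        struct clk *clk = data;

        clk_disable_unprepare(clk);
}

static int foo_probe(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
        struct clk *clk;
        int ret;

        clk = devm_clk_get(dev, "hsem");
        if (IS_ERR(clk))
                return PTR_ERR(clk);

        ret = clk_prepare_enable(clk);
        if (ret)
                return ret;

        /*
         * If registration fails the action is called immediately
         * ("or_reset"); otherwise it runs on driver unbind, which is why
         * no .remove() callback is needed for the clock anymore.
         */
        return devm_add_action_or_reset(dev, foo_disable_clk, clk);
}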
From: Chao Yu chao@kernel.org
[ Upstream commit 2a64e303e3051550c75897239174e399dfcb8b7e ]
For compressed inodes, in .{invalidate,release}page we call f2fs_invalidate_compress_pages() to drop all compressed page cache of the current inode.
But we don't need to drop the compressed page cache synchronously in .invalidatepage, because all truncation paths of compressed physical blocks are already covered by f2fs_invalidate_compress_page().
And we also don't need to drop the compressed page cache synchronously in .releasepage, because, in the out-of-memory case, we can count on page cache reclaim of sbi->compress_inode.
BTW, this patch may fix the issue reported below:
https://lore.kernel.org/linux-f2fs-devel/20211202092812.197647-1-changfengna...
Signed-off-by: Chao Yu chao@kernel.org
Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org
Signed-off-by: Sasha Levin sashal@kernel.org
---
 fs/f2fs/data.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9f754aaef558b..7ee105eb6940a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3770,12 +3770,9 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
clear_page_private_gcing(page);
- if (test_opt(sbi, COMPRESS_CACHE)) { - if (f2fs_compressed_file(inode)) - f2fs_invalidate_compress_pages(sbi, inode->i_ino); - if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) - clear_page_private_data(page); - } + if (test_opt(sbi, COMPRESS_CACHE) && + inode->i_ino == F2FS_COMPRESS_INO(sbi)) + clear_page_private_data(page);
if (page_private_atomic(page)) return f2fs_drop_inmem_page(inode, page); @@ -3795,12 +3792,9 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 0;
if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct inode *inode = page->mapping->host;
- if (f2fs_compressed_file(inode)) - f2fs_invalidate_compress_pages(sbi, inode->i_ino); - if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) + if (inode->i_ino == F2FS_COMPRESS_INO(F2FS_I_SB(inode))) clear_page_private_data(page); }
From: Geert Uytterhoeven geert@linux-m68k.org
[ Upstream commit 9d7b3078628f591e4007210c0d5d3f94805cff55 ]
"make dtbs_check" reports:
arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dt.yaml: soc: refclk: {'compatible': ['fixed-clock'], '#clock-cells': [[0]], 'clock-frequency': [[600000000]], 'clock-output-names': ['msspllclk'], 'phandle': [[7]]} should not be valid under {'type': 'object'}
	From schema: dtschema/schemas/simple-bus.yaml
Fix this by moving the node out of the "soc" subnode. While at it, rename it to "msspllclk", and drop the now superfluous "clock-output-names" property. Move the actual clock-frequency value to the board DTS, since it is not set until bitstream programming time.
Signed-off-by: Geert Uytterhoeven geert@linux-m68k.org
Acked-by: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com
Reviewed-by: Conor Dooley conor.dooley@microchip.com
Tested-by: Conor Dooley conor.dooley@microchip.com
Signed-off-by: Palmer Dabbelt palmer@rivosinc.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 .../boot/dts/microchip/microchip-mpfs-icicle-kit.dts | 4 ++++
 arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi    | 12 +++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index fc1e5869df1b9..0c748ae1b0068 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -35,6 +35,10 @@ memory@80000000 { }; };
+&refclk { + clock-frequency = <600000000>; +}; + &serial0 { status = "okay"; }; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi index c9f6d205d2ba1..393f18cdbb346 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -142,6 +142,11 @@ cpu4_intc: interrupt-controller { }; };
+ refclk: msspllclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + }; + soc { #address-cells = <2>; #size-cells = <2>; @@ -191,13 +196,6 @@ dma@3000000 { #dma-cells = <1>; };
- refclk: refclk { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <600000000>; - clock-output-names = "msspllclk"; - }; - clkcfg: clkcfg@20002000 { compatible = "microchip,mpfs-clkcfg"; reg = <0x0 0x20002000 0x0 0x1000>;
From: Hyunchul Lee hyc.lee@gmail.com
[ Upstream commit 99b7650ac51847e81b4d5139824e321e6cb76130 ]
If CONFIG_LOCKDEP is enabled, the following kernel warning is generated because rdma_accept() checks with lockdep_assert_held() whether the handler_mutex is held; the CM (Connection Manager) holds the mutex before the CM handler callback is called.
[ 63.211405 ] WARNING: CPU: 1 PID: 345 at drivers/infiniband/core/cma.c:4405 rdma_accept+0x17a/0x350
[ 63.212080 ] RIP: 0010:rdma_accept+0x17a/0x350
...
[ 63.214036 ] Call Trace:
[ 63.214098 ]  <TASK>
[ 63.214185 ]  smb_direct_accept_client+0xb4/0x170 [ksmbd]
[ 63.214412 ]  smb_direct_prepare+0x322/0x8c0 [ksmbd]
[ 63.214555 ]  ? rcu_read_lock_sched_held+0x3a/0x70
[ 63.214700 ]  ksmbd_conn_handler_loop+0x63/0x270 [ksmbd]
[ 63.214826 ]  ? ksmbd_conn_alive+0x80/0x80 [ksmbd]
[ 63.214952 ]  kthread+0x171/0x1a0
[ 63.215039 ]  ? set_kthread_struct+0x40/0x40
[ 63.215128 ]  ret_from_fork+0x22/0x30
To avoid this, move creating a queue pair and accepting a client from transport_ops->prepare() to smb_direct_handle_connect_request().
Acked-by: Namjae Jeon linkinjeon@kernel.org
Signed-off-by: Hyunchul Lee hyc.lee@gmail.com
Signed-off-by: Steve French stfrench@microsoft.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 fs/ksmbd/transport_rdma.c | 102 ++++++++++++++++++++++----------------
 1 file changed, 59 insertions(+), 43 deletions(-)
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 7e57cbb0bb356..1d28175b90158 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -555,6 +555,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) } t->negotiation_requested = true; t->full_packet_received = true; + enqueue_reassembly(t, recvmsg, 0); wake_up_interruptible(&t->wait_status); break; case SMB_DIRECT_MSG_DATA_TRANSFER: { @@ -1581,19 +1582,13 @@ static int smb_direct_accept_client(struct smb_direct_transport *t) pr_err("error at rdma_accept: %d\n", ret); return ret; } - - wait_event_interruptible(t->wait_status, - t->status != SMB_DIRECT_CS_NEW); - if (t->status != SMB_DIRECT_CS_CONNECTED) - return -ENOTCONN; return 0; }
-static int smb_direct_negotiate(struct smb_direct_transport *t) +static int smb_direct_prepare_negotiation(struct smb_direct_transport *t) { int ret; struct smb_direct_recvmsg *recvmsg; - struct smb_direct_negotiate_req *req;
recvmsg = get_free_recvmsg(t); if (!recvmsg) @@ -1603,44 +1598,20 @@ static int smb_direct_negotiate(struct smb_direct_transport *t) ret = smb_direct_post_recv(t, recvmsg); if (ret) { pr_err("Can't post recv: %d\n", ret); - goto out; + goto out_err; }
t->negotiation_requested = false; ret = smb_direct_accept_client(t); if (ret) { pr_err("Can't accept client\n"); - goto out; + goto out_err; }
smb_direct_post_recv_credits(&t->post_recv_credits_work.work); - - ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); - ret = wait_event_interruptible_timeout(t->wait_status, - t->negotiation_requested || - t->status == SMB_DIRECT_CS_DISCONNECTED, - SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ); - if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) { - ret = ret < 0 ? ret : -ETIMEDOUT; - goto out; - } - - ret = smb_direct_check_recvmsg(recvmsg); - if (ret == -ECONNABORTED) - goto out; - - req = (struct smb_direct_negotiate_req *)recvmsg->packet; - t->max_recv_size = min_t(int, t->max_recv_size, - le32_to_cpu(req->preferred_send_size)); - t->max_send_size = min_t(int, t->max_send_size, - le32_to_cpu(req->max_receive_size)); - t->max_fragmented_send_size = - le32_to_cpu(req->max_fragmented_size); - - ret = smb_direct_send_negotiate_response(t, ret); -out: - if (recvmsg) - put_recvmsg(t, recvmsg); + return 0; +out_err: + put_recvmsg(t, recvmsg); return ret; }
@@ -1877,6 +1848,47 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, static int smb_direct_prepare(struct ksmbd_transport *t) { struct smb_direct_transport *st = smb_trans_direct_transfort(t); + struct smb_direct_recvmsg *recvmsg; + struct smb_direct_negotiate_req *req; + int ret; + + ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n"); + ret = wait_event_interruptible_timeout(st->wait_status, + st->negotiation_requested || + st->status == SMB_DIRECT_CS_DISCONNECTED, + SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ); + if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED) + return ret < 0 ? ret : -ETIMEDOUT; + + recvmsg = get_first_reassembly(st); + if (!recvmsg) + return -ECONNABORTED; + + ret = smb_direct_check_recvmsg(recvmsg); + if (ret == -ECONNABORTED) + goto out; + + req = (struct smb_direct_negotiate_req *)recvmsg->packet; + st->max_recv_size = min_t(int, st->max_recv_size, + le32_to_cpu(req->preferred_send_size)); + st->max_send_size = min_t(int, st->max_send_size, + le32_to_cpu(req->max_receive_size)); + st->max_fragmented_send_size = + le32_to_cpu(req->max_fragmented_size); + + ret = smb_direct_send_negotiate_response(st, ret); +out: + spin_lock_irq(&st->reassembly_queue_lock); + st->reassembly_queue_length--; + list_del(&recvmsg->list); + spin_unlock_irq(&st->reassembly_queue_lock); + put_recvmsg(st, recvmsg); + + return ret; +} + +static int smb_direct_connect(struct smb_direct_transport *st) +{ int ret; struct ib_qp_cap qp_cap;
@@ -1898,13 +1910,11 @@ static int smb_direct_prepare(struct ksmbd_transport *t) return ret; }
- ret = smb_direct_negotiate(st); + ret = smb_direct_prepare_negotiation(st); if (ret) { pr_err("Can't negotiate: %d\n", ret); return ret; } - - st->status = SMB_DIRECT_CS_CONNECTED; return 0; }
@@ -1920,6 +1930,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs) static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) { struct smb_direct_transport *t; + int ret;
if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) { ksmbd_debug(RDMA, @@ -1932,18 +1943,23 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id) if (!t) return -ENOMEM;
+ ret = smb_direct_connect(t); + if (ret) + goto out_err; + KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop, KSMBD_TRANS(t)->conn, "ksmbd:r%u", SMB_DIRECT_PORT); if (IS_ERR(KSMBD_TRANS(t)->handler)) { - int ret = PTR_ERR(KSMBD_TRANS(t)->handler); - + ret = PTR_ERR(KSMBD_TRANS(t)->handler); pr_err("Can't start thread\n"); - free_transport(t); - return ret; + goto out_err; }
return 0; +out_err: + free_transport(t); + return ret; }
static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
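The lockdep rule behind the warning can be sketched generically: an API that must run with a mutex held asserts it with lockdep_assert_held(), and callers are expected to reach it only from the path that already took the lock (here the CM event handler). A minimal kernel-style illustration with made-up names rather than the real ksmbd/RDMA ones:

#include <linux/lockdep.h>
#include <linux/mutex.h>

struct conn {
        struct mutex handler_mutex;
};

/* Must only be called with conn->handler_mutex held, i.e. from inside the
 * connection manager's event handler, which takes the mutex for us. */
static int conn_accept(struct conn *c)
{
        lockdep_assert_held(&c->handler_mutex); /* warns if the lock is not held */
        /* ... post the accept work ... */
        return 0;
}

/* What the CM core effectively does around the event handler callback. */
static int cm_event_handler(struct conn *c)
{
        int ret;

        mutex_lock(&c->handler_mutex);
        ret = conn_accept(c);           /* assertion satisfied here */
        mutex_unlock(&c->handler_mutex);
        return ret;
}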
From: Hans de Goede hdegoede@redhat.com
[ Upstream commit 7f7b4236f2040d19df1ddaf30047128b41e78de7 ]
Some BIOS-es contain a bug where they add addresses which map to system RAM in the PCI host bridge window returned by the ACPI _CRS method, see commit 4dc2287c1805 ("x86: avoid E820 regions when allocating address space").
To work around this bug Linux excludes E820 reserved addresses when allocating addresses from the PCI host bridge window since 2010.
Recently (2019) some systems have shown up with E820 reservations which cover the entire _CRS-returned PCI bridge memory window, causing all attempts to assign memory to PCI BARs which have not been set up by the BIOS to fail. For example, here are the relevant dmesg bits from a Lenovo IdeaPad 3 15IIL 81WE:
[mem 0x000000004bc50000-0x00000000cfffffff] reserved
pci_bus 0000:00: root bus resource [mem 0x65400000-0xbfffffff window]
The ACPI specifications appear to allow this new behavior:
The relationship between E820 and ACPI _CRS is not really very clear. ACPI v6.3, sec 15, table 15-374, says AddressRangeReserved means:
This range of addresses is in use or reserved by the system and is not to be included in the allocatable memory pool of the operating system's memory manager.
and it may be used when:
The address range is in use by a memory-mapped system device.
Furthermore, sec 15.2 says:
Address ranges defined for baseboard memory-mapped I/O devices, such as APICs, are returned as reserved.
A PCI host bridge qualifies as a baseboard memory-mapped I/O device, and its apertures are in use and certainly should not be included in the general allocatable pool, so the fact that some BIOS-es reports the PCI aperture as "reserved" in E820 doesn't seem like a BIOS bug.
So it seems that the excluding of E820 reserved addresses is a mistake.
Ideally Linux would fully stop excluding E820 reserved addresses, but then the old systems this was added for will regress. Instead keep the old behavior for old systems, while ignoring the E820 reservations for any systems from now on.
Old systems are defined here as those with a BIOS year < 2018. This cutoff was chosen to make sure that E820 reservations will not be used on the currently affected systems, while at the same time also taking into account that the systems for which the E820 checking was originally added may have received BIOS updates for quite a while (esp. CVE-related ones), giving them a BIOS year more recent than 2010.
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206459
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1868899
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1871793
BugLink: https://bugs.launchpad.net/bugs/1878279
BugLink: https://bugs.launchpad.net/bugs/1931715
BugLink: https://bugs.launchpad.net/bugs/1932069
BugLink: https://bugs.launchpad.net/bugs/1921649
Reviewed-by: Mika Westerberg mika.westerberg@linux.intel.com
Acked-by: Bjorn Helgaas bhelgaas@google.com
Signed-off-by: Hans de Goede hdegoede@redhat.com
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 arch/x86/kernel/resource.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 9b9fb7882c206..9ae64f9af9568 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/dmi.h> #include <linux/ioport.h> #include <asm/e820/api.h>
@@ -23,11 +24,31 @@ static void resource_clip(struct resource *res, resource_size_t start, res->start = end + 1; }
+/* + * Some BIOS-es contain a bug where they add addresses which map to + * system RAM in the PCI host bridge window returned by the ACPI _CRS + * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when + * allocating address space"). To avoid this Linux by default excludes + * E820 reservations when allocating addresses since 2010. + * In 2019 some systems have shown-up with E820 reservations which cover + * the entire _CRS returned PCI host bridge window, causing all attempts + * to assign memory to PCI BARs to fail if Linux uses E820 reservations. + * + * Ideally Linux would fully stop using E820 reservations, but then + * the old systems this was added for will regress. + * Instead keep the old behavior for old systems, while ignoring the + * E820 reservations for any systems from now on. + */ static void remove_e820_regions(struct resource *avail) { - int i; + int i, year = dmi_get_bios_year(); struct e820_entry *entry;
+ if (year >= 2018) + return; + + pr_info_once("PCI: Removing E820 reservations from host bridge windows\n"); + for (i = 0; i < e820_table->nr_entries; i++) { entry = &e820_table->entries[i];
From: Ignat Korchagin ignat@cloudflare.com
[ Upstream commit ed6ae5ca437d9d238117d90e95f7f2cc27da1b31 ]
While experimenting with FOU encapsulation, Amir noticed that encapsulated IPv6 traffic fails to be delivered if the peer IP address is configured locally.
It can be easily verified by creating a sit interface like below:
$ sudo ip link add name fou_test type sit remote 127.0.0.1 encap fou encap-sport auto encap-dport 1111
$ sudo ip link set fou_test up
and sending some IPv4 and IPv6 traffic to it
$ ping -I fou_test -c 1 1.1.1.1
$ ping6 -I fou_test -c 1 fe80::d0b0:dfff:fe4c:fcbc
"tcpdump -i any udp dst port 1111" will confirm that only the first IPv4 ping was encapsulated and attempted to be delivered.
This seems like a limitation: for example, in a cloud environment the "peer" service may be arbitrarily scheduled on any server within the cluster, where all nodes are trying to send encapsulated traffic. And the unlucky node will not be able to. Moreover, delivering encapsulated IPv4 traffic locally is allowed.
But I may not have all the context about this restriction and this code predates the observable git history.
Reported-by: Amir Razmjou arazmjou@cloudflare.com
Signed-off-by: Ignat Korchagin ignat@cloudflare.com
Reviewed-by: David Ahern dsahern@kernel.org
Link: https://lore.kernel.org/r/20220107123842.211335-1-ignat@cloudflare.com
Signed-off-by: Jakub Kicinski kuba@kernel.org
Signed-off-by: Sasha Levin sashal@kernel.org
---
 net/ipv6/sit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8a3618a30632a..72968d4188b93 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -956,7 +956,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); }
- if (rt->rt_type != RTN_UNICAST) { + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { ip_rt_put(rt); dev->stats.tx_carrier_errors++; goto tx_error_icmp;
From: Jeff Layton jlayton@kernel.org
[ Upstream commit 0078ea3b0566e3da09ae8e1e4fbfd708702f2876 ]
玮文 胡 reported seeing the WARN_RATELIMIT pop when writing to an inode that had been transplanted into the stray dir. The client was trying to look up the quotarealm info from the parent and that tripped the warning.
Change the ceph_vino_is_reserved helper to not throw a warning for MDS stray directories (0x100 - 0x1ff), only for reserved dirs that are not in that range.
Also, fix ceph_has_realms_with_quotas to return false when encountering a reserved inode.
URL: https://tracker.ceph.com/issues/53180
Reported-by: Hu Weiwen sehuww@mail.scut.edu.cn
Signed-off-by: Jeff Layton jlayton@kernel.org
Reviewed-by: Luis Henriques lhenriques@suse.de
Reviewed-by: Xiubo Li xiubli@redhat.com
Signed-off-by: Ilya Dryomov idryomov@gmail.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 fs/ceph/quota.c |  3 +++
 fs/ceph/super.h | 20 ++++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 620c691af40e7..d1158c40bb0c6 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode) /* if root is the real CephFS root, we don't have quota realms */ if (root && ceph_ino(root) == CEPH_INO_ROOT) return false; + /* MDS stray dirs have no quota realms */ + if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino)) + return false; /* otherwise, we can't know for sure */ return true; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ac331aa07cfa5..83b0b493016e1 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -535,19 +535,23 @@ static inline int ceph_ino_compare(struct inode *inode, void *data) * * These come from src/mds/mdstypes.h in the ceph sources. */ -#define CEPH_MAX_MDS 0x100 -#define CEPH_NUM_STRAY 10 +#define CEPH_MAX_MDS 0x100 +#define CEPH_NUM_STRAY 10 #define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS) +#define CEPH_MDS_INO_LOG_OFFSET (2 * CEPH_MAX_MDS) #define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
static inline bool ceph_vino_is_reserved(const struct ceph_vino vino) { - if (vino.ino < CEPH_INO_SYSTEM_BASE && - vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) { - WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino); - return true; - } - return false; + if (vino.ino >= CEPH_INO_SYSTEM_BASE || + vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET) + return false; + + /* Don't warn on mdsdirs */ + WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET, + "Attempt to access reserved inode number 0x%llx", + vino.ino); + return true; }
static inline struct inode *ceph_find_inode(struct super_block *sb,
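To make the range arithmetic in super.h concrete, here is a small standalone userspace program (not kernel code) that mirrors the patched check; with CEPH_MAX_MDS = 0x100 and CEPH_NUM_STRAY = 10, inodes 0x100-0x1ff are the MDS mdsdir/stray inodes that no longer warn, while 0x200-0xfff remain reserved inodes that do:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CEPH_MAX_MDS                    0x100
#define CEPH_NUM_STRAY                  10
#define CEPH_MDS_INO_MDSDIR_OFFSET      (1 * CEPH_MAX_MDS)
#define CEPH_MDS_INO_LOG_OFFSET         (2 * CEPH_MAX_MDS)
#define CEPH_INO_SYSTEM_BASE            ((6 * CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))

/* Mirrors the patched ceph_vino_is_reserved(): reserved inodes below the
 * log offset (the mdsdir/stray range 0x100-0x1ff) no longer warn. */
static bool vino_is_reserved(uint64_t ino, bool *warns)
{
        *warns = false;
        if (ino >= CEPH_INO_SYSTEM_BASE || ino < CEPH_MDS_INO_MDSDIR_OFFSET)
                return false;
        *warns = ino >= CEPH_MDS_INO_LOG_OFFSET;
        return true;
}

int main(void)
{
        const uint64_t samples[] = { 0x1, 0x150, 0x250, 0xfff, 0x1000 };

        for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                bool warns;
                bool reserved = vino_is_reserved(samples[i], &warns);

                printf("ino 0x%llx: reserved=%d warns=%d\n",
                       (unsigned long long)samples[i], reserved, warns);
        }
        return 0;
}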
From: Wen Gu guwen@linux.alibaba.com
[ Upstream commit 61f434b0280ed65495831f1b6e1a5c21a90f47c6 ]
We encountered some crashes caused by the race between the access and the termination of link groups.
Here are some of panic stacks we met:
1) Race between smc_clc_wait_msg() and __smc_lgr_terminate()
BUG: kernel NULL pointer dereference, address: 00000000000002f0
Workqueue: smc_hs_wq smc_listen_work [smc]
RIP: 0010:smc_clc_wait_msg+0x3eb/0x5c0 [smc]
Call Trace:
 <TASK>
 ? smc_clc_send_accept+0x45/0xa0 [smc]
 ? smc_clc_send_accept+0x45/0xa0 [smc]
 smc_listen_work+0x783/0x1220 [smc]
 ? finish_task_switch+0xc4/0x2e0
 ? process_one_work+0x1ad/0x3c0
 process_one_work+0x1ad/0x3c0
 worker_thread+0x4c/0x390
 ? rescuer_thread+0x320/0x320
 kthread+0x149/0x190
 ? set_kthread_struct+0x40/0x40
 ret_from_fork+0x1f/0x30
 </TASK>
smc_listen_work()                           abnormal case like port error
---------------------------------------------------------------
                                           | __smc_lgr_terminate()
                                           | |- smc_conn_kill()
                                           | |- smc_lgr_unregister_conn()
                                           | |- set conn->lgr = NULL
smc_clc_wait_msg()                         |
|- access conn->lgr (panic)                |
2) Race between smc_setsockopt() and __smc_lgr_terminate()
BUG: kernel NULL pointer dereference, address: 00000000000002e8
RIP: 0010:smc_setsockopt+0x17a/0x280 [smc]
Call Trace:
 <TASK>
 __sys_setsockopt+0xfc/0x190
 __x64_sys_setsockopt+0x20/0x30
 do_syscall_64+0x34/0x90
 entry_SYSCALL_64_after_hwframe+0x44/0xae
 </TASK>
smc_setsockopt()                            abnormal case like port error
--------------------------------------------------------------
                                           | __smc_lgr_terminate()
                                           | |- smc_conn_kill()
                                           | |- smc_lgr_unregister_conn()
                                           | |- set conn->lgr = NULL
mod_delayed_work()                         |
|- access conn->lgr (panic)                |
There are some other panic sites caused by the same underlying reason as described above: accessing the link group after its termination, and thus hitting a NULL pointer or an invalid resource.
Currently there seems to be no synchronization between link group access and its sudden termination. This patch tries to fix this by introducing a reference count for the link group and not freeing the link group until the reference count drops to zero.
A link group might be referred to by links or SMC connections. The operations on the link group reference count can be summarized as follows:
object        [hold or initialized as 1]      [put]
-------------------------------------------------------------------
link group    smc_lgr_create()                smc_lgr_free()
connections   smc_conn_create()               smc_conn_free()
links         smcr_link_init()                smcr_link_clear()
In this way, we extend the life cycle of the link group and ensure it is longer than the life cycle of the connections and links above it, thus avoiding invalid access to the link group after its termination.
Signed-off-by: Wen Gu guwen@linux.alibaba.com
Signed-off-by: David S. Miller davem@davemloft.net
Signed-off-by: Sasha Levin sashal@kernel.org
---
 net/smc/smc.h      |  1 +
 net/smc/smc_core.c | 60 +++++++++++++++++++++++++++++++++++---------
 net/smc/smc_core.h |  3 +++
 3 files changed, 53 insertions(+), 11 deletions(-)
diff --git a/net/smc/smc.h b/net/smc/smc.h index 1a4fc1c6c4ab6..3d0b8e300deb3 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -221,6 +221,7 @@ struct smc_connection { */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ + u8 freed : 1; /* normal termiation */ u8 out_of_sync : 1; /* out of sync with peer */ };
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a6849362f4ddd..84b89d13c3359 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -216,7 +216,6 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - conn->lgr = NULL; }
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) @@ -749,6 +748,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; + smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); @@ -803,6 +803,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&smcibdev->lnk_cnt)) wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold above */ return rc; }
@@ -841,6 +842,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->terminating = 0; lgr->freeing = 0; lgr->vlan_id = ini->vlan_id; + refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */ mutex_init(&lgr->sndbufs_lock); mutex_init(&lgr->rmbs_lock); rwlock_init(&lgr->conns_lock); @@ -1120,8 +1122,19 @@ void smc_conn_free(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr;
- if (!lgr) + if (!lgr || conn->freed) + /* Connection has never been registered in a + * link group, or has already been freed. + */ return; + + conn->freed = 1; + if (!conn->alert_token_local) + /* Connection has already unregistered from + * link group. + */ + goto lgr_put; + if (lgr->is_smcd) { if (!list_empty(&lgr->list)) smc_ism_unset_conn(conn); @@ -1138,6 +1151,8 @@ void smc_conn_free(struct smc_connection *conn)
if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); +lgr_put: + smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */ }
/* unregister a link from a buf_desc */ @@ -1196,9 +1211,10 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk) /* must be called under lgr->llc_conf_mutex lock */ void smcr_link_clear(struct smc_link *lnk, bool log) { + struct smc_link_group *lgr = lnk->lgr; struct smc_ib_device *smcibdev;
- if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED) + if (!lgr || lnk->state == SMC_LNK_UNUSED) return; lnk->peer_qpn = 0; smc_llc_link_clear(lnk, log); @@ -1216,6 +1232,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&smcibdev->lnk_cnt)) wake_up(&smcibdev->lnks_deleted); + smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */ }
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, @@ -1280,6 +1297,21 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) __smc_lgr_free_bufs(lgr, true); }
+/* won't be freed until no one accesses to lgr anymore */ +static void __smc_lgr_free(struct smc_link_group *lgr) +{ + smc_lgr_free_bufs(lgr); + if (lgr->is_smcd) { + if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) + wake_up(&lgr->smcd->lgrs_deleted); + } else { + smc_wr_free_lgr_mem(lgr); + if (!atomic_dec_return(&lgr_cnt)) + wake_up(&lgrs_deleted); + } + kfree(lgr); +} + /* remove a link group */ static void smc_lgr_free(struct smc_link_group *lgr) { @@ -1295,19 +1327,23 @@ static void smc_lgr_free(struct smc_link_group *lgr) smc_llc_lgr_clear(lgr); }
- smc_lgr_free_bufs(lgr); destroy_workqueue(lgr->tx_wq); if (lgr->is_smcd) { smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); - if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) - wake_up(&lgr->smcd->lgrs_deleted); - } else { - smc_wr_free_lgr_mem(lgr); - if (!atomic_dec_return(&lgr_cnt)) - wake_up(&lgrs_deleted); } - kfree(lgr); + smc_lgr_put(lgr); /* theoretically last lgr_put */ +} + +void smc_lgr_hold(struct smc_link_group *lgr) +{ + refcount_inc(&lgr->refcnt); +} + +void smc_lgr_put(struct smc_link_group *lgr) +{ + if (refcount_dec_and_test(&lgr->refcnt)) + __smc_lgr_free(lgr); }
static void smc_sk_wake_ups(struct smc_sock *smc) @@ -1835,6 +1871,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) if (rc) goto out; } + smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */ + conn->freed = 0; conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index d63b08274197e..51203b16307be 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -249,6 +249,7 @@ struct smc_link_group { u8 terminating : 1;/* lgr is terminating */ u8 freeing : 1; /* lgr is being freed */
+ refcount_t refcnt; /* lgr reference count */ bool is_smcd; /* SMC-R or SMC-D */ u8 smc_version; u8 negotiated_eid[SMC_MAX_EID_LEN]; @@ -470,6 +471,8 @@ struct smc_clc_msg_accept_confirm;
void smc_lgr_cleanup_early(struct smc_connection *conn); void smc_lgr_terminate_sched(struct smc_link_group *lgr); +void smc_lgr_hold(struct smc_link_group *lgr); +void smc_lgr_put(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
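The hold/put discipline introduced here is the usual refcount_t pattern; a kernel-style sketch with generic names (not the actual SMC functions) shows the lifetime rule: the creator owns one reference, every user holds another, and only the last put frees the memory:

#include <linux/refcount.h>
#include <linux/slab.h>

struct lgr {
        refcount_t refcnt;
        /* ... the rest of the link group ... */
};

static struct lgr *lgr_create(void)
{
        struct lgr *lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);

        if (lgr)
                refcount_set(&lgr->refcnt, 1);  /* creator's reference */
        return lgr;
}

static void lgr_hold(struct lgr *lgr)
{
        refcount_inc(&lgr->refcnt);     /* taken by each conn/link that uses it */
}

static void lgr_put(struct lgr *lgr)
{
        if (refcount_dec_and_test(&lgr->refcnt))
                kfree(lgr);             /* only the final put frees the memory */
}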
From: Wen Gu guwen@linux.alibaba.com
[ Upstream commit 20c9398d3309d170300d67643b851fd26783af24 ]
We encountered some crashes caused by the race between SMC-R link access and link clear that triggered by abnormal link group termination, such as port error.
Here is an example of this kind of crashes:
BUG: kernel NULL pointer dereference, address: 0000000000000000
Workqueue: smc_hs_wq smc_listen_work [smc]
RIP: 0010:smc_llc_flow_initiate+0x44/0x190 [smc]
Call Trace:
 <TASK>
 ? __smc_buf_create+0x75a/0x950 [smc]
 smcr_lgr_reg_rmbs+0x2a/0xbf [smc]
 smc_listen_work+0xf72/0x1230 [smc]
 ? process_one_work+0x25c/0x600
 process_one_work+0x25c/0x600
 worker_thread+0x4f/0x3a0
 ? process_one_work+0x600/0x600
 kthread+0x15d/0x1a0
 ? set_kthread_struct+0x40/0x40
 ret_from_fork+0x1f/0x30
 </TASK>
smc_listen_work()                           __smc_lgr_terminate()
---------------------------------------------------------------
                                           | smc_lgr_free()
                                           | |- smcr_link_clear()
                                           | |- memset(lnk, 0)
smc_listen_rdma_reg()                      |
|- smcr_lgr_reg_rmbs()                     |
|- smc_llc_flow_initiate()                 |
|- access lnk->lgr (panic)                 |
These crashes are similarly caused by clearing SMC-R link resources while some functions are still accessing them. This patch tries to fix the issue by introducing a reference count for SMC-R links and ensuring that the sensitive resources of a link won't be cleared until its reference count reaches zero.
The operations on the SMC-R link reference count can be summarized as follows:
object        [hold or initialized as 1]      [put]
--------------------------------------------------------------------
links         smcr_link_init()                smcr_link_clear()
connections   smc_conn_create()               smc_conn_free()
In this way, an SMC-R link is cleared only after all the SMC connections above it have been freed, thus avoiding unsafe references to the link.
Signed-off-by: Wen Gu guwen@linux.alibaba.com
Signed-off-by: David S. Miller davem@davemloft.net
Signed-off-by: Sasha Levin sashal@kernel.org
---
 net/smc/smc_core.c | 52 +++++++++++++++++++++++++++++++++++-----------
 net/smc/smc_core.h |  4 ++++
 2 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 84b89d13c3359..f4de45ac88189 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -745,6 +745,8 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, } get_device(&lnk->smcibdev->ibdev->dev); atomic_inc(&lnk->smcibdev->lnk_cnt); + refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */ + lnk->clearing = 0; lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; @@ -994,8 +996,12 @@ void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk) { atomic_dec(&conn->lnk->conn_cnt); + /* link_hold in smc_conn_create() */ + smcr_link_put(conn->lnk); conn->lnk = to_lnk; atomic_inc(&conn->lnk->conn_cnt); + /* link_put in smc_conn_free() */ + smcr_link_hold(conn->lnk); }
struct smc_link *smc_switch_conns(struct smc_link_group *lgr, @@ -1152,6 +1158,8 @@ void smc_conn_free(struct smc_connection *conn) if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); lgr_put: + if (!lgr->is_smcd) + smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */ smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */ }
@@ -1208,22 +1216,11 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk) } }
-/* must be called under lgr->llc_conf_mutex lock */ -void smcr_link_clear(struct smc_link *lnk, bool log) +static void __smcr_link_clear(struct smc_link *lnk) { struct smc_link_group *lgr = lnk->lgr; struct smc_ib_device *smcibdev;
- if (!lgr || lnk->state == SMC_LNK_UNUSED) - return; - lnk->peer_qpn = 0; - smc_llc_link_clear(lnk, log); - smcr_buf_unmap_lgr(lnk); - smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_error(lnk); - smc_wr_free_link(lnk); - smc_ib_destroy_queue_pair(lnk); - smc_ib_dealloc_protection_domain(lnk); smc_wr_free_link_mem(lnk); smc_ibdev_cnt_dec(lnk); put_device(&lnk->smcibdev->ibdev->dev); @@ -1235,6 +1232,35 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */ }
+/* must be called under lgr->llc_conf_mutex lock */ +void smcr_link_clear(struct smc_link *lnk, bool log) +{ + if (!lnk->lgr || lnk->clearing || + lnk->state == SMC_LNK_UNUSED) + return; + lnk->clearing = 1; + lnk->peer_qpn = 0; + smc_llc_link_clear(lnk, log); + smcr_buf_unmap_lgr(lnk); + smcr_rtoken_clear_link(lnk); + smc_ib_modify_qp_error(lnk); + smc_wr_free_link(lnk); + smc_ib_destroy_queue_pair(lnk); + smc_ib_dealloc_protection_domain(lnk); + smcr_link_put(lnk); /* theoretically last link_put */ +} + +void smcr_link_hold(struct smc_link *lnk) +{ + refcount_inc(&lnk->refcnt); +} + +void smcr_link_put(struct smc_link *lnk) +{ + if (refcount_dec_and_test(&lnk->refcnt)) + __smcr_link_clear(lnk); +} + static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc) { @@ -1872,6 +1898,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) goto out; } smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */ + if (!conn->lgr->is_smcd) + smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */ conn->freed = 0; conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 51203b16307be..e73217f52f3dd 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -137,6 +137,8 @@ struct smc_link { u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */ u8 link_idx; /* index in lgr link array */ u8 link_is_asym; /* is link asymmetric? */ + u8 clearing : 1; /* link is being cleared */ + refcount_t refcnt; /* link reference count */ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ char ibname[IB_DEVICE_NAME_MAX]; /* ib device name */ @@ -504,6 +506,8 @@ void smc_core_exit(void); int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini); void smcr_link_clear(struct smc_link *lnk, bool log); +void smcr_link_hold(struct smc_link *lnk); +void smcr_link_put(struct smc_link *lnk); void smc_switch_link_and_count(struct smc_connection *conn, struct smc_link *to_lnk); int smcr_buf_map_lgr(struct smc_link *lnk);
From: Michael Ellerman mpe@ellerman.id.au
[ Upstream commit 6c8dc12cd925e5fa8c152633338b2b35c4c89258 ]
Since commit adeef3e32146 ("net: constify netdev->dev_addr") the mace driver no longer builds with various errors (pmac32_defconfig):
linux/drivers/net/ethernet/apple/mace.c: In function ‘mace_probe’:
linux/drivers/net/ethernet/apple/mace.c:170:20: error: assignment of read-only location ‘*(dev->dev_addr + (sizetype)j)’
  170 |   dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
      |                    ^
linux/drivers/net/ethernet/apple/mace.c: In function ‘mace_reset’:
linux/drivers/net/ethernet/apple/mace.c:349:32: warning: passing argument 2 of ‘__mace_set_address’ discards ‘const’ qualifier from pointer target type
  349 |   __mace_set_address(dev, dev->dev_addr);
      |                           ~~~^~~~~~~~~~
linux/drivers/net/ethernet/apple/mace.c:93:62: note: expected ‘void *’ but argument is of type ‘const unsigned char *’
   93 | static void __mace_set_address(struct net_device *dev, void *addr);
      |                                                        ~~~~~~^~~~
linux/drivers/net/ethernet/apple/mace.c: In function ‘__mace_set_address’:
linux/drivers/net/ethernet/apple/mace.c:388:36: error: assignment of read-only location ‘*(dev->dev_addr + (sizetype)i)’
  388 |   out_8(&mb->padr, dev->dev_addr[i] = p[i]);
      |                                     ^
Fix it by making the modifications to a local macaddr variable and then passing that to eth_hw_addr_set(), as well as adding some missing const qualifiers.
Signed-off-by: Michael Ellerman mpe@ellerman.id.au
Reviewed-by: Jakub Kicinski kuba@kernel.org
Signed-off-by: David S. Miller davem@davemloft.net
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/net/ethernet/apple/mace.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index 4b80e3a52a199..6f8c91eb1263d 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -90,7 +90,7 @@ static void mace_set_timeout(struct net_device *dev); static void mace_tx_timeout(struct timer_list *t); static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma); static inline void mace_clean_rings(struct mace_data *mp); -static void __mace_set_address(struct net_device *dev, void *addr); +static void __mace_set_address(struct net_device *dev, const void *addr);
/* * If we can't get a skbuff when we need it, we use this area for DMA. @@ -112,6 +112,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match) struct net_device *dev; struct mace_data *mp; const unsigned char *addr; + u8 macaddr[ETH_ALEN]; int j, rev, rc = -EBUSY;
if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) { @@ -167,8 +168,9 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
rev = addr[0] == 0 && addr[1] == 0xA0; for (j = 0; j < 6; ++j) { - dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j]; + macaddr[j] = rev ? bitrev8(addr[j]): addr[j]; } + eth_hw_addr_set(dev, macaddr); mp->chipid = (in_8(&mp->mace->chipid_hi) << 8) | in_8(&mp->mace->chipid_lo);
@@ -369,11 +371,12 @@ static void mace_reset(struct net_device *dev) out_8(&mb->plscc, PORTSEL_GPSI + ENPLSIO); }
-static void __mace_set_address(struct net_device *dev, void *addr) +static void __mace_set_address(struct net_device *dev, const void *addr) { struct mace_data *mp = netdev_priv(dev); volatile struct mace __iomem *mb = mp->mace; - unsigned char *p = addr; + const unsigned char *p = addr; + u8 macaddr[ETH_ALEN]; int i;
/* load up the hardware address */ @@ -385,7 +388,10 @@ static void __mace_set_address(struct net_device *dev, void *addr) ; } for (i = 0; i < 6; ++i) - out_8(&mb->padr, dev->dev_addr[i] = p[i]); + out_8(&mb->padr, macaddr[i] = p[i]); + + eth_hw_addr_set(dev, macaddr); + if (mp->chipid != BROKEN_ADDRCHG_REV) out_8(&mb->iac, 0); }
From: Michael Ellerman mpe@ellerman.id.au
[ Upstream commit ea938248557a52e231a31f338eac4baee36a8626 ]
Since commit adeef3e32146 ("net: constify netdev->dev_addr") the bmac driver no longer builds with the following errors (pmac32_defconfig):
linux/drivers/net/ethernet/apple/bmac.c: In function ‘bmac_probe’:
linux/drivers/net/ethernet/apple/bmac.c:1287:20: error: assignment of read-only location ‘*(dev->dev_addr + (sizetype)j)’
 1287 |   dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j];
      |                    ^
Fix it by making the modifications to a local macaddr variable and then passing that to eth_hw_addr_set().
We don't use the existing addr variable because the bitrev8() would mutate it, but it is already used unreversed later in the function.
Signed-off-by: Michael Ellerman mpe@ellerman.id.au
Reviewed-by: Jakub Kicinski kuba@kernel.org
Signed-off-by: David S. Miller davem@davemloft.net
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/net/ethernet/apple/bmac.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index 9a650d1c1bdd1..4d2ba30c2fbd8 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -1237,6 +1237,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) struct bmac_data *bp; const unsigned char *prop_addr; unsigned char addr[6]; + u8 macaddr[6]; struct net_device *dev; int is_bmac_plus = ((int)match->data) != 0;
@@ -1284,7 +1285,9 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
rev = addr[0] == 0 && addr[1] == 0xA0; for (j = 0; j < 6; ++j) - dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j]; + macaddr[j] = rev ? bitrev8(addr[j]): addr[j]; + + eth_hw_addr_set(dev, macaddr);
/* Enable chip without interrupts for now */ bmac_enable_and_reset_chip(dev);
On Sat, 22 Jan 2022 19:11:05 -0500 Sasha Levin wrote:
> From: Michael Ellerman mpe@ellerman.id.au
> [ Upstream commit ea938248557a52e231a31f338eac4baee36a8626 ]
> Since commit adeef3e32146 ("net: constify netdev->dev_addr") the bmac driver no longer builds with the following errors (pmac32_defconfig):
> linux/drivers/net/ethernet/apple/bmac.c: In function ‘bmac_probe’: linux/drivers/net/ethernet/apple/bmac.c:1287:20: error: assignment of read-only location ‘*(dev->dev_addr + (sizetype)j)’ 1287 | dev->dev_addr[j] = rev ? bitrev8(addr[j]): addr[j]; | ^
> Fix it by making the modifications to a local macaddr variable and then passing that to eth_hw_addr_set().
> We don't use the existing addr variable because the bitrev8() would mutate it, but it is already used unreversed later in the function.
Patches 11 and 12 are another case of prep for netdev->dev_addr being const in 5.17; we don't need those backported.
From: Eli Cohen elic@nvidia.com
[ Upstream commit 60af39c1f4cc92cc2785ef745c0c97558134d539 ]
Make sure to offer VIRTIO_NET_F_MTU since we configure the MTU based on what was queried from the device.
This allows the virtio driver to allocate large enough buffers based on the reported MTU.
Signed-off-by: Eli Cohen elic@nvidia.com
Link: https://lore.kernel.org/r/20211124170949.51725-1-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin mst@redhat.com
Acked-by: Jason Wang jasowang@redhat.com
Reviewed-by: Si-Wei Liu si-wei.liu@oracle.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 63813fbb5f62a..d8e69340a25ae 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1895,6 +1895,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ); ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS); + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MTU);
print_features(mvdev, ndev->mvdev.mlx_features, false); return ndev->mvdev.mlx_features;
From: 王贇 yun.wang@linux.alibaba.com
[ Upstream commit 6017599bb25c20b7a68cbb8e7d534bdc1c36b5e4 ]
The error message for the failed PFN check should say virtio-pci rather than virtio-mmio; fix it.
Signed-off-by: Michael Wang yun.wang@linux.alibaba.com
Suggested-by: Michael S. Tsirkin mst@redhat.com
Link: https://lore.kernel.org/r/ae5e154e-ac59-f0fa-a7c7-091a2201f581@linux.alibaba...
Signed-off-by: Michael S. Tsirkin mst@redhat.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/virtio/virtio_pci_legacy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index b3f8128b7983b..34141b9abe278 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -138,7 +138,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; if (q_pfn >> 32) { dev_err(&vp_dev->pci_dev->dev, - "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n", + "platform bug: legacy virtio-pci must not be used with RAM above 0x%llxGB\n", 0x1ULL << (32 + PAGE_SHIFT - 30)); err = -E2BIG; goto out_del_vq;
From: Xianting Tian xianting.tian@linux.alibaba.com
[ Upstream commit 080063920777af65105e5953e2851e036376e3ea ]
We need to free the vqs in .release(); they are allocated in .open().
Signed-off-by: Xianting Tian xianting.tian@linux.alibaba.com
Link: https://lore.kernel.org/r/20211228030924.3468439-1-xianting.tian@linux.aliba...
Signed-off-by: Michael S. Tsirkin mst@redhat.com
Acked-by: Jason Wang jasowang@redhat.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/vhost/test.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index a09dedc79f682..05740cba1cd89 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -166,6 +166,7 @@ static int vhost_test_release(struct inode *inode, struct file *f) /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_test_flush(n); + kfree(n->dev.vqs); kfree(n); return 0; }
From: Laura Abbott labbott@kernel.org
[ Upstream commit 870aaff92e959e29d40f9cfdb5ed06ba2fc2dae0 ]
The return type of get_config_size is size_t so it makes sense to change the type of the variable holding its result.
That said, this already got taken care of (differently, and arguably not as well) by commit 3ed21c1451a1 ("vdpa: check that offsets are within bounds").
The added 'c->off > size' test in that commit will be done as an unsigned comparison on 32-bit (safe due to not being signed).
On a 64-bit platform, it will be done as a signed comparison, but in that case the comparison will be done in 64-bit, and 'c->off' being an u32 it will be valid thanks to the extended range (ie both values will be positive in 64 bits).
So this was a real bug, but it was already addressed and marked for stable.
Signed-off-by: Laura Abbott labbott@kernel.org
Reported-by: Luo Likang luolikang@nsfocus.com
Signed-off-by: Michael S. Tsirkin mst@redhat.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/vhost/vdpa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e3c4f059b21a2..c31737a1da6aa 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -195,7 +195,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v, struct vhost_vdpa_config *c) { struct vdpa_device *vdpa = v->vdpa; - long size = vdpa->config->get_config_size(vdpa); + size_t size = vdpa->config->get_config_size(vdpa);
if (c->len == 0 || c->off > size) return -EINVAL;
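A small standalone demo (assuming an LP64 64-bit build) of the integer conversions discussed above; both the old long and the new size_t variants reject a huge offset, they just get there via different implicit conversions:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t off = 0xffffffffu;     /* largest possible c->off */
        long size_signed = 8;           /* old type of the 'size' variable */
        size_t size_unsigned = 8;       /* new type, matching get_config_size() */

        /*
         * On LP64 the u32 is converted to a positive signed long for the
         * first comparison, and to size_t for the second; both reject the
         * offset. Using size_t just keeps the comparison unsigned everywhere.
         */
        printf("long   bound check rejects: %d\n", off > size_signed);
        printf("size_t bound check rejects: %d\n", off > size_unsigned);
        return 0;
}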
From: Eli Cohen elic@nvidia.com
[ Upstream commit f8ae3a489b21b05c39a0a1a7734f2a0188852177 ]
Make sure that the decision whether an index received through a callback is valid or not consults the negotiated features.
The motivation for this was a case encountered when shutting down a VM: after the reset operation was called the features were already cleared, and a subsequent get_vq_state() call caused an out-of-bounds array access because is_index_valid() still considered the index valid.
So this is more about not hitting a bug, since the call shouldn't have been made in the first place.
Signed-off-by: Eli Cohen elic@nvidia.com
Link: https://lore.kernel.org/r/20220111183400.38418-4-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin mst@redhat.com
Reviewed-by: Si-Wei Liu si-wei.liu@oracle.com
Acked-by: Jason Wang jasowang@redhat.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index d8e69340a25ae..68ace0ad659f2 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -138,10 +138,14 @@ struct mlx5_vdpa_virtqueue {
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) { - if (unlikely(idx > mvdev->max_idx)) - return false; + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) { + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) + return idx < 2; + else + return idx < 3; + }
- return true; + return idx <= mvdev->max_idx; }
struct mlx5_vdpa_net {
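A standalone illustration (not the driver code) of the index rule the patch enforces; the feature bit numbers below are the virtio-net ones (VIRTIO_NET_F_CTRL_VQ = 17, VIRTIO_NET_F_MQ = 22), and max_idx stands for whatever the device reported:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VIRTIO_NET_F_CTRL_VQ    17
#define VIRTIO_NET_F_MQ         22

/* Mirrors the patched is_index_valid(): without MQ only the first rx/tx
 * pair (and the control VQ, if negotiated) exists. */
static bool is_index_valid(uint64_t features, uint16_t max_idx, uint16_t idx)
{
        if (!(features & (1ULL << VIRTIO_NET_F_MQ))) {
                if (!(features & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
                        return idx < 2;         /* rx0/tx0 only */
                return idx < 3;                 /* rx0/tx0 + ctrl vq */
        }
        return idx <= max_idx;                  /* full multiqueue range */
}

int main(void)
{
        uint64_t no_mq = 1ULL << VIRTIO_NET_F_CTRL_VQ;

        printf("idx 2, CTRL_VQ only:  %d\n", is_index_valid(no_mq, 16, 2)); /* 1 */
        printf("idx 5, CTRL_VQ only:  %d\n", is_index_valid(no_mq, 16, 5)); /* 0 */
        printf("idx 5, MQ negotiated: %d\n",
               is_index_valid(no_mq | (1ULL << VIRTIO_NET_F_MQ), 16, 5));   /* 1 */
        return 0;
}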
From: Jens Axboe axboe@kernel.dk
[ Upstream commit ccbf726171b7328f800bc98005132fd77eb1a175 ]
An active work can have poll armed, hence it's not enough to just do the async work removal and return the value if it's different from "not found". Rather than make poll removal special, just fall through to do the remaining type lookups and removals.
Reported-by: Florian Fischer florian.fl.fischer@fau.de
Link: https://lore.kernel.org/io-uring/20220118151337.fac6cthvbnu7icoc@pasture/
Signed-off-by: Jens Axboe axboe@kernel.dk
Signed-off-by: Sasha Levin sashal@kernel.org
---
 fs/io_uring.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index fb2a0cb4aaf83..a958457b2af07 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6316,16 +6316,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr) WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); - if (ret != -ENOENT) - return ret; + /* + * Fall-through even for -EALREADY, as we may have poll armed + * that need unarming. + */ + if (!ret) + return 0;
spin_lock(&ctx->completion_lock); + ret = io_poll_cancel(ctx, sqe_addr, false); + if (ret != -ENOENT) + goto out; + spin_lock_irq(&ctx->timeout_lock); ret = io_timeout_cancel(ctx, sqe_addr); spin_unlock_irq(&ctx->timeout_lock); - if (ret != -ENOENT) - goto out; - ret = io_poll_cancel(ctx, sqe_addr, false); out: spin_unlock(&ctx->completion_lock); return ret;
From: OGAWA Hirofumi hirofumi@mail.parknet.co.jp
[ Upstream commit 3ee859e384d453d6ac68bfd5971f630d9fa46ad3 ]
bio_truncate() clears the buffer outside of the last block of the bdev; however, the current bio_truncate() is using the wrong offset within the page, so it can return uninitialized data.
This happened when both a truncated/corrupted filesystem and userspace (via the bdev) were trying to read the end of the bdev.
Reported-by: syzbot+ac94ae5f68b84197f41c@syzkaller.appspotmail.com
Signed-off-by: OGAWA Hirofumi hirofumi@mail.parknet.co.jp
Reviewed-by: Ming Lei ming.lei@redhat.com
Link: https://lore.kernel.org/r/875yqt1c9g.fsf@mail.parknet.co.jp
Signed-off-by: Jens Axboe axboe@kernel.dk
Signed-off-by: Sasha Levin sashal@kernel.org
---
 block/bio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/block/bio.c b/block/bio.c index 15ab0d6d1c06e..99cad261ec531 100644 --- a/block/bio.c +++ b/block/bio.c @@ -569,7 +569,8 @@ static void bio_truncate(struct bio *bio, unsigned new_size) offset = new_size - done; else offset = 0; - zero_user(bv.bv_page, offset, bv.bv_len - offset); + zero_user(bv.bv_page, bv.bv_offset + offset, + bv.bv_len - offset); truncated = true; } done += bv.bv_len;
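A userspace sketch of the off-by-offset issue, with plain memset() standing in for zero_user(): a bio_vec's data starts bv_offset bytes into its page, so zeroing must start at bv_offset + offset rather than at offset alone:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

int main(void)
{
        unsigned char page[PAGE_SIZE];
        unsigned int bv_offset = 512;   /* segment starts here within the page */
        unsigned int bv_len = 1024;     /* segment length */
        unsigned int offset = 700;      /* bytes of the segment to keep */

        memset(page, 0xaa, sizeof(page));

        /* Buggy form: memset(page + offset, 0, bv_len - offset);
         * zeroes page[700..1023], which is not the tail of the segment. */

        /* Fixed form, matching the patch: start at bv_offset + offset. */
        memset(page + bv_offset + offset, 0, bv_len - offset);

        printf("last kept byte 0x%02x, first zeroed byte 0x%02x\n",
               page[bv_offset + offset - 1], page[bv_offset + offset]);
        return 0;
}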