From: Namjae Jeon linkinjeon@kernel.org
[ Upstream commit fe0fde09e1cb83effcf8fafa372533f438d93a1a ]
I found that normally it is O_NONBLOCK but there are different value for some arch.
/include/linux/net.h: #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif
/arch/alpha/include/asm/socket.h: #define SOCK_NONBLOCK 0x40000000
Use SOCK_NONBLOCK instead of O_NONBLOCK for kernel_accept().
Suggested-by: David Howells dhowells@redhat.com Signed-off-by: Namjae Jeon linkinjeon@kerne.org Reviewed-by: Hyunchul Lee hyc.lee@gmail.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ksmbd/transport_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 8fef9de787d3..143bba4e4db8 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) break; } ret = kernel_accept(iface->ksmbd_socket, &client_sk, - O_NONBLOCK); + SOCK_NONBLOCK); mutex_unlock(&iface->sock_release_lock); if (ret) { if (ret == -EAGAIN)
From: Nathan Lynch nathanl@linux.ibm.com
[ Upstream commit 19fc5bb93c6bbdce8292b4d7eed04e2fa118d2fe ]
kasan detects access beyond the end of the xibm->bitmap allocation:
BUG: KASAN: slab-out-of-bounds in _find_first_zero_bit+0x40/0x140 Read of size 8 at addr c00000001d1d0118 by task swapper/0/1
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-rc2-00001-g90df023b36dd #28 Call Trace: [c00000001d98f770] [c0000000012baab8] dump_stack_lvl+0xac/0x108 (unreliable) [c00000001d98f7b0] [c00000000068faac] print_report+0x37c/0x710 [c00000001d98f880] [c0000000006902c0] kasan_report+0x110/0x354 [c00000001d98f950] [c000000000692324] __asan_load8+0xa4/0xe0 [c00000001d98f970] [c0000000011c6ed0] _find_first_zero_bit+0x40/0x140 [c00000001d98f9b0] [c0000000000dbfbc] xive_spapr_get_ipi+0xcc/0x260 [c00000001d98fa70] [c0000000000d6d28] xive_setup_cpu_ipi+0x1e8/0x450 [c00000001d98fb30] [c000000004032a20] pSeries_smp_probe+0x5c/0x118 [c00000001d98fb60] [c000000004018b44] smp_prepare_cpus+0x944/0x9ac [c00000001d98fc90] [c000000004009f9c] kernel_init_freeable+0x2d4/0x640 [c00000001d98fd90] [c0000000000131e8] kernel_init+0x28/0x1d0 [c00000001d98fe10] [c00000000000cd54] ret_from_kernel_thread+0x5c/0x64
Allocated by task 0: kasan_save_stack+0x34/0x70 __kasan_kmalloc+0xb4/0xf0 __kmalloc+0x268/0x540 xive_spapr_init+0x4d0/0x77c pseries_init_irq+0x40/0x27c init_IRQ+0x44/0x84 start_kernel+0x2a4/0x538 start_here_common+0x1c/0x20
The buggy address belongs to the object at c00000001d1d0118 which belongs to the cache kmalloc-8 of size 8 The buggy address is located 0 bytes inside of 8-byte region [c00000001d1d0118, c00000001d1d0120)
The buggy address belongs to the physical page: page:c00c000000074740 refcount:1 mapcount:0 mapping:0000000000000000 index:0xc00000001d1d0558 pfn:0x1d1d flags: 0x7ffff000000200(slab|node=0|zone=0|lastcpupid=0x7ffff) raw: 007ffff000000200 c00000001d0003c8 c00000001d0003c8 c00000001d010480 raw: c00000001d1d0558 0000000001e1000a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected
Memory state around the buggy address: c00000001d1d0000: fc 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc c00000001d1d0080: fc fc 00 fc fc fc fc fc fc fc fc fc fc fc fc fc
c00000001d1d0100: fc fc fc 02 fc fc fc fc fc fc fc fc fc fc fc fc
^ c00000001d1d0180: fc fc fc fc 04 fc fc fc fc fc fc fc fc fc fc fc c00000001d1d0200: fc fc fc fc fc 04 fc fc fc fc fc fc fc fc fc fc
This happens because the allocation uses the wrong unit (bits) when it should pass (BITS_TO_LONGS(count) * sizeof(long)) or equivalent. With small numbers of bits, the allocated object can be smaller than sizeof(long), which results in invalid accesses.
Use bitmap_zalloc() to allocate and initialize the irq bitmap, paired with bitmap_free() for consistency.
Signed-off-by: Nathan Lynch nathanl@linux.ibm.com Reviewed-by: Cédric Le Goater clg@kaod.org Signed-off-by: Michael Ellerman mpe@ellerman.id.au Link: https://lore.kernel.org/r/20220623182509.3985625-1-nathanl@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/sysdev/xive/spapr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 503f544d28e2..b0d36e430dbc 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -13,6 +13,7 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/bitmap.h> #include <linux/cpumask.h> #include <linux/mm.h> #include <linux/delay.h> @@ -55,7 +56,7 @@ static int __init xive_irq_bitmap_add(int base, int count) spin_lock_init(&xibm->lock); xibm->base = base; xibm->count = count; - xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL); + xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL); if (!xibm->bitmap) { kfree(xibm); return -ENOMEM; @@ -73,7 +74,7 @@ static void xive_irq_bitmap_remove_all(void)
list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { list_del(&xibm->list); - kfree(xibm->bitmap); + bitmap_free(xibm->bitmap); kfree(xibm); } }
From: Eli Cohen elic@nvidia.com
[ Upstream commit ace9252446ec615cd79a5f77d90edb25c0b9d024 ]
Currently, CVQ vringh is initialized inside setup_virtqueues() which is called every time a memory update is done. This is undesirable since it resets all the context of the vring, including the available and used indices.
Move the initialization to mlx5_vdpa_set_status() when VIRTIO_CONFIG_S_DRIVER_OK is set.
Signed-off-by: Eli Cohen elic@nvidia.com Message-Id: 20220613075958.511064-2-elic@nvidia.com Signed-off-by: Michael S. Tsirkin mst@redhat.com Acked-by: Jason Wang jasowang@redhat.com Acked-by: Eugenio Pérez eperezma@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index e0de44000d92..2baab94bf806 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1963,7 +1963,6 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_control_vq *cvq = &mvdev->cvq; int err; int i;
@@ -1973,16 +1972,6 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) goto err_vq; }
- if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { - err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, - (struct vring_desc *)(uintptr_t)cvq->desc_addr, - (struct vring_avail *)(uintptr_t)cvq->driver_addr, - (struct vring_used *)(uintptr_t)cvq->device_addr); - if (err) - goto err_vq; - } - return 0;
err_vq: @@ -2255,6 +2244,21 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) ndev->mvdev.cvq.ready = false; }
+static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_control_vq *cvq = &mvdev->cvq; + int err = 0; + + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + + return err; +} + static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2267,6 +2271,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_cvq_vring(mvdev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); + goto err_setup; + } err = setup_driver(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
From: Parav Pandit parav@nvidia.com
[ Upstream commit 0e0348ac3f0a6e6606f1aa5acb1803ada913aa3d ]
vduse devices are not backed by any real devices such as PCI. Hence it doesn't have any parent device linked to it.
Kernel driver model in [1] suggests to avoid an empty device release callback.
Hence tie the mgmtdevice object's life cycle to an allocate dummy struct device instead of static one.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Docu...
Signed-off-by: Parav Pandit parav@nvidia.com Message-Id: 20220613195223.473966-1-parav@nvidia.com Signed-off-by: Michael S. Tsirkin mst@redhat.com Reviewed-by: Xie Yongji xieyongji@bytedance.com Acked-by: Jason Wang jasowang@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/vdpa/vdpa_user/vduse_dev.c | 60 ++++++++++++++++++------------ 1 file changed, 37 insertions(+), 23 deletions(-)
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 160e40d03084..02709f8a78bd 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1475,16 +1475,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); }
-static void vduse_mgmtdev_release(struct device *dev) -{ -} - -static struct device vduse_mgmtdev = { - .init_name = "vduse", - .release = vduse_mgmtdev_release, +struct vduse_mgmt_dev { + struct vdpa_mgmt_dev mgmt_dev; + struct device dev; };
-static struct vdpa_mgmt_dev mgmt_dev; +static struct vduse_mgmt_dev *vduse_mgmt;
static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { @@ -1509,7 +1505,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) } set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); vdev->vdpa.dma_dev = &vdev->vdpa.dev; - vdev->vdpa.mdev = &mgmt_dev; + vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
return 0; } @@ -1555,34 +1551,52 @@ static struct virtio_device_id id_table[] = { { 0 }, };
-static struct vdpa_mgmt_dev mgmt_dev = { - .device = &vduse_mgmtdev, - .id_table = id_table, - .ops = &vdpa_dev_mgmtdev_ops, -}; +static void vduse_mgmtdev_release(struct device *dev) +{ + struct vduse_mgmt_dev *mgmt_dev; + + mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev); + kfree(mgmt_dev); +}
static int vduse_mgmtdev_init(void) { int ret;
- ret = device_register(&vduse_mgmtdev); - if (ret) + vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL); + if (!vduse_mgmt) + return -ENOMEM; + + ret = dev_set_name(&vduse_mgmt->dev, "vduse"); + if (ret) { + kfree(vduse_mgmt); return ret; + }
- ret = vdpa_mgmtdev_register(&mgmt_dev); + vduse_mgmt->dev.release = vduse_mgmtdev_release; + + ret = device_register(&vduse_mgmt->dev); if (ret) - goto err; + goto dev_reg_err;
- return 0; -err: - device_unregister(&vduse_mgmtdev); + vduse_mgmt->mgmt_dev.id_table = id_table; + vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops; + vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev; + ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev); + if (ret) + device_unregister(&vduse_mgmt->dev); + + return ret; + +dev_reg_err: + put_device(&vduse_mgmt->dev); return ret; }
static void vduse_mgmtdev_exit(void) { - vdpa_mgmtdev_unregister(&mgmt_dev); - device_unregister(&vduse_mgmtdev); + vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev); + device_unregister(&vduse_mgmt->dev); }
static int vduse_init(void)
From: Gayatri Kammela gayatri.kammela@linux.intel.com
[ Upstream commit d63eae6747eb8b3192e89712f6553c6aa162f872 ]
Add Alder Lake N (ADL-N) to the list of the platforms that Intel's PMC core driver supports. Alder Lake N reuses all the TigerLake PCH IPs.
Cc: Srinivas Pandruvada srinivas.pandruvada@intel.com Cc: Andy Shevchenko andriy.shevchenko@linux.intel.com Cc: David E. Box david.e.box@linux.intel.com Signed-off-by: Gayatri Kammela gayatri.kammela@linux.intel.com Reviewed-by: Rajneesh Bhardwaj irenic.rajneesh@gmail.com Link: https://lore.kernel.org/r/20220615002751.3371730-1-gayatri.kammela@linux.int... Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/intel/pmc/core.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 8ee15a7252c7..c3ec5dc88bbf 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1911,6 +1911,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &icl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &tgl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &tgl_reg_map), {}
From: Stephan Gerhold stephan.gerhold@kernkonzept.com
[ Upstream commit ed7ac37fde33ccd84e4bd2b9363c191f925364c7 ]
Most virtio drivers provide freeze/restore callbacks to finish up device usage before suspend and to reinitialize the virtio device after resume. However, these callbacks are currently only called when using virtio_pci. virtio_mmio does not have any PM ops defined.
This causes problems for example after suspend to disk (hibernation), since the virtio devices might lose their state after the VMM is restarted. Calling virtio_device_freeze()/restore() ensures that the virtio devices are re-initialized correctly.
Fix this by implementing the dev_pm_ops for virtio_mmio, similar to virtio_pci_common.
Signed-off-by: Stephan Gerhold stephan.gerhold@kernkonzept.com Message-Id: 20220621110621.3638025-2-stephan.gerhold@kernkonzept.com Signed-off-by: Michael S. Tsirkin mst@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/virtio/virtio_mmio.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+)
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 1dd396d4bebb..7522832529dd 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -62,6 +62,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/pm.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/virtio.h> @@ -543,6 +544,25 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .get_shm_region = vm_get_shm_region, };
+#ifdef CONFIG_PM_SLEEP +static int virtio_mmio_freeze(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_freeze(&vm_dev->vdev); +} + +static int virtio_mmio_restore(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_restore(&vm_dev->vdev); +} + +static const struct dev_pm_ops virtio_mmio_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore) +}; +#endif
static void virtio_mmio_release_dev(struct device *_d) { @@ -786,6 +806,9 @@ static struct platform_driver virtio_mmio_driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match), +#ifdef CONFIG_PM_SLEEP + .pm = &virtio_mmio_pm_ops, +#endif }, };
From: Stephan Gerhold stephan.gerhold@kernkonzept.com
[ Upstream commit e0c2ce8217955537dd5434baeba061f209797119 ]
Virtio devices might lose their state when the VMM is restarted after a suspend to disk (hibernation) cycle. This means that the guest page size register must be restored for the virtio_mmio legacy interface, since otherwise the virtio queues are not functional.
This is particularly problematic for QEMU that currently still defaults to using the legacy interface for virtio_mmio. Write the guest page size register again in virtio_mmio_restore() to make legacy virtio_mmio devices work correctly after hibernation.
Signed-off-by: Stephan Gerhold stephan.gerhold@kernkonzept.com Message-Id: 20220621110621.3638025-3-stephan.gerhold@kernkonzept.com Signed-off-by: Michael S. Tsirkin mst@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/virtio/virtio_mmio.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 7522832529dd..fe696aafaed8 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -556,6 +556,9 @@ static int virtio_mmio_restore(struct device *dev) { struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev);
+ if (vm_dev->version == 1) + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + return virtio_device_restore(&vm_dev->vdev); }
From: Florian Westphal fw@strlen.de
[ Upstream commit e34b9ed96ce3b06c79bf884009b16961ca478f87 ]
When verdict is NF_STOLEN, the skb might have been freed.
When tracing is enabled, this can result in a use-after-free: 1. access to skb->nf_trace 2. access to skb->mark 3. computation of trace id 4. dump of packet payload
To avoid 1, keep a cached copy of skb->nf_trace in the trace state struct. Refresh this copy whenever verdict is != STOLEN.
Avoid 2 by skipping skb->mark access if verdict is STOLEN.
3 is avoided by precomputing the trace id.
Only dump the packet when verdict is not "STOLEN".
Reported-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/net/netfilter/nf_tables.h | 16 ++++++----- net/netfilter/nf_tables_core.c | 24 ++++++++++++++--- net/netfilter/nf_tables_trace.c | 44 +++++++++++++++++-------------- 3 files changed, 55 insertions(+), 29 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 279ae0fff7ad..5c4e5a96a984 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1338,24 +1338,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * @pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; };
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 53f40e473855..3ddce24ac76d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, enum nft_trace_types type) { - const struct nft_pktinfo *pkt = info->pkt; - - if (!info->trace || !pkt->skb->nf_trace) + if (!info->trace || !info->nf_trace) return;
info->chain = chain; @@ -42,11 +40,24 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + info->nf_trace = pkt->skb->nf_trace; info->rule = rule; __nft_trace_packet(info, chain, type); } }
+static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info) +{ + if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + if (info->trace) + info->nf_trace = pkt->skb->nf_trace; + } +} + static void nft_bitwise_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { @@ -85,6 +96,7 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, const struct nft_chain *chain, const struct nft_regs *regs) { + const struct nft_pktinfo *pkt = info->pkt; enum nft_trace_types type;
switch (regs->verdict.code) { @@ -92,8 +104,13 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, case NFT_RETURN: type = NFT_TRACETYPE_RETURN; break; + case NF_STOLEN: + type = NFT_TRACETYPE_RULE; + /* can't access skb->nf_trace; use copy */ + break; default: type = NFT_TRACETYPE_RULE; + info->nf_trace = pkt->skb->nf_trace; break; }
@@ -254,6 +271,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) switch (regs.verdict.code) { case NFT_BREAK: regs.verdict.code = NFT_CONTINUE; + nft_trace_copy_nftrace(&info); continue; case NFT_CONTINUE: nft_trace_packet(&info, chain, rule, diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 5041725423c2..1163ba9c1401 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -7,7 +7,7 @@ #include <linux/module.h> #include <linux/static_key.h> #include <linux/hash.h> -#include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/skbuff.h> @@ -25,22 +25,6 @@ DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); EXPORT_SYMBOL_GPL(nft_trace_enabled);
-static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) -{ - __be32 id; - - /* using skb address as ID results in a limited number of - * values (and quick reuse). - * - * So we attempt to use as many skb members that will not - * change while skb is with netfilter. - */ - id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), - skb->skb_iif); - - return nla_put_be32(nlskb, NFTA_TRACE_ID, id); -} - static int trace_fill_header(struct sk_buff *nlskb, u16 type, const struct sk_buff *skb, int off, unsigned int len) @@ -186,6 +170,7 @@ void nft_trace_notify(struct nft_traceinfo *info) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; + u32 mark = 0; u16 event;
if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) @@ -229,7 +214,7 @@ void nft_trace_notify(struct nft_traceinfo *info) if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) goto nla_put_failure;
- if (trace_fill_id(skb, pkt->skb)) + if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) goto nla_put_failure;
if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) @@ -249,16 +234,24 @@ void nft_trace_notify(struct nft_traceinfo *info) case NFT_TRACETYPE_RULE: if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) goto nla_put_failure; + + /* pkt->skb undefined iff NF_STOLEN, disable dump */ + if (info->verdict->code == NF_STOLEN) + info->packet_dumped = true; + else + mark = pkt->skb->mark; + break; case NFT_TRACETYPE_POLICY: + mark = pkt->skb->mark; + if (nla_put_be32(skb, NFTA_TRACE_POLICY, htonl(info->basechain->policy))) goto nla_put_failure; break; }
- if (pkt->skb->mark && - nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark))) goto nla_put_failure;
if (!info->packet_dumped) { @@ -283,9 +276,20 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_chain *chain) { + static siphash_key_t trace_key __read_mostly; + struct sk_buff *skb = pkt->skb; + info->basechain = nft_base_chain(chain); info->trace = true; + info->nf_trace = pkt->skb->nf_trace; info->packet_dumped = false; info->pkt = pkt; info->verdict = verdict; + + net_get_random_once(&trace_key, sizeof(trace_key)); + + info->skbid = (u32)siphash_3u32(hash32_ptr(skb), + skb_get_hash(skb), + skb->skb_iif, + &trace_key); }
From: Florian Westphal fw@strlen.de
[ Upstream commit c2577862eeb0be94f151f2f1fff662b028061b00 ]
When br_netfilter module is loaded, skbs may be diverted to the ipv4/ipv6 hooks, just like as if we were routing.
Unfortunately, bridge filter hooks with priority 0 may be skipped in this case.
Example: 1. an nftables bridge ruleset is loaded, with a prerouting hook that has priority 0. 2. interface is added to the bridge. 3. no tcp packet is ever seen by the bridge prerouting hook. 4. flush the ruleset 5. load the bridge ruleset again. 6. tcp packets are processed as expected.
After 1) the only registered hook is the bridge prerouting hook, but its not called yet because the bridge hasn't been brought up yet.
After 2), hook order is: 0 br_nf_pre_routing // br_netfilter internal hook 0 chain bridge f prerouting // nftables bridge ruleset
The packet is diverted to br_nf_pre_routing. If call-iptables is off, the nftables bridge ruleset is called as expected.
But if its enabled, br_nf_hook_thresh() will skip it because it assumes that all 0-priority hooks had been called previously in bridge context.
To avoid this, check for the br_nf_pre_routing hook itself, we need to resume directly after it, even if this hook has a priority of 0.
Unfortunately, this still results in different packet flow. With this fix, the eval order after in 3) is: 1. br_nf_pre_routing 2. ip(6)tables (if enabled) 3. nftables bridge
but after 5 its the much saner: 1. nftables bridge 2. br_nf_pre_routing 3. ip(6)tables (if enabled)
Unfortunately I don't see a solution here: It would be possible to move br_nf_pre_routing to a higher priority so that it will be called later in the pipeline, but this also impacts ebtables evaluation order, and would still result in this very ordering problem for all nftables-bridge hooks with the same priority as the br_nf_pre_routing one.
Searching back through the git history I don't think this has ever behaved in any other way, hence, no fixes-tag.
Reported-by: Radim Hrazdil rhrazdil@redhat.com Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org --- net/bridge/br_netfilter_hooks.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 4fd882686b04..ff4779036649 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + }
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
From: John Garry john.garry@huawei.com
[ Upstream commit fce54ed027577517df1e74b7d54dc2b1bd536887 ]
If the controller is behind an IOMMU then the IOMMU IOVA caching range can affect performance, as discussed in [0].
Limit the max HW sectors to not exceed this limit. We need to hardcode the value until a proper DMA mapping API is available.
[0] https://lore.kernel.org/linux-iommu/20210129092120.1482-1-thunder.leizhen@hu...
Link: https://lore.kernel.org/r/1655988119-223714-1-git-send-email-john.garry@huaw... Signed-off-by: John Garry john.garry@huawei.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 7d819fc0395e..eb86afb21aab 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2782,6 +2782,7 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) struct hisi_hba *hisi_hba = shost_priv(shost); struct device *dev = hisi_hba->dev; int ret = sas_slave_configure(sdev); + unsigned int max_sectors;
if (ret) return ret; @@ -2799,6 +2800,12 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) } }
+ /* Set according to IOMMU IOVA caching limit */ + max_sectors = min_t(size_t, queue_max_hw_sectors(sdev->request_queue), + (PAGE_SIZE * 32) >> SECTOR_SHIFT); + + blk_queue_max_hw_sectors(sdev->request_queue, max_sectors); + return 0; }
From: Liang He windhl@126.com
[ Upstream commit ccd7567d4b6cf187fdfa55f003a9e461ee629e36 ]
In pmac_cpufreq_init_MacRISC3(), we need to add corresponding of_node_put() for the three node pointers whose refcount have been incremented by of_find_node_by_name().
Signed-off-by: Liang He windhl@126.com Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/cpufreq/pmac32-cpufreq.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 4f20c6a9108d..8e41fe9ee870 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -470,6 +470,10 @@ static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) if (slew_done_gpio_np) slew_done_gpio = read_gpio(slew_done_gpio_np);
+ of_node_put(volt_gpio_np); + of_node_put(freq_gpio_np); + of_node_put(slew_done_gpio_np); + /* If we use the frequency GPIOs, calculate the min/max speeds based * on the bus frequencies */
From: Mark Pearson markpearson@lenovo.com
[ Upstream commit 42504af775361ca2330a2bfde496a5ebc5655c86 ]
Currently the active mode (PSC/MMC) is stored in an enum and queried throughout the driver.
Other driver changes will enumerate additional submodes that are relevant to be tracked, so instead track PSC/MMC in a single integer variable.
Co-developed-by: Mario Limonciello mario.limonciello@amd.com Signed-off-by: Mario Limonciello mario.limonciello@amd.com Signed-off-by: Mark Pearson markpearson@lenovo.com Link: https://lore.kernel.org/r/20220603170212.164963-1-markpearson@lenovo.com Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/thinkpad_acpi.c | 45 +++++++++++----------------- 1 file changed, 18 insertions(+), 27 deletions(-)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e6cb4a14cdd4..5d1e0a3a5c1e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10299,21 +10299,15 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_DISABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 0) #define DYTC_ENABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 1)
-enum dytc_profile_funcmode { - DYTC_FUNCMODE_NONE = 0, - DYTC_FUNCMODE_MMC, - DYTC_FUNCMODE_PSC, -}; - -static enum dytc_profile_funcmode dytc_profile_available; static enum platform_profile_option dytc_current_profile; static atomic_t dytc_ignore_event = ATOMIC_INIT(0); static DEFINE_MUTEX(dytc_mutex); +static int dytc_capabilities; static bool dytc_mmc_get_available;
static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) { - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10330,7 +10324,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p } return 0; } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10352,21 +10346,21 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe { switch (profile) { case PLATFORM_PROFILE_LOW_POWER: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_LOWPOWER; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_LOWPOWER; break; case PLATFORM_PROFILE_BALANCED: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_BALANCE; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_BALANCE; break; case PLATFORM_PROFILE_PERFORMANCE: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_PERFORM; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_PERFORM; break; default: /* Unknown profile */ @@ -10445,7 +10439,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, if (err) goto unlock;
- if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (profile == PLATFORM_PROFILE_BALANCED) { /* * To get back to balanced mode we need to issue a reset command. @@ -10464,7 +10458,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, goto unlock; } } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; @@ -10483,12 +10477,12 @@ static void dytc_profile_refresh(void) int perfmode;
mutex_lock(&dytc_mutex); - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (dytc_mmc_get_available) err = dytc_command(DYTC_CMD_MMC_GET, &output); else err = dytc_cql_command(DYTC_CMD_GET, &output); - } else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) err = dytc_command(DYTC_CMD_GET, &output);
mutex_unlock(&dytc_mutex); @@ -10517,7 +10511,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) set_bit(PLATFORM_PROFILE_BALANCED, dytc_profile.choices); set_bit(PLATFORM_PROFILE_PERFORMANCE, dytc_profile.choices);
- dytc_profile_available = DYTC_FUNCMODE_NONE; err = dytc_command(DYTC_CMD_QUERY, &output); if (err) return err; @@ -10530,13 +10523,12 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) return -ENODEV;
/* Check what capabilities are supported */ - err = dytc_command(DYTC_CMD_FUNC_CAP, &output); + err = dytc_command(DYTC_CMD_FUNC_CAP, &dytc_capabilities); if (err) return err;
- if (output & BIT(DYTC_FC_MMC)) { /* MMC MODE */ - dytc_profile_available = DYTC_FUNCMODE_MMC; - + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { /* MMC MODE */ + pr_debug("MMC is supported\n"); /* * Check if MMC_GET functionality available * Version > 6 and return success from MMC_GET command @@ -10547,8 +10539,8 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (!err && ((output & DYTC_ERR_MASK) == DYTC_ERR_SUCCESS)) dytc_mmc_get_available = true; } - } else if (output & BIT(DYTC_FC_PSC)) { /* PSC MODE */ - dytc_profile_available = DYTC_FUNCMODE_PSC; + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ + pr_debug("PSC is supported\n"); } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); return -ENODEV; @@ -10574,7 +10566,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm)
static void dytc_profile_exit(void) { - dytc_profile_available = DYTC_FUNCMODE_NONE; platform_profile_remove(); }
From: Mark Pearson markpearson@lenovo.com
[ Upstream commit bce6243f767f7da88aa4674d5d678f9f156eaba9 ]
PSC platform profile mode is only supported on Linux for AMD platforms.
Some older Intel platforms (e.g T490) are advertising it's capability as Windows uses it - but on Linux we should only be using MMC profile for Intel systems.
Add a check to prevent it being enabled incorrectly.
Signed-off-by: Mark Pearson markpearson@lenovo.com Link: https://lore.kernel.org/r/20220627181449.3537-1-markpearson@lenovo.com Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/thinkpad_acpi.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 5d1e0a3a5c1e..832ab77ab961 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10540,6 +10540,11 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) dytc_mmc_get_available = true; } } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ + /* Support for this only works on AMD platforms */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + dbg_printk(TPACPI_DBG_INIT, "PSC not support on Intel platforms\n"); + return -ENODEV; + } pr_debug("PSC is supported\n"); } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n");
From: Kai-Heng Feng kai.heng.feng@canonical.com
[ Upstream commit 9ab762a84b8094540c18a170e5ddd6488632c456 ]
After system resume the hp-wmi driver may complain: [ 702.620180] hp_wmi: Unknown event_id - 23 - 0x0
According to HP it means 'Sanitization Mode' and it's harmless to just ignore the event.
Cc: Jorge Lopez jorge.lopez2@hp.com Signed-off-by: Kai-Heng Feng kai.heng.feng@canonical.com Link: https://lore.kernel.org/r/20220628123726.250062-1-kai.heng.feng@canonical.co... Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/hp-wmi.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 0e6ed75c70f3..c63ec1471b84 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -89,6 +89,7 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, };
/* @@ -846,6 +847,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break;
From: Javier Martinez Canillas javierm@redhat.com
[ Upstream commit 9e121040e54abef9ed5542e5fdfa87911cd96204 ]
This function just returned 0 on success or an errno code on error, but it could be useful for sysfb_init() callers to have a pointer to the device.
Signed-off-by: Javier Martinez Canillas javierm@redhat.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Reviewed-by: Thomas Zimmermann tzimmermann@suse.de Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-2-javier... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/firmware/sysfb.c | 4 ++-- drivers/firmware/sysfb_simplefb.c | 16 ++++++++-------- include/linux/sysfb.h | 10 +++++----- 3 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 2bfbb05f7d89..b032f40a92de 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -46,8 +46,8 @@ static __init int sysfb_init(void) /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { - ret = sysfb_create_simplefb(si, &mode); - if (!ret) + pd = sysfb_create_simplefb(si, &mode); + if (!IS_ERR(pd)) return 0; }
diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index bda8712bfd8c..a353e27f83f5 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -57,8 +57,8 @@ __init bool sysfb_parse_mode(const struct screen_info *si, return false; }
-__init int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +__init struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { struct platform_device *pd; struct resource res; @@ -76,7 +76,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, base |= (u64)si->ext_lfb_base << 32; if (!base || (u64)(resource_size_t)base != base) { printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); }
/* @@ -93,7 +93,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, length = mode->height * mode->stride; if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } length = PAGE_ALIGN(length);
@@ -104,11 +104,11 @@ __init int sysfb_create_simplefb(const struct screen_info *si, res.start = base; res.end = res.start + length - 1; if (res.end <= res.start) - return -EINVAL; + return ERR_PTR(-EINVAL);
pd = platform_device_alloc("simple-framebuffer", 0); if (!pd) - return -ENOMEM; + return ERR_PTR(-ENOMEM);
sysfb_apply_efi_quirks(pd);
@@ -124,10 +124,10 @@ __init int sysfb_create_simplefb(const struct screen_info *si, if (ret) goto err_put_device;
- return 0; + return pd;
err_put_device: platform_device_put(pd);
- return ret; + return ERR_PTR(ret); } diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index b0dcfa26d07b..708152e9037b 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -72,8 +72,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd)
bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); -int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); +struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode);
#else /* CONFIG_SYSFB_SIMPLE */
@@ -83,10 +83,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, return false; }
-static inline int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { - return -EINVAL; + return ERR_PTR(-EINVAL); }
#endif /* CONFIG_SYSFB_SIMPLE */
From: Javier Martinez Canillas javierm@redhat.com
[ Upstream commit bde376e9de3c0bc55eedc8956b0f114c05531595 ]
This can be used by subsystems to unregister a platform device registered by sysfb and also to disable future platform device registration in sysfb.
Suggested-by: Daniel Vetter daniel.vetter@ffwll.ch Signed-off-by: Javier Martinez Canillas javierm@redhat.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-3-javier... Signed-off-by: Sasha Levin sashal@kernel.org --- .../driver-api/firmware/other_interfaces.rst | 6 +++ drivers/firmware/sysfb.c | 54 ++++++++++++++++--- include/linux/sysfb.h | 12 +++++ 3 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst index b81794e0cfbb..06ac89adaafb 100644 --- a/Documentation/driver-api/firmware/other_interfaces.rst +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -13,6 +13,12 @@ EDD Interfaces .. kernel-doc:: drivers/firmware/edd.c :internal:
+Generic System Framebuffers Interface +------------------------------------- + +.. kernel-doc:: drivers/firmware/sysfb.c + :export: + Intel Stratix10 SoC Service Layer --------------------------------- Some features of the Intel Stratix10 SoC require a level of privilege diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index b032f40a92de..1f276f108cc9 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -34,21 +34,59 @@ #include <linux/screen_info.h> #include <linux/sysfb.h>
+static struct platform_device *pd; +static DEFINE_MUTEX(disable_lock); +static bool disabled; + +static bool sysfb_unregister(void) +{ + if (IS_ERR_OR_NULL(pd)) + return false; + + platform_device_unregister(pd); + pd = NULL; + + return true; +} + +/** + * sysfb_disable() - disable the Generic System Framebuffers support + * + * This disables the registration of system framebuffer devices that match the + * generic drivers that make use of the system framebuffer set up by firmware. + * + * It also unregisters a device if this was already registered by sysfb_init(). + * + * Context: The function can sleep. A @disable_lock mutex is acquired to serialize + * against sysfb_init(), that registers a system framebuffer device. + */ +void sysfb_disable(void) +{ + mutex_lock(&disable_lock); + sysfb_unregister(); + disabled = true; + mutex_unlock(&disable_lock); +} +EXPORT_SYMBOL_GPL(sysfb_disable); + static __init int sysfb_init(void) { struct screen_info *si = &screen_info; struct simplefb_platform_data mode; - struct platform_device *pd; const char *name; bool compatible; - int ret; + int ret = 0; + + mutex_lock(&disable_lock); + if (disabled) + goto unlock_mutex;
/* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { pd = sysfb_create_simplefb(si, &mode); if (!IS_ERR(pd)) - return 0; + goto unlock_mutex; }
/* if the FB is incompatible, create a legacy framebuffer device */ @@ -60,8 +98,10 @@ static __init int sysfb_init(void) name = "platform-framebuffer";
pd = platform_device_alloc(name, 0); - if (!pd) - return -ENOMEM; + if (!pd) { + ret = -ENOMEM; + goto unlock_mutex; + }
sysfb_apply_efi_quirks(pd);
@@ -73,9 +113,11 @@ static __init int sysfb_init(void) if (ret) goto err;
- return 0; + goto unlock_mutex; err: platform_device_put(pd); +unlock_mutex: + mutex_unlock(&disable_lock); return ret; }
diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 708152e9037b..8ba8b5be5567 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -55,6 +55,18 @@ struct efifb_dmi_info { int flags; };
+#ifdef CONFIG_SYSFB + +void sysfb_disable(void); + +#else /* CONFIG_SYSFB */ + +static inline void sysfb_disable(void) +{ +} + +#endif /* CONFIG_SYSFB */ + #ifdef CONFIG_EFI
extern struct efifb_dmi_info efifb_dmi_list[];
From: Javier Martinez Canillas javierm@redhat.com
[ Upstream commit ee7a69aa38d87a3bbced7b8245c732c05ed0c6ec ]
The platform devices registered by sysfb match with firmware-based DRM or fbdev drivers, that are used to have early graphics using a framebuffer provided by the system firmware.
DRM or fbdev drivers later are probed and remove conflicting framebuffers, leading to these platform devices for generic drivers to be unregistered.
But the current solution has a race, since the sysfb_init() function could be called after a DRM or fbdev driver is probed and request to unregister the devices for drivers with conflicting framebuffes.
To prevent this, disable any future sysfb platform device registration by calling sysfb_disable(), if a driver requests to remove the conflicting framebuffers.
Suggested-by: Daniel Vetter daniel.vetter@ffwll.ch Signed-off-by: Javier Martinez Canillas javierm@redhat.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Link: https://patchwork.freedesktop.org/patch/msgid/20220607182338.344270-4-javier... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/video/fbdev/core/fbmem.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index a6bb0e438216..70b67e24e830 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/major.h> #include <linux/slab.h> +#include <linux/sysfb.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/vt.h> @@ -1775,6 +1776,17 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, do_free = true; }
+ /* + * If a driver asked to unregister a platform device registered by + * sysfb, then can be assumed that this is a driver for a display + * that is set up by the system firmware and has a generic driver. + * + * Drivers for devices that don't have a generic driver will never + * ask for this, so let's assume that a real driver for the display + * was already probed and prevent sysfb to register devices later. + */ + sysfb_disable(); + mutex_lock(®istration_lock); do_remove_conflicting_framebuffers(a, name, primary); mutex_unlock(®istration_lock);
From: Hangyu Hua hbh25y@gmail.com
[ Upstream commit 00aff3590fc0a73bddd3b743863c14e76fd35c0c ]
Free sk in case tipc_sk_insert() fails.
Signed-off-by: Hangyu Hua hbh25y@gmail.com Reviewed-by: Tung Nguyen tung.q.nguyen@dektech.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 17f8c523e33b..43509c7e90fc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; }
From: Michael Walle michael@walle.cc
[ Upstream commit 9577fc5fdc8b07b891709af6453545db405e24ad ]
Don't print a misleading header length mismatch error if the i2c call returns an error. Instead just return the error code without any error message.
Signed-off-by: Michael Walle michael@walle.cc Reviewed-by: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nfc/nxp-nci/i2c.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 7e451c10985d..3767a900cd6e 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN);
r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len); - if (r != frame_len) { + if (r < 0) { + goto fw_read_exit_free_skb; + } else if (r != frame_len) { nfc_err(&client->dev, "Invalid frame length: %u (expected %zu)\n", r, frame_len); @@ -163,7 +165,9 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE);
r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); - if (r != header.plen) { + if (r < 0) { + goto nci_read_exit_free_skb; + } else if (r != header.plen) { nfc_err(&client->dev, "Invalid frame payload length: %u (expected %u)\n", r, header.plen);
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit 41d07df7de841bfbc32725ce21d933ad358f2844 ]
queue stoppage and inflight requests cancellation is fully fenced from io_work and thus failing a request from this context. Hence we don't need to try to guess from the socket retcode if this failure is because the queue is about to be torn down or not.
We are perfectly safe to just fail it, the request will not be cancelled later on.
This solves possible very long shutdown delays when the users issues a 'nvme disconnect-all'
Reported-by: Daniel Wagner dwagner@suse.de Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ad3a2bf2f1e9..e44d0570e694 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1180,8 +1180,7 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) } else if (ret < 0) { dev_err(queue->ctrl->ctrl.device, "failed to send request %d\n", ret); - if (ret != -EPIPE && ret != -ECONNRESET) - nvme_tcp_fail_request(queue->request); + nvme_tcp_fail_request(queue->request); nvme_tcp_done_send_req(queue); } return ret;
From: Ruozhu Li liruozhu@huawei.com
[ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ]
We encountered a problem that the disconnect command hangs. After analyzing the log and stack, we found that the triggering process is as follows: CPU0 CPU1 nvme_rdma_error_recovery_work nvme_rdma_teardown_io_queues nvme_do_delete_ctrl nvme_stop_queues nvme_remove_namespaces --clear ctrl->namespaces nvme_start_queues --no ns in ctrl->namespaces nvme_ns_remove return(because ctrl is deleting) blk_freeze_queue blk_mq_freeze_queue_wait --wait for ns to unquiesce to clean infligt IO, hang forever
This problem was not found in older kernels because we will flush err work in nvme_stop_ctrl before nvme_remove_namespaces.It does not seem to be modified for functional reasons, the patch can be revert to solve the problem.
Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout")
Signed-off-by: Ruozhu Li liruozhu@huawei.com Reviewed-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/rdma.c | 12 +++++++++--- drivers/nvme/host/tcp.c | 10 +++++++--- 4 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a2862a56fadc..dc981fbeb51c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4519,6 +4519,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a2b53ca63335..337ae1e3ad25 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -501,6 +501,7 @@ struct nvme_ctrl_ops { void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); };
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d9f19d901313..5aef2b81dbec 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1048,6 +1048,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, } }
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -2255,9 +2263,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - nvme_rdma_teardown_io_queues(ctrl, shutdown); nvme_stop_admin_queue(&ctrl->ctrl); if (shutdown) @@ -2307,6 +2312,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, };
/* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e44d0570e694..1fb4f9b1621e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2193,9 +2193,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); - cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); - nvme_tcp_teardown_io_queues(ctrl, shutdown); nvme_stop_admin_queue(ctrl); if (shutdown) @@ -2235,6 +2232,12 @@ static void nvme_reset_ctrl_work(struct work_struct *work) nvme_tcp_reconnect_or_remove(ctrl); }
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) +{ + cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); + cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); +} + static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); @@ -2559,6 +2562,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, };
static bool
From: Jianglei Nie niejianglei2021@163.com
[ Upstream commit 0a18d802d65cf662644fd1d369c86d84a5630652 ]
sfp_probe() allocates a memory chunk from sfp with sfp_alloc(). When devm_add_action() fails, sfp is not freed, which leads to a memory leak.
We should use devm_add_action_or_reset() instead of devm_add_action().
Signed-off-by: Jianglei Nie niejianglei2021@163.com Reviewed-by: Russell King (Oracle) rmk+kernel@armlinux.org.uk Link: https://lore.kernel.org/r/20220629075550.2152003-1-niejianglei2021@163.com Signed-off-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/phy/sfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 9a5d5a10560f..e7b0e12cc75b 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2516,7 +2516,7 @@ static int sfp_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sfp);
- err = devm_add_action(sfp->dev, sfp_cleanup, sfp); + err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp); if (err < 0) return err;
linux-stable-mirror@lists.linaro.org