arch_bpf_trampoline_size() provides the JIT size of the BPF trampoline
before the buffer for JIT'ing it is allocated. The total number of
instructions emitted for the BPF trampoline JIT code depends on where
the final image is located. So, the size arrived at with the dummy
pass in arch_bpf_trampoline_size() can vary from the actual size
needed in arch_prepare_bpf_trampoline(). When the number of
instructions accounted for in arch_bpf_trampoline_size() is less than
the number of instructions emitted during the actual JIT compile of
the trampoline, the below warning is produced:
WARNING: CPU: 8 PID: 204190 at arch/powerpc/net/bpf_jit_comp.c:981 __arch_prepare_bpf_trampoline.isra.0+0xd2c/0xdcc
which is:
/* Make sure the trampoline generation logic doesn't overflow */
if (image && WARN_ON_ONCE(&image[ctx->idx] >
(u32 *)rw_image_end - BPF_INSN_SAFETY)) {
So, during the dummy pass, instead of providing some arbitrary image
location, account for the maximum possible number of instructions
wherever the emitted code depends on the image location.
Fixes: d243b62b7bd3 ("powerpc64/bpf: Add support for bpf trampolines")
Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Closes: https://lore.kernel.org/all/6168bfc8-659f-4b5a-a6fb-90a916dde3b3@linux.ibm.…
Cc: stable@vger.kernel.org # v6.13+
Acked-by: Naveen N Rao (AMD) <naveen@kernel.org>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
Changes since v2:
- Address review comments from Naveen:
- Remove additional padding for 'case BPF_LD | BPF_IMM | BPF_DW:'
in arch/powerpc/net/bpf_jit_comp*.c
- Merge the if sequence in bpf_jit_emit_func_call_rel() with the
other conditionals
- Naveen, carried your Acked-by tag as the additional changes are
minimal and in line with your suggestion. Feel free to update
if you look at it differently.
- Venkat, carried your Tested-by tag from v2 as the changes in
v3 should not alter the test result. Feel free to update if
you look at it differently.
Changes since v1:
- Pass NULL for image during the initial pass and account for the max.
  possible instructions during this pass, as Naveen suggested.
arch/powerpc/net/bpf_jit.h | 20 ++++++++++++++++---
arch/powerpc/net/bpf_jit_comp.c | 33 ++++++++++---------------------
arch/powerpc/net/bpf_jit_comp32.c | 6 ------
arch/powerpc/net/bpf_jit_comp64.c | 15 +++++++-------
4 files changed, 35 insertions(+), 39 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 6beacaec63d3..4c26912c2e3c 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,8 +51,16 @@
EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
} while (0)
-/* Sign-extended 32-bit immediate load */
+/*
+ * Sign-extended 32-bit immediate load
+ *
+ * If this is a dummy pass (!image), account for
+ * maximum possible instructions.
+ */
#define PPC_LI32(d, i) do { \
+ if (!image) \
+ ctx->idx += 2; \
+ else { \
if ((int)(uintptr_t)(i) >= -32768 && \
(int)(uintptr_t)(i) < 32768) \
EMIT(PPC_RAW_LI(d, i)); \
@@ -60,10 +68,15 @@
EMIT(PPC_RAW_LIS(d, IMM_H(i))); \
if (IMM_L(i)) \
EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \
- } } while(0)
+ } \
+ } } while (0)
#ifdef CONFIG_PPC64
+/* If dummy pass (!image), account for maximum possible instructions */
#define PPC_LI64(d, i) do { \
+ if (!image) \
+ ctx->idx += 5; \
+ else { \
if ((long)(i) >= -2147483648 && \
(long)(i) < 2147483648) \
PPC_LI32(d, i); \
@@ -84,7 +97,8 @@
if ((uintptr_t)(i) & 0x000000000000ffffULL) \
EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \
0xffff)); \
- } } while (0)
+ } \
+ } } while (0)
#define PPC_LI_ADDR PPC_LI64
#ifndef CONFIG_PPC_KERNEL_PCREL
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2991bb171a9b..c0684733e9d6 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -504,10 +504,11 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct
EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
if (!p->jited)
PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
- if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func,
- BRANCH_SET_LINK)) {
- if (image)
- image[ctx->idx] = ppc_inst_val(branch_insn);
+ /* Account for max possible instructions during dummy pass for size calculation */
+ if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx],
+ (unsigned long)p->bpf_func,
+ BRANCH_SET_LINK)) {
+ image[ctx->idx] = ppc_inst_val(branch_insn);
ctx->idx++;
} else {
EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
@@ -889,7 +890,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
/* Reserve space to patch branch instruction to skip fexit progs */
- im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
EMIT(PPC_RAW_NOP());
}
@@ -912,7 +914,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
PPC_LI_ADDR(_R3, im);
ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
(unsigned long)__bpf_tramp_exit);
@@ -973,25 +976,9 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
struct bpf_tramp_image im;
- void *image;
int ret;
- /*
- * Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
- * This will NOT cause fragmentation in direct map, as we do not
- * call set_memory_*() on this buffer.
- *
- * We cannot use kvmalloc here, because we need image to be in
- * module memory range.
- */
- image = bpf_jit_alloc_exec(PAGE_SIZE);
- if (!image)
- return -ENOMEM;
-
- ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
- m, flags, tlinks, func_addr);
- bpf_jit_free_exec(image);
-
+ ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
return ret;
}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index c4db278dae36..0aace304dfe1 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -313,7 +313,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
u64 func_addr;
u32 true_cond;
u32 tmp_idx;
- int j;
if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) &&
(BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) &&
@@ -1099,13 +1098,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
* 16 byte instruction that uses two 'struct bpf_insn'
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
- tmp_idx = ctx->idx;
PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
PPC_LI32(dst_reg, (u32)insn[i].imm);
- /* padding to allow full 4 instructions for later patching */
- if (!image)
- for (j = ctx->idx - tmp_idx; j < 4; j++)
- EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 233703b06d7c..5daa77aee7f7 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -227,7 +227,14 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *
#ifdef CONFIG_PPC_KERNEL_PCREL
reladdr = func_addr - local_paca->kernelbase;
- if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
+ /*
+ * If fimage is NULL (the initial pass to find image size),
+ * account for the maximum no. of instructions possible.
+ */
+ if (!fimage) {
+ ctx->idx += 7;
+ return 0;
+ } else if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)));
/* Align for subsequent prefix instruction */
if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8))
@@ -412,7 +419,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
u64 imm64;
u32 true_cond;
u32 tmp_idx;
- int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -1046,12 +1052,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
- tmp_idx = ctx->idx;
PPC_LI64(dst_reg, imm64);
- /* padding to allow full 5 instructions for later patching */
- if (!image)
- for (j = ctx->idx - tmp_idx; j < 5; j++)
- EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
--
2.49.0
The ftrace __mcount_loc buildtime sort does not work properly when the host is
32-bit and the target is 64-bit. sorttable parses the start and stop addresses
by calling strtoul on the buffer holding the hexadecimal string. Since the
target is 64-bit but unsigned long on 32-bit machines is 32 bits wide,
strtoul, and by extension the start and stop addresses, can max out
at 2^32 - 1.
This patch adds a new macro, parse_addr, which expands to a strtoul
or strtoull call depending on whether sorttable is operating on a
32-bit ELF or a 64-bit ELF. This way, the correct width is guaranteed
regardless of whether the host is 32-bit. This should apply cleanly
to all of the 6.x stable kernels.
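As a standalone illustration of the truncation (the address value is
just an example), compiled on a 32-bit host:
```c
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* A typical 64-bit kernel text address (example value) */
	const char *addr = "ffffffff81000000";

	/* When unsigned long is 32 bits, this clamps to ULONG_MAX (2^32 - 1) */
	unsigned long v32 = strtoul(addr, NULL, 16);
	/* strtoull always provides the full 64-bit range */
	unsigned long long v64 = strtoull(addr, NULL, 16);

	printf("strtoul : %#lx (ULONG_MAX = %#lx)\n", v32, ULONG_MAX);
	printf("strtoull: %#llx\n", v64);
	return 0;
}
```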
Manually verified that the __mcount_loc section is sorted by parsing the
ELF, and ran the checks corresponding to CONFIG_FTRACE_SORT_STARTUP_TEST
for kernels built on both a 32-bit and a 64-bit host.
Signed-off-by: Sahil Gupta <s.gupta@arista.com>
---
scripts/sorttable.h | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index 7bd0184380d3..9ed7acca9f30 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -40,6 +40,7 @@
#undef uint_t
#undef _r
#undef _w
+#undef parse_addr
#ifdef SORTTABLE_64
# define extable_ent_size 16
@@ -65,6 +66,7 @@
# define uint_t uint64_t
# define _r r8
# define _w w8
+# define parse_addr(buf) strtoull(buf, NULL, 16)
#else
# define extable_ent_size 8
# define compare_extable compare_extable_32
@@ -89,6 +91,7 @@
# define uint_t uint32_t
# define _r r
# define _w w
+# define parse_addr(buf) strtoul(buf, NULL, 16)
#endif
#if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
@@ -246,13 +249,13 @@ static void get_mcount_loc(uint_t *_start, uint_t *_stop)
len = strlen(start_buff);
start_buff[len - 1] = '\0';
}
- *_start = strtoul(start_buff, NULL, 16);
+ *_start = parse_addr(start_buff);
while (fgets(stop_buff, sizeof(stop_buff), file_stop) != NULL) {
len = strlen(stop_buff);
stop_buff[len - 1] = '\0';
}
- *_stop = strtoul(stop_buff, NULL, 16);
+ *_stop = parse_addr(stop_buff);
pclose(file_start);
pclose(file_stop);
--
2.47.0
When a CL/CSD job times out, we check if the GPU has made any progress
since the last timeout. If so, instead of resetting the hardware, we skip
the reset and let the timer get rearmed. This gives long-running jobs a
chance to complete.
However, when `timedout_job()` is called, the job in question is removed
from the pending list, which means it won't be automatically freed through
`free_job()`. Consequently, when we skip the reset and keep the job
running, the job won't be freed when it finally completes.
This situation leads to a memory leak, as exposed in [1] and [2].
Similarly to commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when
GPU is still active"), this patch ensures the job is put back on the
pending list when extending the timeout.
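The gist of the fix, as a sketch of the resulting timeout-handler
pattern (the progress check and reset path are placeholders, not the
actual v3d code):
```c
static enum drm_gpu_sched_stat
example_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;

	if (gpu_made_progress(sched_job)) {	/* placeholder check */
		/* Re-add the job so free_job() still runs on completion */
		spin_lock(&sched->job_list_lock);
		list_add(&sched_job->list, &sched->pending_list);
		spin_unlock(&sched->job_list_lock);

		return DRM_GPU_SCHED_STAT_NOMINAL;
	}

	return do_full_gpu_reset(sched_job);	/* placeholder reset path */
}
```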
Cc: stable@vger.kernel.org # 6.0
Reported-by: Daivik Bhatia <dtgs1208@gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12227 [1]
Closes: https://github.com/raspberrypi/linux/issues/6817 [2]
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
Hi,
While we typically strive to avoid exposing the scheduler's internals
within the drivers, I'm proposing this fix as an interim solution. I'm aware
that a comprehensive fix will need some adjustments on the DRM sched side,
and I plan to address that soon.
However, it would be hard to justify backporting such patches to the
stable branches, and this issue is affecting users right now.
Therefore, I'd like to push this patch to drm-misc-fixes in order to
address this leak as soon as possible, while working on a more generic
solution.
Best Regards,
- Maíra
v1 -> v2: https://lore.kernel.org/dri-devel/20250427202907.94415-2-mcanal@igalia.com/
- Protect the pending list by using its spinlock.
- Add the URL of another downstream issue related to this patch.
drivers/gpu/drm/v3d/v3d_sched.c | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index b3be08b0ca91..c612363181df 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -734,11 +734,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
return DRM_GPU_SCHED_STAT_NOMINAL;
}
-/* If the current address or return address have changed, then the GPU
- * has probably made progress and we should delay the reset. This
- * could fail if the GPU got in an infinite loop in the CL, but that
- * is pretty unlikely outside of an i-g-t testcase.
- */
+static void
+v3d_sched_skip_reset(struct drm_sched_job *sched_job)
+{
+ struct drm_gpu_scheduler *sched = sched_job->sched;
+
+ spin_lock(&sched->job_list_lock);
+ list_add(&sched_job->list, &sched->pending_list);
+ spin_unlock(&sched->job_list_lock);
+}
+
static enum drm_gpu_sched_stat
v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
u32 *timedout_ctca, u32 *timedout_ctra)
@@ -748,9 +753,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
+ /* If the current address or return address have changed, then the GPU
+ * has probably made progress and we should delay the reset. This
+ * could fail if the GPU got in an infinite loop in the CL, but that
+ * is pretty unlikely outside of an i-g-t testcase.
+ */
if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
*timedout_ctca = ctca;
*timedout_ctra = ctra;
+
+ v3d_sched_skip_reset(sched_job);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -790,11 +802,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job)
struct v3d_dev *v3d = job->base.v3d;
u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver));
- /* If we've made progress, skip reset and let the timer get
- * rearmed.
+ /* If we've made progress, skip reset, add the job to the pending
+ * list, and let the timer get rearmed.
*/
if (job->timedout_batches != batches) {
job->timedout_batches = batches;
+
+ v3d_sched_skip_reset(sched_job);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
--
2.49.0
From: Ashish Kalra <ashish.kalra@amd.com>
When the shared pages are being made private during kdump preparation,
there are additional checks to handle shared GHCB pages.
These additional checks include handling the case of the GHCB page
being contained within a huge page.
While handling the case of the GHCB page being contained within a huge
page, any shared page just below the GHCB page is skipped and never
transitioned back to private during kdump preparation.
This subsequently causes a 0x404 #VC exception when this skipped
shared page is accessed later while dumping guest memory during
vmcore generation via kdump.
Split the initial check for skipping the GHCB page into two cases: the
page being examined being the GHCB page itself (a 4K mapping) and the
GHCB page being contained within a huge page. Also ensure that the
skipped huge page containing the GHCB page is transitioned back to
private later, when changing the GHCBs to private at the end of kdump
preparation.
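To illustrate the corrected containment test (a hypothetical helper;
the actual change is inline in unshare_all_memory() below):
```c
/* Sketch: does the mapping at addr (of the given level/size) hold the
 * GHCB page? A 4K page must match exactly; a huge page must contain
 * the GHCB within its half-open [addr, addr + size) range.
 */
static bool mapping_holds_ghcb(unsigned long addr, unsigned long size,
			       int level, unsigned long ghcb)
{
	if (level == PG_LEVEL_4K)
		return addr == ghcb;

	return addr <= ghcb && ghcb < addr + size;
}
```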
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: stable@vger.kernel.org
Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec")
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
arch/x86/coco/sev/core.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index d35fec7b164a..1f53383bd1fa 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -1019,7 +1019,13 @@ static void unshare_all_memory(void)
data = per_cpu(runtime_data, cpu);
ghcb = (unsigned long)&data->ghcb_page;
- if (addr <= ghcb && ghcb <= addr + size) {
+ /* Handle the case of a huge page containing the GHCB page */
+ if (level == PG_LEVEL_4K && addr == ghcb) {
+ skipped_addr = true;
+ break;
+ }
+ if (level > PG_LEVEL_4K && addr <= ghcb &&
+ ghcb < addr + size) {
skipped_addr = true;
break;
}
@@ -1131,8 +1137,8 @@ static void shutdown_all_aps(void)
void snp_kexec_finish(void)
{
struct sev_es_runtime_data *data;
+ unsigned long size, mask;
unsigned int level, cpu;
- unsigned long size;
struct ghcb *ghcb;
pte_t *pte;
@@ -1160,6 +1166,10 @@ void snp_kexec_finish(void)
ghcb = &data->ghcb_page;
pte = lookup_address((unsigned long)ghcb, &level);
size = page_level_size(level);
+ mask = page_level_mask(level);
+ /* Handle the case of a huge page containing the GHCB page */
+ if (level > PG_LEVEL_4K)
+ ghcb = (struct ghcb *)((unsigned long)ghcb & mask);
set_pte_enc(pte, level, (void *)ghcb);
snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
}
--
2.34.1
After commit 0a65bc27bd64 ("eventpoll: Set epoll timeout if it's in
the future"), the following program would immediately enter a busy
loop in the kernel:
```
int main() {
int e = epoll_create1(0);
struct epoll_event event = {.events = EPOLLIN};
epoll_ctl(e, EPOLL_CTL_ADD, 0, &event);
const struct timespec timeout = {.tv_nsec = 1};
epoll_pwait2(e, &event, 1, &timeout, 0);
}
```
This happens because the given (non-zero) timeout of 1 nanosecond
usually expires before ep_poll() is entered and then
ep_schedule_timeout() returns false, but `timed_out` is never set
because the code line that sets it is skipped. This quickly turns
into a soft lockup, RCU stalls and deadlocks, inflicting severe
headaches on the whole system.
When the timeout has expired, we don't need to schedule a hrtimer, but
we should set the `timed_out` variable. Therefore, I suggest moving
the ep_schedule_timeout() check into the `timed_out` expression
instead of skipping it.
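Spelled out, the resulting condition (a commented restatement of the
hunk below) reads:
```c
if (!eavail)
	/*
	 * The wait timed out if either the deadline had already passed
	 * before the hrtimer could be armed (ep_schedule_timeout()
	 * returns false), or the armed hrtimer expired before a wakeup.
	 */
	timed_out = !ep_schedule_timeout(to) ||
		    !schedule_hrtimeout_range(to, slack,
					      HRTIMER_MODE_ABS);
```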
Fixes: 0a65bc27bd64 ("eventpoll: Set epoll timeout if it's in the future")
Cc: Joe Damato <jdamato@fastly.com>
Cc: stable@vger.kernel.org
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
fs/eventpoll.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4bc264b854c4..d4dbffdedd08 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2111,9 +2111,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
write_unlock_irq(&ep->lock);
- if (!eavail && ep_schedule_timeout(to))
- timed_out = !schedule_hrtimeout_range(to, slack,
- HRTIMER_MODE_ABS);
+ if (!eavail)
+ timed_out = !ep_schedule_timeout(to) ||
+ !schedule_hrtimeout_range(to, slack,
+ HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
--
2.47.2