June 2022 - Linux-stable-mirror

[PATCH v2 2/2] powerpc/kvm: don't crash on missing rng, and use darn

by Jason A. Donenfeld

On POWER8 systems that don't have ibm,power-rng available, a guest that ignores the KVM_CAP_PPC_HWRNG flag and calls H_RANDOM anyway will dereference a NULL pointer. And on machines with darn instead of ibm,power-rng, H_RANDOM won't work at all. This patch kills two birds with one stone, by routing H_RANDOM calls to ppc_md.get_random_seed, and doing the real mode check inside of it. Cc: stable(a)vger.kernel.org # v4.1+ Cc: Michael Ellerman <mpe(a)ellerman.id.au> Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.") Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com> --- arch/powerpc/include/asm/archrandom.h | 5 ---- arch/powerpc/kvm/book3s_hv_builtin.c | 7 +++--- arch/powerpc/platforms/powernv/rng.c | 33 +++++++-------------------- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 11d4815841ab..3af27bb84a3d 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV -int pnv_hwrng_present(void); int pnv_get_random_long(unsigned long *v); -int pnv_get_random_real_mode(unsigned long *v); -#else -static inline int pnv_hwrng_present(void) { return 0; } -static inline int pnv_get_random_real_mode(unsigned long *v) { return 0; } #endif #endif /* _ASM_POWERPC_ARCHRANDOM_H */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 799d40c2ab4f..3abaef5f9ac2 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -19,7 +19,7 @@ #include <asm/interrupt.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/archrandom.h> +#include <asm/machdep.h> #include <asm/xics.h> #include <asm/xive.h> #include <asm/dbell.h> @@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); int kvmppc_hwrng_present(void) { - return pnv_hwrng_present(); + return ppc_md.get_random_seed != NULL; } EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); long kvmppc_rm_h_random(struct kvm_vcpu *vcpu) { - if (pnv_get_random_real_mode(&vcpu->arch.regs.gpr[4])) + if (ppc_md.get_random_seed && + ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4])) return H_SUCCESS; return H_HARDWARE; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 868bb9777425..c748567cd47e 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -29,15 +29,6 @@ struct pnv_rng { static DEFINE_PER_CPU(struct pnv_rng *, pnv_rng); -int pnv_hwrng_present(void) -{ - struct pnv_rng *rng; - - rng = get_cpu_var(pnv_rng); - put_cpu_var(rng); - return rng != NULL; -} - static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val) { unsigned long parity; @@ -58,17 +49,6 @@ static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val) return val; } -int pnv_get_random_real_mode(unsigned long *v) -{ - struct pnv_rng *rng; - - rng = raw_cpu_read(pnv_rng); - - *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); - - return 1; -} - static int pnv_get_random_darn(unsigned long *v) { unsigned long val; @@ -105,11 +85,14 @@ int pnv_get_random_long(unsigned long *v) { struct pnv_rng *rng; - rng = get_cpu_var(pnv_rng); - - *v = rng_whiten(rng, in_be64(rng->regs)); - - put_cpu_var(rng); + if (mfmsr() & MSR_DR) { + rng = raw_cpu_read(pnv_rng); + *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); + } else { + rng = get_cpu_var(pnv_rng); + *v = rng_whiten(rng, in_be64(rng->regs)); + put_cpu_var(rng); + } return 1; } -- 2.35.1

3 years, 3 months

2
1
0 0

[PATCH] powerpc/kvm: don't crash on missing rng, and use darn

by Jason A. Donenfeld

On POWER8 systems that don't have ibm,power-rng available, a guest that ignores the KVM_CAP_PPC_HWRNG flag and calls H_RANDOM anyway will dereference a NULL pointer. And on machines with darn instead of ibm,power-rng, H_RANDOM won't work at all. This patch kills two birds with one stone, by routing H_RANDOM calls to ppc_md.get_random_seed, and doing the real mode check inside of it. Cc: stable(a)vger.kernel.org # v4.1+ Cc: Michael Ellerman <mpe(a)ellerman.id.au> Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.") Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com> --- This patch must be applied ontop of: 1) https://github.com/linuxppc/linux/commit/f3eac426657d985b97c92fa5f7ae1d43f0… 2) https://lore.kernel.org/all/20220622102532.173393-1-Jason@zx2c4.com/ arch/powerpc/include/asm/archrandom.h | 5 ---- arch/powerpc/kvm/book3s_hv_builtin.c | 5 ++-- arch/powerpc/platforms/powernv/rng.c | 33 +++++++-------------------- 3 files changed, 11 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 11d4815841ab..3af27bb84a3d 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV -int pnv_hwrng_present(void); int pnv_get_random_long(unsigned long *v); -int pnv_get_random_real_mode(unsigned long *v); -#else -static inline int pnv_hwrng_present(void) { return 0; } -static inline int pnv_get_random_real_mode(unsigned long *v) { return 0; } #endif #endif /* _ASM_POWERPC_ARCHRANDOM_H */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 799d40c2ab4f..1c6672826db5 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); int kvmppc_hwrng_present(void) { - return pnv_hwrng_present(); + return ppc_md.get_random_seed != NULL; } EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); long kvmppc_rm_h_random(struct kvm_vcpu *vcpu) { - if (pnv_get_random_real_mode(&vcpu->arch.regs.gpr[4])) + if (ppc_md.get_random_seed && + ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4])) return H_SUCCESS; return H_HARDWARE; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 868bb9777425..c748567cd47e 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -29,15 +29,6 @@ struct pnv_rng { static DEFINE_PER_CPU(struct pnv_rng *, pnv_rng); -int pnv_hwrng_present(void) -{ - struct pnv_rng *rng; - - rng = get_cpu_var(pnv_rng); - put_cpu_var(rng); - return rng != NULL; -} - static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val) { unsigned long parity; @@ -58,17 +49,6 @@ static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val) return val; } -int pnv_get_random_real_mode(unsigned long *v) -{ - struct pnv_rng *rng; - - rng = raw_cpu_read(pnv_rng); - - *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); - - return 1; -} - static int pnv_get_random_darn(unsigned long *v) { unsigned long val; @@ -105,11 +85,14 @@ int pnv_get_random_long(unsigned long *v) { struct pnv_rng *rng; - rng = get_cpu_var(pnv_rng); - - *v = rng_whiten(rng, in_be64(rng->regs)); - - put_cpu_var(rng); + if (mfmsr() & MSR_DR) { + rng = raw_cpu_read(pnv_rng); + *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); + } else { + rng = get_cpu_var(pnv_rng); + *v = rng_whiten(rng, in_be64(rng->regs)); + put_cpu_var(rng); + } return 1; } -- 2.35.1

3 years, 3 months

2
2
0 0

[PATCH v2] MAINTAINERS: Add new IOMMU development mailing list

by Joerg Roedel

From: Joerg Roedel <jroedel(a)suse.de> The IOMMU mailing list will move from lists.linux-foundation.org to lists.linux.dev. The hard switch of the archive will happen on July 5th, but add the new list now already so that people start using the list when sending patches. After July 5th the old list will disappear. Cc: stable(a)vger.kernel.org Signed-off-by: Joerg Roedel <jroedel(a)suse.de> --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3cf9842d9233..36d1bc999815 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -427,6 +427,7 @@ ACPI VIOT DRIVER M: Jean-Philippe Brucker <jean-philippe(a)linaro.org> L: linux-acpi(a)vger.kernel.org L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Maintained F: drivers/acpi/viot.c F: include/linux/acpi_viot.h @@ -960,6 +961,7 @@ AMD IOMMU (AMD-VI) M: Joerg Roedel <joro(a)8bytes.org> R: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/amd/ @@ -5962,6 +5964,7 @@ M: Christoph Hellwig <hch(a)lst.de> M: Marek Szyprowski <m.szyprowski(a)samsung.com> R: Robin Murphy <robin.murphy(a)arm.com> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -5974,6 +5977,7 @@ F: kernel/dma/ DMA MAPPING BENCHMARK M: Xiang Chen <chenxiang66(a)hisilicon.com> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ @@ -7558,6 +7562,7 @@ F: drivers/gpu/drm/exynos/exynos_dp* EXYNOS SYSMMU (IOMMU) driver M: Marek Szyprowski <m.szyprowski(a)samsung.com> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Maintained F: drivers/iommu/exynos-iommu.c @@ -9977,6 +9982,7 @@ INTEL IOMMU (VT-d) M: David Woodhouse <dwmw2(a)infradead.org> M: Lu Baolu <baolu.lu(a)linux.intel.com> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ @@ -10356,6 +10362,7 @@ IOMMU DRIVERS M: Joerg Roedel <joro(a)8bytes.org> M: Will Deacon <will(a)kernel.org> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: Documentation/devicetree/bindings/iommu/ @@ -12504,6 +12511,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c MEDIATEK IOMMU DRIVER M: Yong Wu <yong.wu(a)mediatek.com> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev L: linux-mediatek(a)lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/iommu/mediatek* @@ -16545,6 +16553,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c QUALCOMM IOMMU M: Rob Clark <robdclark(a)gmail.com> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev L: linux-arm-msm(a)vger.kernel.org S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -19170,6 +19179,7 @@ F: arch/x86/boot/video* SWIOTLB SUBSYSTEM M: Christoph Hellwig <hch(a)infradead.org> L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git @@ -21844,6 +21854,7 @@ M: Juergen Gross <jgross(a)suse.com> M: Stefano Stabellini <sstabellini(a)kernel.org> L: xen-devel(a)lists.xenproject.org (moderated for non-subscribers) L: iommu(a)lists.linux-foundation.org +L: iommu(a)lists.linux.dev S: Supported F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* -- 2.36.1

3 years, 3 months

3
2
0 0

[PATCH v4] xen/gntdev: Avoid blocking in unmap_grant_pages()

by Demi Marie Obenour

unmap_grant_pages() currently waits for the pages to no longer be used. In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a deadlock against i915: i915 was waiting for gntdev's MMU notifier to finish, while gntdev was waiting for i915 to free its pages. I also believe this is responsible for various deadlocks I have experienced in the past. Avoid these problems by making unmap_grant_pages async. This requires making it return void, as any errors will not be available when the function returns. Fortunately, the only use of the return value is a WARN_ON(), which can be replaced by a WARN_ON when the error is detected. Additionally, a failed call will not prevent further calls from being made, but this is harmless. Because unmap_grant_pages is now async, the grant handle will be sent to INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same handle. Instead, a separate bool array is allocated for this purpose. This wastes memory, but stuffing this information in padding bytes is too fragile. Furthermore, it is necessary to grab a reference to the map before making the asynchronous call, and release the reference when the call returns. It is also necessary to guard against reentrancy in gntdev_map_put(), and to handle the case where userspace tries to map a mapping whose contents have not all been freed yet. Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use") Cc: stable(a)vger.kernel.org Signed-off-by: Demi Marie Obenour <demi(a)invisiblethingslab.com> --- drivers/xen/gntdev-common.h | 7 ++ drivers/xen/gntdev.c | 157 ++++++++++++++++++++++++------------ 2 files changed, 113 insertions(+), 51 deletions(-) diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 20d7d059dadb..40ef379c28ab 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -16,6 +16,7 @@ #include <linux/mmu_notifier.h> #include <linux/types.h> #include <xen/interface/event_channel.h> +#include <xen/grant_table.h> struct gntdev_dmabuf_priv; @@ -56,6 +57,7 @@ struct gntdev_grant_map { struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_map_grant_ref *kmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops; + bool *being_removed; struct page **pages; unsigned long pages_vm_start; @@ -73,6 +75,11 @@ struct gntdev_grant_map { /* Needed to avoid allocation in gnttab_dma_free_pages(). */ xen_pfn_t *frames; #endif + + /* Number of live grants */ + atomic_t live_grants; + /* Needed to avoid allocation in __unmap_grant_pages */ + struct gntab_unmap_queue_data unmap_data; }; struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 59ffea800079..4b56c39f766d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/refcount.h> +#include <linux/workqueue.h> #include <xen/xen.h> #include <xen/grant_table.h> @@ -60,10 +61,11 @@ module_param(limit, uint, 0644); MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by one mapping request"); +/* True in PV mode, false otherwise */ static int use_ptemod; -static int unmap_grant_pages(struct gntdev_grant_map *map, - int offset, int pages); +static void unmap_grant_pages(struct gntdev_grant_map *map, + int offset, int pages); static struct miscdevice gntdev_miscdev; @@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map) kvfree(map->unmap_ops); kvfree(map->kmap_ops); kvfree(map->kunmap_ops); + kvfree(map->being_removed); kfree(map); } @@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); + add->being_removed = + kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || - NULL == add->pages) + NULL == add->pages || + NULL == add->being_removed) goto err; if (use_ptemod) { add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]), @@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; - if (map->pages && !use_ptemod) + if (map->pages && !use_ptemod) { + /* + * Increment the reference count. This ensures that the + * subsequent call to unmap_grant_pages() will not wind up + * re-entering itself. It *can* wind up calling + * gntdev_put_map() recursively, but such calls will be with a + * reference count greater than 1, so they will return before + * this code is reached. The recursion depth is thus limited to + * 1. Do NOT use refcount_inc() here, as it will detect that + * the reference count is zero and WARN(). + */ + refcount_set(&map->users, 1); + + /* + * Unmap the grants. This may or may not be asynchronous, so it + * is possible that the reference count is 1 on return, but it + * could also be greater than 1. + */ unmap_grant_pages(map, 0, map->count); + /* Check if the memory now needs to be freed */ + if (!refcount_dec_and_test(&map->users)) + return; + + /* + * All pages have been returned to the hypervisor, so free the + * map. + */ + } + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); @@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) int gntdev_map_grant_pages(struct gntdev_grant_map *map) { + size_t alloced = 0; int i, err = 0; if (!use_ptemod) { @@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) map->count); for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status == GNTST_okay) + if (map->map_ops[i].status == GNTST_okay) { map->unmap_ops[i].handle = map->map_ops[i].handle; - else if (!err) + if (!use_ptemod) + alloced++; + } else if (!err) err = -EINVAL; if (map->flags & GNTMAP_device_map) map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; if (use_ptemod) { - if (map->kmap_ops[i].status == GNTST_okay) + if (map->kmap_ops[i].status == GNTST_okay) { + if (map->map_ops[i].status == GNTST_okay) + alloced++; map->kunmap_ops[i].handle = map->kmap_ops[i].handle; - else if (!err) + } else if (!err) err = -EINVAL; } } + atomic_add(alloced, &map->live_grants); return err; } -static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void __unmap_grant_pages_done(int result, + struct gntab_unmap_queue_data *data) { - int i, err = 0; - struct gntab_unmap_queue_data unmap_data; - - if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { - int pgno = (map->notify.addr >> PAGE_SHIFT); - if (pgno >= offset && pgno < offset + pages) { - /* No need for kmap, pages are in lowmem */ - uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); - tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; - map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; - } - } - - unmap_data.unmap_ops = map->unmap_ops + offset; - unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; - unmap_data.pages = map->pages + offset; - unmap_data.count = pages; - - err = gnttab_unmap_refs_sync(&unmap_data); - if (err) - return err; + unsigned int i; + struct gntdev_grant_map *map = data->data; + unsigned int offset = data->unmap_ops - map->unmap_ops; - for (i = 0; i < pages; i++) { - if (map->unmap_ops[offset+i].status) - err = -EINVAL; + for (i = 0; i < data->count; i++) { + WARN_ON(map->unmap_ops[offset+i].status); pr_debug("unmap handle=%d st=%d\n", map->unmap_ops[offset+i].handle, map->unmap_ops[offset+i].status); map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; if (use_ptemod) { - if (map->kunmap_ops[offset+i].status) - err = -EINVAL; + WARN_ON(map->kunmap_ops[offset+i].status); pr_debug("kunmap handle=%u st=%d\n", map->kunmap_ops[offset+i].handle, map->kunmap_ops[offset+i].status); map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE; } } - return err; + /* + * Decrease the live-grant counter. This must happen after the loop to + * prevent premature reuse of the grants by gnttab_mmap(). + */ + atomic_sub(data->count, &map->live_grants); + + /* Release reference taken by __unmap_grant_pages */ + gntdev_put_map(NULL, map); +} + +static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) +{ + if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { + int pgno = (map->notify.addr >> PAGE_SHIFT); + + if (pgno >= offset && pgno < offset + pages) { + /* No need for kmap, pages are in lowmem */ + uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); + + tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; + } + } + + map->unmap_data.unmap_ops = map->unmap_ops + offset; + map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; + map->unmap_data.pages = map->pages + offset; + map->unmap_data.count = pages; + map->unmap_data.done = __unmap_grant_pages_done; + map->unmap_data.data = map; + refcount_inc(&map->users); /* to keep map alive during async call below */ + + gnttab_unmap_refs_async(&map->unmap_data); } -static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, - int pages) +static void unmap_grant_pages(struct gntdev_grant_map *map, int offset, + int pages) { - int range, err = 0; + int range; + + if (atomic_read(&map->live_grants) == 0) + return; /* Nothing to do */ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); /* It is possible the requested range will have a "hole" where we * already unmapped some of the grants. Only unmap valid ranges. */ - while (pages && !err) { - while (pages && - map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) { + while (pages) { + while (pages && map->being_removed[offset]) { offset++; pages--; } range = 0; while (range < pages) { - if (map->unmap_ops[offset + range].handle == - INVALID_GRANT_HANDLE) + if (map->being_removed[offset + range]) break; + map->being_removed[offset + range] = true; range++; } - err = __unmap_grant_pages(map, offset, range); + if (range) + __unmap_grant_pages(map, offset, range); offset += range; pages -= range; } - - return err; } /* ------------------------------------------------------------------ */ @@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, struct gntdev_grant_map *map = container_of(mn, struct gntdev_grant_map, notifier); unsigned long mstart, mend; - int err; if (!mmu_notifier_range_blockable(range)) return false; @@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn, map->index, map->count, map->vma->vm_start, map->vma->vm_end, range->start, range->end, mstart, mend); - err = unmap_grant_pages(map, + unmap_grant_pages(map, (mstart - map->vma->vm_start) >> PAGE_SHIFT, (mend - mstart) >> PAGE_SHIFT); - WARN_ON(err); return true; } @@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; if (use_ptemod && map->vma) goto unlock_out; + if (atomic_read(&map->live_grants)) { + err = -EAGAIN; + goto unlock_out; + } refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; -- 2.36.1

3 years, 3 months

2
2
0 0

[PATCH v5 1/9] ia64, processor: fix -Wincompatible-pointer-types in ia64_get_irr()

by Alexander Lobakin

test_bit(), as any other bitmap op, takes `unsigned long *` as a second argument (pointer to the actual bitmap), as any bitmap itself is an array of unsigned longs. However, the ia64_get_irr() code passes a ref to `u64` as a second argument. This works with the ia64 bitops implementation due to that they have `void *` as the second argument and then cast it later on. This works with the bitmap API itself due to that `unsigned long` has the same size on ia64 as `u64` (`unsigned long long`), but from the compiler PoV those two are different. Define @irr as `unsigned long` to fix that. That implies no functional changes. Has been hidden for 16 years! Fixes: a58786917ce2 ("[IA64] avoid broken SAL_CACHE_FLUSH implementations") Cc: stable(a)vger.kernel.org # 2.6.16+ Reported-by: kernel test robot <lkp(a)intel.com> Signed-off-by: Alexander Lobakin <alexandr.lobakin(a)intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com> Reviewed-by: Yury Norov <yury.norov(a)gmail.com> --- arch/ia64/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 7cbce290f4e5..757c2f6d8d4b 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -538,7 +538,7 @@ ia64_get_irr(unsigned int vector) { unsigned int reg = vector / 64; unsigned int bit = vector % 64; - u64 irr; + unsigned long irr; switch (reg) { case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; -- 2.36.1

3 years, 3 months

1
0
0 0

patch "usb: chipidea: udc: check request status before setting device" added to usb-linus

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled usb: chipidea: udc: check request status before setting device to my usb git tree which can be found at git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git in the usb-linus branch. The patch will show up in the next release of the linux-next tree (usually sometime within the next 24 hours during the week.) The patch will hopefully also be merged in Linus's tree for the next -rc kernel release. If you have any questions about this process, please let me know. From b24346a240b36cfc4df194d145463874985aa29b Mon Sep 17 00:00:00 2001 From: Xu Yang <xu.yang_2(a)nxp.com> Date: Thu, 23 Jun 2022 11:02:42 +0800 Subject: usb: chipidea: udc: check request status before setting device address The complete() function may be called even though request is not completed. In this case, it's necessary to check request status so as not to set device address wrongly. Fixes: 10775eb17bee ("usb: chipidea: udc: update gadget states according to ch9") cc: <stable(a)vger.kernel.org> Signed-off-by: Xu Yang <xu.yang_2(a)nxp.com> Link: https://lore.kernel.org/r/20220623030242.41796-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/usb/chipidea/udc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index dc6c96e04bcf..3b8bf6daf7d0 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1048,6 +1048,9 @@ isr_setup_status_complete(struct usb_ep *ep, struct usb_request *req) struct ci_hdrc *ci = req->context; unsigned long flags; + if (req->status < 0) + return; + if (ci->setaddr) { hw_usb_set_address(ci, ci->address); ci->setaddr = false; -- 2.36.1

3 years, 3 months

1
0
0 0

patch "USB: gadget: Fix double-free bug in raw_gadget driver" added to usb-linus

by gregkh＠linuxfoundation.org

This is a note to let you know that I've just added the patch titled USB: gadget: Fix double-free bug in raw_gadget driver to my usb git tree which can be found at git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git in the usb-linus branch. The patch will show up in the next release of the linux-next tree (usually sometime within the next 24 hours during the week.) The patch will hopefully also be merged in Linus's tree for the next -rc kernel release. If you have any questions about this process, please let me know. From 90bc2af24638659da56397ff835f3c95a948f991 Mon Sep 17 00:00:00 2001 From: Alan Stern <stern(a)rowland.harvard.edu> Date: Wed, 22 Jun 2022 10:46:31 -0400 Subject: USB: gadget: Fix double-free bug in raw_gadget driver Re-reading a recently merged fix to the raw_gadget driver showed that it inadvertently introduced a double-free bug in a failure pathway. If raw_ioctl_init() encounters an error after the driver ID number has been allocated, it deallocates the ID number before returning. But when dev_free() runs later on, it will then try to deallocate the ID number a second time. Closely related to this issue is another error in the recent fix: The ID number is stored in the raw_dev structure before the code checks to see whether the structure has already been initialized, in which case the new ID number would overwrite the earlier value. The solution to both bugs is to keep the new ID number in a local variable, and store it in the raw_dev structure only after the check for prior initialization. No errors can occur after that point, so the double-free will never happen. Fixes: f2d8c2606825 ("usb: gadget: Fix non-unique driver names in raw-gadget driver") CC: Andrey Konovalov <andreyknvl(a)gmail.com> CC: <stable(a)vger.kernel.org> Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu> Link: https://lore.kernel.org/r/YrMrRw5AyIZghN0v@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> --- drivers/usb/gadget/legacy/raw_gadget.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 5c8481cef35f..2acece16b890 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -430,6 +430,7 @@ static int raw_release(struct inode *inode, struct file *fd) static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) { int ret = 0; + int driver_id_number; struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; @@ -452,10 +453,9 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) return -EINVAL; } - ret = ida_alloc(&driver_id_numbers, GFP_KERNEL); - if (ret < 0) - return ret; - dev->driver_id_number = ret; + driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL); + if (driver_id_number < 0) + return driver_id_number; driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); if (!driver_driver_name) { @@ -463,7 +463,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) goto out_free_driver_id_number; } snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, - DRIVER_NAME ".%d", dev->driver_id_number); + DRIVER_NAME ".%d", driver_id_number); udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_driver_name) { @@ -507,6 +507,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; + dev->driver_id_number = driver_id_number; dev->state = STATE_DEV_INITIALIZED; spin_unlock_irqrestore(&dev->lock, flags); @@ -521,7 +522,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) out_free_driver_driver_name: kfree(driver_driver_name); out_free_driver_id_number: - ida_free(&driver_id_numbers, dev->driver_id_number); + ida_free(&driver_id_numbers, driver_id_number); return ret; } -- 2.36.1

3 years, 3 months

1
0
0 0

[PATCH RESEND] tools/vm/slabinfo: Handle files in debugfs

by Stéphane Graber

Commit 64dd68497be76 relocated and renamed the alloc_calls and free_calls files from /sys/kernel/slab/NAME/*_calls over to /sys/kernel/debug/slab/NAME/*_calls but didn't update the slabinfo tool with the new location. This change will now have slabinfo look at the new location (and filenames) with a fallback to the prior files. Fixes: 64dd68497be76 ("mm: slub: move sysfs slab alloc/free interfaces to debugfs") Cc: stable(a)vger.kernel.org Signed-off-by: Stéphane Graber <stgraber(a)ubuntu.com> Tested-by: Stéphane Graber <stgraber(a)ubuntu.com> --- tools/vm/slabinfo.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 9b68658b6bb8..5b98f3ee58a5 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -233,6 +233,24 @@ static unsigned long read_slab_obj(struct slabinfo *s, const char *name) return l; } +static unsigned long read_debug_slab_obj(struct slabinfo *s, const char *name) +{ + char x[128]; + FILE *f; + size_t l; + + snprintf(x, 128, "/sys/kernel/debug/slab/%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer), f); + buffer[l] = 0; + fclose(f); + } + return l; +} /* * Put a size string together @@ -409,14 +427,18 @@ static void show_tracking(struct slabinfo *s) { printf("\n%s: Kernel object allocation\n", s->name); printf("-----------------------------------------------------------------------\n"); - if (read_slab_obj(s, "alloc_calls")) + if (read_debug_slab_obj(s, "alloc_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "alloc_calls")) printf("%s", buffer); else printf("No Data\n"); printf("\n%s: Kernel object freeing\n", s->name); printf("------------------------------------------------------------------------\n"); - if (read_slab_obj(s, "free_calls")) + if (read_debug_slab_obj(s, "free_traces")) + printf("%s", buffer); + else if (read_slab_obj(s, "free_calls")) printf("%s", buffer); else printf("No Data\n"); -- 2.34.1

3 years, 3 months

2
1
0 0

Re: [PATCH v2 2/2] fsnotify: consistent behavior for parent not watching children

by Amir Goldstein

On Wed, May 11, 2022 at 10:02 PM Amir Goldstein <amir73il(a)gmail.com> wrote: > > The logic for handling events on child in groups that have a mark on > the parent inode, but without FS_EVENT_ON_CHILD flag in the mask is > duplicated in several places and inconsistent. > > Move the logic into the preparation of mark type iterator, so that the > parent mark type will be excluded from all mark type iterations in that > case. > > This results in several subtle changes of behavior, hopefully all > desired changes of behavior, for example: > > - Group A has a mount mark with FS_MODIFY in mask > - Group A has a mark with ignore mask that does not survive FS_MODIFY > and does not watch children on directory D. > - Group B has a mark with FS_MODIFY in mask that does watch children > on directory D. > - FS_MODIFY event on file D/foo should not clear the ignore mask of > group A, but before this change it does > > And if group A ignore mask was set to survive FS_MODIFY: > - FS_MODIFY event on file D/foo should be reported to group A on account > of the mount mark, but before this change it is wrongly ignored > > Fixes: 2f02fd3fa13e ("fanotify: fix ignore mask logic for events on child and on dir") > Reported-by: Jan Kara <jack(a)suse.com> > Link: https://lore.kernel.org/linux-fsdevel/20220314113337.j7slrb5srxukztje@quack… > Signed-off-by: Amir Goldstein <amir73il(a)gmail.com> > --- Greg, FYI, this needs the previous commit to apply to 5.18.y: e730558adffb fsnotify: consistent behavior for parent not watching children 14362a254179 fsnotify: introduce mark type iterator They won't apply to earlier versions and this is a fix for a very minor bug that existed forever, so no need to bother. Thanks, Amir.

3 years, 3 months

2
4
0 0

[PATCH v3 1/1] iommu/vt-d: Fix RID2PASID setup/teardown failure

by Lu Baolu

The IOMMU driver shares the pasid table for PCI alias devices. When the RID2PASID entry of the shared pasid table has been filled by the first device, the subsequent device will encounter the "DMAR: Setup RID2PASID failed" failure as the pasid entry has already been marked as present. As the result, the IOMMU probing process will be aborted. On the contrary, when any alias device is hot-removed from the system, for example, by writing to /sys/bus/pci/devices/.../remove, the shared RID2PASID will be cleared without any notifications to other devices. As the result, any DMAs from those rest devices are blocked. Sharing pasid table among PCI alias devices could save two memory pages for devices underneath the PCIe-to-PCI bridges. Anyway, considering that those devices are rare on modern platforms that support VT-d in scalable mode and the saved memory is negligible, it's reasonable to remove this part of immature code to make the driver feasible and stable. Fixes: ef848b7e5a6a0 ("iommu/vt-d: Setup pasid entry for RID2PASID support") Reported-by: Chenyi Qiang <chenyi.qiang(a)intel.com> Reported-by: Ethan Zhao <haifeng.zhao(a)linux.intel.com> Cc: stable(a)vger.kernel.org Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com> --- include/linux/intel-iommu.h | 3 -- drivers/iommu/intel/pasid.h | 1 - drivers/iommu/intel/iommu.c | 24 ------------- drivers/iommu/intel/pasid.c | 69 ++----------------------------------- 4 files changed, 3 insertions(+), 94 deletions(-) Change log: v3: - Ethan pointed out that there's also problem in the device release path. Let's remove this part of immature code for now. v2: - Add domain validity check in RID2PASID entry setup. diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4f29139bbfc3..5fcf89faa31a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -612,7 +612,6 @@ struct intel_iommu { struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ - struct list_head table; /* link to pasid table */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ @@ -729,8 +728,6 @@ extern int dmar_ir_support(void); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); -int for_each_device_domain(int (*fn)(struct device_domain_info *info, - void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 583ea67fc783..bf5b937848b4 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -74,7 +74,6 @@ struct pasid_table { void *table; /* pasid table pointer */ int order; /* page order of pasid table */ u32 max_pasid; /* max pasid */ - struct list_head dev; /* device list */ }; /* Get PRESENT bit of a PASID directory entry. */ diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 44016594831d..5c0dce78586a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -320,30 +320,6 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -/* - * Iterate over elements in device_domain_list and call the specified - * callback @fn against each element. - */ -int for_each_device_domain(int (*fn)(struct device_domain_info *info, - void *data), void *data) -{ - int ret = 0; - unsigned long flags; - struct device_domain_info *info; - - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(info, &device_domain_list, global) { - ret = fn(info, data); - if (ret) { - spin_unlock_irqrestore(&device_domain_lock, flags); - return ret; - } - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; -} - const struct iommu_ops intel_iommu_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index cb4c1d0cf25c..17cad7c1f62d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -86,54 +86,6 @@ void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid) /* * Per device pasid table management: */ -static inline void -device_attach_pasid_table(struct device_domain_info *info, - struct pasid_table *pasid_table) -{ - info->pasid_table = pasid_table; - list_add(&info->table, &pasid_table->dev); -} - -static inline void -device_detach_pasid_table(struct device_domain_info *info, - struct pasid_table *pasid_table) -{ - info->pasid_table = NULL; - list_del(&info->table); -} - -struct pasid_table_opaque { - struct pasid_table **pasid_table; - int segment; - int bus; - int devfn; -}; - -static int search_pasid_table(struct device_domain_info *info, void *opaque) -{ - struct pasid_table_opaque *data = opaque; - - if (info->iommu->segment == data->segment && - info->bus == data->bus && - info->devfn == data->devfn && - info->pasid_table) { - *data->pasid_table = info->pasid_table; - return 1; - } - - return 0; -} - -static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque) -{ - struct pasid_table_opaque *data = opaque; - - data->segment = pci_domain_nr(pdev->bus); - data->bus = PCI_BUS_NUM(alias); - data->devfn = alias & 0xff; - - return for_each_device_domain(&search_pasid_table, data); -} /* * Allocate a pasid table for @dev. It should be called in a @@ -143,28 +95,18 @@ int intel_pasid_alloc_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; - struct pasid_table_opaque data; struct page *pages; u32 max_pasid = 0; - int ret, order; - int size; + int order, size; might_sleep(); info = dev_iommu_priv_get(dev); if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; - /* DMA alias device already has a pasid table, use it: */ - data.pasid_table = &pasid_table; - ret = pci_for_each_dma_alias(to_pci_dev(dev), - &get_alias_pasid_table, &data); - if (ret) - goto attach_out; - pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) return -ENOMEM; - INIT_LIST_HEAD(&pasid_table->dev); if (info->pasid_supported) max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)), @@ -182,9 +124,7 @@ int intel_pasid_alloc_table(struct device *dev) pasid_table->table = page_address(pages); pasid_table->order = order; pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); - -attach_out: - device_attach_pasid_table(info, pasid_table); + info->pasid_table = pasid_table; return 0; } @@ -202,10 +142,7 @@ void intel_pasid_free_table(struct device *dev) return; pasid_table = info->pasid_table; - device_detach_pasid_table(info, pasid_table); - - if (!list_empty(&pasid_table->dev)) - return; + info->pasid_table = NULL; /* Free scalable mode PASID directory tables: */ dir = pasid_table->table; -- 2.25.1

3 years, 3 months

4
4
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror June 2022