Barely anyone uses dma_fence_signal()'s (and similar functions') return
code. Checking it is pretty much useless anyway, because what are you
going to do if a fence was already signaled? Unsignal it and signal it
again? ;p
Removing the return code simplifies the API and makes it easier for me
to sit on top with Rust DmaFence.
Philipp Stanner (6):
dma-buf/dma-fence: Add dma_fence_test_signaled_flag()
amd/amdkfd: Ignore return code of dma_fence_signal()
drm/gpu/xe: Ignore dma_fence_signal() return code
dma-buf: Don't misuse dma_fence_signal()
drm/ttm: Remove return check of dma_fence_signal()
dma-buf/dma-fence: Remove return code of signaling-functions
drivers/dma-buf/dma-fence.c | 59 ++++++-------------
drivers/dma-buf/st-dma-fence.c | 7 +--
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 5 +-
.../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 3 +-
drivers/gpu/drm/xe/xe_hw_fence.c | 5 +-
include/linux/dma-fence.h | 33 ++++++++---
6 files changed, 53 insertions(+), 59 deletions(-)
--
2.49.0
fill_sg_entry() splits large DMA buffers into multiple scatter-gather
entries, each holding up to UINT_MAX bytes. When calculating the DMA
address for entries beyond the second one, the expression (i * UINT_MAX)
causes integer overflow due to 32-bit arithmetic.
This manifests when the input length is >= 8 GiB, which results in the
loop reaching i >= 2.
Fix by casting i to dma_addr_t before multiplication.
Fixes: 3aa31a8bb11e ("dma-buf: provide phys_vec to scatter-gather mapping routine")
Signed-off-by: Alex Mastro <amastro(a)fb.com>
---
More color about how I discovered this in [1] for the commit at [2]:
[1] https://lore.kernel.org/all/aSZHO6otK0Heh+Qj@devgpu015.cco6.facebook.com
[2] https://lore.kernel.org/all/20251120-dmabuf-vfio-v9-6-d7f71607f371@nvidia.c…
---
drivers/dma-buf/dma-buf-mapping.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-buf-mapping.c b/drivers/dma-buf/dma-buf-mapping.c
index b4819811a64a..b7352e609fbd 100644
--- a/drivers/dma-buf/dma-buf-mapping.c
+++ b/drivers/dma-buf/dma-buf-mapping.c
@@ -24,7 +24,7 @@ static struct scatterlist *fill_sg_entry(struct scatterlist *sgl, size_t length,
* does not require the CPU list for mapping or unmapping.
*/
sg_set_page(sgl, NULL, 0, 0);
- sg_dma_address(sgl) = addr + i * UINT_MAX;
+ sg_dma_address(sgl) = addr + (dma_addr_t)i * UINT_MAX;
sg_dma_len(sgl) = len;
sgl = sg_next(sgl);
}
---
base-commit: 5415d887db0e059920cb5673a32cc4d66daa280f
change-id: 20251125-dma-buf-overflow-e3253f108e36
Best regards,
--
Alex Mastro <amastro(a)fb.com>
Hi everyone,
dma_fences have always lived under the tyranny dictated by the module
lifetime of their issuer, leading to crashes should anybody still hold
a reference to a dma_fence when the issuer's module is unloaded.
The basic problem is that when buffers are shared between drivers,
dma_fence objects can leak into external drivers and stay there even
after they are signaled. The dma_resv object, for example, only lazily
releases dma_fences.
So what happens is that when the module which originally created the
dma_fence unloads, the dma_fence_ops function table becomes unavailable
as well, and so any attempt to release the fence crashes the system.
Previously, various approaches have been discussed, including changing the
locking semantics of the dma_fence callbacks (by me) as well as using the
drm scheduler as an intermediate layer (by Sima) to disconnect dma_fences
from their actual users, but none of them actually solves all the problems.
Tvrtko did some really nice prerequisite work by protecting the returned
strings of the dma_fence_ops with RCU. This way dma_fence creators were
able to just wait for an RCU grace period after fence signaling before
it was safe to free those data structures.
Now this patch set goes a step further and protects the whole
dma_fence_ops structure with RCU, so that after the fence signals the
pointer to the dma_fence_ops is set to NULL when neither a wait nor a
release callback is given. All functionality which uses the dma_fence_ops
reference is put inside an RCU critical section, except for the
deprecated issuer-specific wait and, of course, the optional release
callback.
In addition to the RCU changes, the lock protecting the dma_fence state
previously had to be allocated externally. This set now makes that
external lock optional and allows dma_fences to use an inline lock and
be self-contained.
This patch set addresses all previous code review comments and is based
on drm-tip; it includes my changes for amdgpu as well as Mathew's patches for XE.
Going to push the core DMA-buf changes to drm-misc-next as soon as I get
the appropriate rb. The driver specific changes can go upstream through
the driver channels as necessary.
Please review and comment,
Christian.
Changelog:
v9:
* Added Reviewed-by tags.
* Fixes to p2pdma documentation.
* Renamed dma_buf_map and unmap.
* Moved them to separate file.
* Used nvgrace_gpu_memregion() function instead of open-coded variant.
* Paired get_file_active() with fput().
v8: https://patch.msgid.link/20251111-dmabuf-vfio-v8-0-fd9aa5df478f@nvidia.com
* Fixed spelling errors in p2pdma documentation file.
* Added vdev->pci_ops check for NULL in vfio_pci_core_feature_dma_buf().
* Simplified the nvgrace_get_dmabuf_phys() function.
* Added extra check in pcim_p2pdma_provider() to catch missing call
to pcim_p2pdma_init().
v7: https://patch.msgid.link/20251106-dmabuf-vfio-v7-0-2503bf390699@nvidia.com
* Dropped restore_revoke flag and added vfio_pci_dma_buf_move
to reverse loop.
* Fixed spelling errors in documentation patch.
* Rebased on top of v6.18-rc3.
* Added include to stddef.h to vfio.h, to keep uapi header file independent.
v6: https://patch.msgid.link/20251102-dmabuf-vfio-v6-0-d773cff0db9f@nvidia.com
* Fixed wrong error check from pcim_p2pdma_init().
* Documented pcim_p2pdma_provider() function.
* Improved commit messages.
* Added VFIO DMA-BUF selftest, not sent yet.
* Added __counted_by(nr_ranges) annotation to struct vfio_device_feature_dma_buf.
* Fixed error unwind when dma_buf_fd() fails.
* Document latest changes to p2pmem.
* Removed EXPORT_SYMBOL_GPL from pci_p2pdma_map_type.
* Moved DMA mapping logic to DMA-BUF.
* Removed types patch to avoid dependencies between subsystems.
* Moved vfio_pci_dma_buf_move() in err_undo block.
* Added nvgrace patch.
v5: https://lore.kernel.org/all/cover.1760368250.git.leon@kernel.org
* Rebased on top of v6.18-rc1.
* Added more validation logic to make sure that DMA-BUF length doesn't
overflow in various scenarios.
* Hide kernel config from the users.
* Fixed type conversion issue. DMA ranges are exposed with u64 length,
but DMA-BUF uses "unsigned int" as a length for SG entries.
* Added check so that VFIO drivers which report a BAR size
  different from the PCI one do not use DMA-BUF functionality.
v4: https://lore.kernel.org/all/cover.1759070796.git.leon@kernel.org
* Split pcim_p2pdma_provider() to two functions, one that initializes
array of providers and another to return right provider pointer.
v3: https://lore.kernel.org/all/cover.1758804980.git.leon@kernel.org
* Changed pcim_p2pdma_enable() to be pcim_p2pdma_provider().
* Cache provider in vfio_pci_dma_buf struct instead of BAR index.
* Removed misleading comment from pcim_p2pdma_provider().
* Moved MMIO check to be in pcim_p2pdma_provider().
v2: https://lore.kernel.org/all/cover.1757589589.git.leon@kernel.org/
* Added extra patch which adds new CONFIG, so next patches can reuse it.
* Squashed "PCI/P2PDMA: Remove redundant bus_offset from map state"
into the other patch.
* Fixed revoke calls to be aligned with true->false semantics.
* Extended p2pdma_providers to be per-BAR and not global to the whole device.
* Fixed possible race between dmabuf states and revoke.
* Moved revoke to PCI BAR zap block.
v1: https://lore.kernel.org/all/cover.1754311439.git.leon@kernel.org
* Changed commit messages.
* Reused DMA_ATTR_MMIO attribute.
* Returned support for multiple DMA ranges per-DMABUF.
v0: https://lore.kernel.org/all/cover.1753274085.git.leonro@nvidia.com
---------------------------------------------------------------------------
Based on "[PATCH v6 00/16] dma-mapping: migrate to physical address-based API"
https://lore.kernel.org/all/cover.1757423202.git.leonro@nvidia.com/ series.
---------------------------------------------------------------------------
This series extends the VFIO PCI subsystem to support exporting MMIO
regions from PCI device BARs as dma-buf objects, enabling safe sharing of
non-struct page memory with controlled lifetime management. This allows RDMA
and other subsystems to import dma-buf FDs and build them into memory regions
for PCI P2P operations.
The series supports a use case for SPDK where an NVMe device will be
owned by SPDK through VFIO but interacts with an RDMA device. The RDMA
device may directly access the NVMe CMB or directly manipulate the NVMe
device's doorbell using PCI P2P.
However, as a general mechanism, it can support many other scenarios with
VFIO. This dmabuf approach is usable by iommufd as well for generic
and safe P2P mappings.
In addition to the SPDK use-case mentioned above, the capability added
in this patch series can also be useful when a buffer (located in device
memory such as VRAM) needs to be shared between any two dGPU devices or
instances (assuming one of them is bound to VFIO PCI) as long as they
are P2P DMA compatible.
The implementation provides a revocable attachment mechanism using dma-buf
move operations. MMIO regions are normally pinned as BARs don't change
physical addresses, but access is revoked when the VFIO device is closed
or a PCI reset is issued. This ensures kernel self-defense against
potentially hostile userspace.
The series includes significant refactoring of the PCI P2PDMA subsystem
to separate core P2P functionality from memory allocation features,
making it more modular and suitable for VFIO use cases that don't need
struct page support.
-----------------------------------------------------------------------
The series is based originally on
https://lore.kernel.org/all/20250307052248.405803-1-vivek.kasireddy@intel.c…
but heavily rewritten to be based on DMA physical API.
-----------------------------------------------------------------------
The WIP branch can be found here:
https://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma.git/log/?h=…
Thanks
---
Jason Gunthorpe (2):
PCI/P2PDMA: Document DMABUF model
vfio/nvgrace: Support get_dmabuf_phys
Leon Romanovsky (7):
PCI/P2PDMA: Separate the mmap() support from the core logic
PCI/P2PDMA: Simplify bus address mapping API
PCI/P2PDMA: Refactor to separate core P2P functionality from memory allocation
PCI/P2PDMA: Provide an access to pci_p2pdma_map_type() function
dma-buf: provide phys_vec to scatter-gather mapping routine
vfio/pci: Enable peer-to-peer DMA transactions by default
vfio/pci: Add dma-buf export support for MMIO regions
Vivek Kasireddy (2):
vfio: Export vfio device get and put registration helpers
vfio/pci: Share the core device pointer while invoking feature functions
Documentation/driver-api/pci/p2pdma.rst | 97 +++++++---
block/blk-mq-dma.c | 2 +-
drivers/dma-buf/Makefile | 2 +-
drivers/dma-buf/dma-buf-mapping.c | 248 +++++++++++++++++++++++++
drivers/iommu/dma-iommu.c | 4 +-
drivers/pci/p2pdma.c | 186 ++++++++++++++-----
drivers/vfio/pci/Kconfig | 3 +
drivers/vfio/pci/Makefile | 1 +
drivers/vfio/pci/nvgrace-gpu/main.c | 52 ++++++
drivers/vfio/pci/vfio_pci.c | 5 +
drivers/vfio/pci/vfio_pci_config.c | 22 ++-
drivers/vfio/pci/vfio_pci_core.c | 53 ++++--
drivers/vfio/pci/vfio_pci_dmabuf.c | 316 ++++++++++++++++++++++++++++++++
drivers/vfio/pci/vfio_pci_priv.h | 23 +++
drivers/vfio/vfio_main.c | 2 +
include/linux/dma-buf-mapping.h | 17 ++
include/linux/dma-buf.h | 11 ++
include/linux/pci-p2pdma.h | 120 +++++++-----
include/linux/vfio.h | 2 +
include/linux/vfio_pci_core.h | 42 +++++
include/uapi/linux/vfio.h | 28 +++
kernel/dma/direct.c | 4 +-
mm/hmm.c | 2 +-
23 files changed, 1101 insertions(+), 141 deletions(-)
---
base-commit: dcb6fa37fd7bc9c3d2b066329b0d27dedf8becaa
change-id: 20251016-dmabuf-vfio-6cef732adf5a
Best regards,
--
Leon Romanovsky <leonro(a)nvidia.com>