On embedded platforms a central process often allocates dma-buf memory on behalf of client applications. Without a way to attribute the charge to the requesting client's cgroup, the cost lands on the allocator, making per-cgroup memory limits ineffective for the actual consumers.
Add charge_pid_fd to struct dma_heap_allocation_data. When set to a valid pidfd, DMA_HEAP_IOCTL_ALLOC resolves the target task's memcg and charges the buffer there via mem_cgroup_charge_dmabuf() inside dma_heap_buffer_alloc(). Without charge_pid_fd, and with the mem_accounting module parameter enabled, the buffer is charged to the allocator's own cgroup.
Additionally, commit 3c227be90659 ("dma-buf: system_heap: account for system heap allocation in memcg") adds __GFP_ACCOUNT to system-heap page allocations. Keeping __GFP_ACCOUNT would charge the same pages twice (once to kmem, once to MEMCG_DMABUF); remove it and route all accounting through a single MEMCG_DMABUF path.
Usage examples:
1. Central allocator charging to a client at allocation time. The allocator knows the client's PID (e.g., from binder's sender_pid) and uses pidfd to attribute the charge:
pid_t client_pid = txn->sender_pid; int pidfd = pidfd_open(client_pid, 0);
struct dma_heap_allocation_data alloc = { .len = buffer_size, .fd_flags = O_RDWR | O_CLOEXEC, .charge_pid_fd = pidfd, }; ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc); close(pidfd); /* alloc.fd is now charged to client's cgroup */
2. Default allocation (no pidfd, mem_accounting=1). When charge_pid_fd is not set and the mem_accounting module parameter is enabled, the buffer is charged to the allocator's own cgroup:
struct dma_heap_allocation_data alloc = { .len = buffer_size, .fd_flags = O_RDWR | O_CLOEXEC, }; ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc); /* charged to current process's cgroup */
Current limitations:
- Single-owner model: a dma-buf carries one memcg charge regardless of how many processes share it. This means only the first owner (and exporter) of the shared buffer bears the charge. - Only memcg accounting is supported. While this makes sense for system heap buffers, other heaps (e.g., CMA heaps) will also require selective charging via the dmem controller.
Signed-off-by: Albert Esteve <aesteve@redhat.com> --- Documentation/admin-guide/cgroup-v2.rst | 5 ++-- drivers/dma-buf/dma-buf.c | 16 ++++--------- drivers/dma-buf/dma-heap.c | 42 ++++++++++++++++++++++++++++++--- drivers/dma-buf/heaps/system_heap.c | 2 -- include/uapi/linux/dma-heap.h | 6 +++++ 5 files changed, 53 insertions(+), 18 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 8bdbc2e866430..824d269531eb1 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1636,8 +1636,9 @@ The following nested keys are defined. structures.
dmabuf (npn) - Amount of memory used for exported DMA buffers allocated by the cgroup. - Stays with the allocating cgroup regardless of how the buffer is shared. + Amount of memory used for exported DMA buffers allocated by or on + behalf of the cgroup. Stays with the allocating cgroup regardless + of how the buffer is shared.
workingset_refault_anon Number of refaults of previously evicted anonymous pages. diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index ce02377f48908..23fb758b78297 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -181,8 +181,11 @@ static void dma_buf_release(struct dentry *dentry) */ BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
- mem_cgroup_uncharge_dmabuf(dmabuf->memcg, PAGE_ALIGN(dmabuf->size) / PAGE_SIZE); - mem_cgroup_put(dmabuf->memcg); + if (dmabuf->memcg) { + mem_cgroup_uncharge_dmabuf(dmabuf->memcg, + PAGE_ALIGN(dmabuf->size) / PAGE_SIZE); + mem_cgroup_put(dmabuf->memcg); + }
dmabuf->ops->release(dmabuf);
@@ -764,13 +767,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) dmabuf->resv = resv; }
- dmabuf->memcg = get_mem_cgroup_from_mm(current->mm); - if (!mem_cgroup_charge_dmabuf(dmabuf->memcg, PAGE_ALIGN(dmabuf->size) / PAGE_SIZE, - GFP_KERNEL)) { - ret = -ENOMEM; - goto err_memcg; - } - file->private_data = dmabuf; file->f_path.dentry->d_fsdata = dmabuf; dmabuf->file = file; @@ -781,8 +777,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
return dmabuf;
-err_memcg: - mem_cgroup_put(dmabuf->memcg); err_file: fput(file); err_module: diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index ac5f8685a6494..ff6e259afcdc0 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -7,13 +7,17 @@ */
#include <linux/cdev.h> +#include <linux/cgroup.h> #include <linux/device.h> #include <linux/dma-buf.h> #include <linux/dma-heap.h> +#include <linux/memcontrol.h> +#include <linux/sched/mm.h> #include <linux/err.h> #include <linux/export.h> #include <linux/list.h> #include <linux/nospec.h> +#include <linux/pidfd.h> #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/xarray.h> @@ -55,10 +59,12 @@ MODULE_PARM_DESC(mem_accounting, "Enable cgroup-based memory accounting for dma-buf heap allocations (default=false).");
static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, - u32 fd_flags, - u64 heap_flags) + u32 fd_flags, u64 heap_flags, + struct mem_cgroup *charge_to) { struct dma_buf *dmabuf; + unsigned int nr_pages; + struct mem_cgroup *memcg = charge_to; int fd;
/* @@ -73,6 +79,22 @@ static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, if (IS_ERR(dmabuf)) return PTR_ERR(dmabuf);
+ nr_pages = PAGE_ALIGN(len) / PAGE_SIZE; + + if (memcg) + css_get(&memcg->css); + else if (mem_accounting) + memcg = get_mem_cgroup_from_mm(current->mm); + + if (memcg) { + if (!mem_cgroup_charge_dmabuf(memcg, nr_pages, GFP_KERNEL)) { + mem_cgroup_put(memcg); + dma_buf_put(dmabuf); + return -ENOMEM; + } + dmabuf->memcg = memcg; + } + fd = dma_buf_fd(dmabuf, fd_flags); if (fd < 0) { dma_buf_put(dmabuf); @@ -102,6 +124,9 @@ static long dma_heap_ioctl_allocate(struct file *file, void *data) { struct dma_heap_allocation_data *heap_allocation = data; struct dma_heap *heap = file->private_data; + struct mem_cgroup *memcg = NULL; + struct task_struct *task; + unsigned int pidfd_flags; int fd;
if (heap_allocation->fd) @@ -113,9 +138,20 @@ static long dma_heap_ioctl_allocate(struct file *file, void *data) if (heap_allocation->heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS) return -EINVAL;
+ if (heap_allocation->charge_pid_fd) { + task = pidfd_get_task(heap_allocation->charge_pid_fd, &pidfd_flags); + if (IS_ERR(task)) + return PTR_ERR(task); + + memcg = get_mem_cgroup_from_mm(task->mm); + put_task_struct(task); + } + fd = dma_heap_buffer_alloc(heap, heap_allocation->len, heap_allocation->fd_flags, - heap_allocation->heap_flags); + heap_allocation->heap_flags, + memcg); + mem_cgroup_put(memcg); if (fd < 0) return fd;
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 03c2b87cb1112..95d7688167b93 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -385,8 +385,6 @@ static struct page *alloc_largest_available(unsigned long size, if (max_order < orders[i]) continue; flags = order_flags[i]; - if (mem_accounting) - flags |= __GFP_ACCOUNT; page = alloc_pages(flags, orders[i]); if (!page) continue; diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h index a4cf716a49fa6..e02b0f8cbc6a1 100644 --- a/include/uapi/linux/dma-heap.h +++ b/include/uapi/linux/dma-heap.h @@ -29,6 +29,10 @@ * handle to the allocated dma-buf * @fd_flags: file descriptor flags used when allocating * @heap_flags: flags passed to heap + * @charge_pid_fd: optional pidfd of the process whose cgroup is charged; + * 0 means the calling process's cgroup is charged only + * when the mem_accounting module parameter is enabled + * @__padding: reserved, must be zero * * Provided by userspace as an argument to the ioctl */ @@ -37,6 +41,8 @@ struct dma_heap_allocation_data { __u32 fd; __u32 fd_flags; __u64 heap_flags; + __u32 charge_pid_fd; + __u32 __padding; };
#define DMA_HEAP_IOC_MAGIC 'H'