On 3/11/26 5:19 AM, Albert Esteve wrote:
On Tue, Mar 10, 2026 at 4:34 PM Andrew Davis <afd@ti.com> wrote:
On 3/6/26 4:36 AM, Albert Esteve wrote:
Expose DT coherent reserved-memory pools ("shared-dma-pool" without "reusable") as dma-buf heaps, creating one heap per region so userspace can allocate from the exact device-local pool intended for coherent DMA.
This adds a missing backend in the long-term effort to steer userspace buffer allocations (DRM, v4l2, dma-buf heaps) through heaps for clearer cgroup accounting. CMA and system heaps already exist; non-reusable coherent reserved memory had no heap backend.
The heap binds the heap device to each memory region so coherent allocations use the correct dev->dma_mem, and it defers registration until module_init when normal allocators are available.
Signed-off-by: Albert Esteve <aesteve@redhat.com>
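As a rough illustration of the userspace side (not part of the patch), a minimal sketch using the standard DMA_HEAP_IOCTL_ALLOC UAPI; the heap name "foo" is hypothetical and stands for whatever reserved-memory region name ends up exported under /dev/dma_heap/:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dma-heap.h>

int main(void)
{
        /* Each coherent reserved-memory region gets its own heap node. */
        struct dma_heap_allocation_data alloc = {
                .len = 1024 * 1024,
                .fd_flags = O_RDWR | O_CLOEXEC,
        };
        int heap_fd = open("/dev/dma_heap/foo", O_RDONLY | O_CLOEXEC);

        if (heap_fd < 0 || ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc) < 0) {
                perror("coherent heap alloc");
                return 1;
        }

        /* alloc.fd is now a dma-buf fd backed by that specific region. */
        close(alloc.fd);
        close(heap_fd);
        return 0;
}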
 drivers/dma-buf/heaps/Kconfig         |   9 +
 drivers/dma-buf/heaps/Makefile        |   1 +
 drivers/dma-buf/heaps/coherent_heap.c | 414 ++++++++++++++++++++++++++++++++++
 3 files changed, 424 insertions(+)
diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index a5eef06c42264..aeb475e585048 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -12,3 +12,12 @@ config DMABUF_HEAPS_CMA
           Choose this option to enable dma-buf CMA heap. This heap is backed
           by the Contiguous Memory Allocator (CMA). If your system has these
           regions, you should say Y here.
+
+config DMABUF_HEAPS_COHERENT
+        bool "DMA-BUF Coherent Reserved-Memory Heap"
+        depends on DMABUF_HEAPS && OF_RESERVED_MEM && DMA_DECLARE_COHERENT
+        help
+          Choose this option to enable coherent reserved-memory dma-buf heaps.
+          This heap is backed by non-reusable DT "shared-dma-pool" regions.
+          If your system defines coherent reserved-memory regions, you should
+          say Y here.
diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile
index 974467791032f..96bda7a65f041 100644
--- a/drivers/dma-buf/heaps/Makefile
+++ b/drivers/dma-buf/heaps/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DMABUF_HEAPS_SYSTEM)     += system_heap.o
 obj-$(CONFIG_DMABUF_HEAPS_CMA)        += cma_heap.o
+obj-$(CONFIG_DMABUF_HEAPS_COHERENT)   += coherent_heap.o
diff --git a/drivers/dma-buf/heaps/coherent_heap.c b/drivers/dma-buf/heaps/coherent_heap.c
new file mode 100644
index 0000000000000..55f53f87c4c15
--- /dev/null
+++ b/drivers/dma-buf/heaps/coherent_heap.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMABUF heap for coherent reserved-memory regions
+ *
+ * Copyright (C) 2026 Red Hat, Inc.
+ * Author: Albert Esteve <aesteve@redhat.com>
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/dma-map-ops.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/iosys-map.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+struct coherent_heap {
+        struct dma_heap *heap;
+        struct reserved_mem *rmem;
+        char *name;
+};
+
+struct coherent_heap_buffer {
+        struct coherent_heap *heap;
+        struct list_head attachments;
+        struct mutex lock;
+        unsigned long len;
+        dma_addr_t dma_addr;
+        void *alloc_vaddr;
+        struct page **pages;
+        pgoff_t pagecount;
+        int vmap_cnt;
+        void *vaddr;
+};
+
+struct dma_heap_attachment {
+        struct device *dev;
+        struct sg_table table;
+        struct list_head list;
+        bool mapped;
+};
+static int coherent_heap_attach(struct dma_buf *dmabuf,
+                                struct dma_buf_attachment *attachment)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+        struct dma_heap_attachment *a;
+        int ret;
+
+        a = kzalloc_obj(*a);
+        if (!a)
+                return -ENOMEM;
+
+        ret = sg_alloc_table_from_pages(&a->table, buffer->pages,
+                                        buffer->pagecount, 0,
+                                        buffer->pagecount << PAGE_SHIFT,
+                                        GFP_KERNEL);
+        if (ret) {
+                kfree(a);
+                return ret;
+        }
+
+        a->dev = attachment->dev;
+        INIT_LIST_HEAD(&a->list);
+        a->mapped = false;
+
+        attachment->priv = a;
+
+        mutex_lock(&buffer->lock);
+        list_add(&a->list, &buffer->attachments);
+        mutex_unlock(&buffer->lock);
+
+        return 0;
+}
+static void coherent_heap_detach(struct dma_buf *dmabuf,
+                                 struct dma_buf_attachment *attachment)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+        struct dma_heap_attachment *a = attachment->priv;
+
+        mutex_lock(&buffer->lock);
+        list_del(&a->list);
+        mutex_unlock(&buffer->lock);
+
+        sg_free_table(&a->table);
+        kfree(a);
+}
+static struct sg_table *coherent_heap_map_dma_buf(struct dma_buf_attachment *attachment,
+                                                  enum dma_data_direction direction)
+{
+        struct dma_heap_attachment *a = attachment->priv;
+        struct sg_table *table = &a->table;
+        int ret;
+
+        ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+        if (ret)
+                return ERR_PTR(-ENOMEM);
+
+        a->mapped = true;
+        return table;
+}
+static void coherent_heap_unmap_dma_buf(struct dma_buf_attachment *attachment,
+                                        struct sg_table *table,
+                                        enum dma_data_direction direction)
+{
+        struct dma_heap_attachment *a = attachment->priv;
+
+        a->mapped = false;
+        dma_unmap_sgtable(attachment->dev, table, direction, 0);
+}
+static int coherent_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+                                                  enum dma_data_direction direction)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+        struct dma_heap_attachment *a;
+
+        mutex_lock(&buffer->lock);
+
+        if (buffer->vmap_cnt)
+                invalidate_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+        list_for_each_entry(a, &buffer->attachments, list) {
+                if (!a->mapped)
+                        continue;
+                dma_sync_sgtable_for_cpu(a->dev, &a->table, direction);
+        }
+
+        mutex_unlock(&buffer->lock);
+
+        return 0;
+}
+static int coherent_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+                                                enum dma_data_direction direction)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+        struct dma_heap_attachment *a;
+
+        mutex_lock(&buffer->lock);
+
+        if (buffer->vmap_cnt)
+                flush_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+        list_for_each_entry(a, &buffer->attachments, list) {
+                if (!a->mapped)
+                        continue;
+                dma_sync_sgtable_for_device(a->dev, &a->table, direction);
+        }
+
+        mutex_unlock(&buffer->lock);
+
+        return 0;
+}
+static int coherent_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+        struct coherent_heap *coh_heap = buffer->heap;
+        struct device *heap_dev = dma_heap_get_dev(coh_heap->heap);
+
+        return dma_mmap_coherent(heap_dev, vma, buffer->alloc_vaddr,
+                                 buffer->dma_addr, buffer->len);
+}
+static void *coherent_heap_do_vmap(struct coherent_heap_buffer *buffer)
+{
+        void *vaddr;
+
+        vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, PAGE_KERNEL);
+        if (!vaddr)
+                return ERR_PTR(-ENOMEM);
+
+        return vaddr;
+}
+static int coherent_heap_vmap(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+        void *vaddr;
+        int ret = 0;
+
+        mutex_lock(&buffer->lock);
+        if (buffer->vmap_cnt) {
+                buffer->vmap_cnt++;
+                iosys_map_set_vaddr(map, buffer->vaddr);
+                goto out;
+        }
+
+        vaddr = coherent_heap_do_vmap(buffer);
+        if (IS_ERR(vaddr)) {
+                ret = PTR_ERR(vaddr);
+                goto out;
+        }
+
+        buffer->vaddr = vaddr;
+        buffer->vmap_cnt++;
+        iosys_map_set_vaddr(map, buffer->vaddr);
+out:
+        mutex_unlock(&buffer->lock);
+
+        return ret;
+}
+static void coherent_heap_vunmap(struct dma_buf *dmabuf, struct iosys_map *map)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+
+        mutex_lock(&buffer->lock);
+        if (!--buffer->vmap_cnt) {
+                vunmap(buffer->vaddr);
+                buffer->vaddr = NULL;
+        }
+        mutex_unlock(&buffer->lock);
+
+        iosys_map_clear(map);
+}
+static void coherent_heap_dma_buf_release(struct dma_buf *dmabuf)
+{
+        struct coherent_heap_buffer *buffer = dmabuf->priv;
+        struct coherent_heap *coh_heap = buffer->heap;
+        struct device *heap_dev = dma_heap_get_dev(coh_heap->heap);
+
+        if (buffer->vmap_cnt > 0) {
+                WARN(1, "%s: buffer still mapped in the kernel\n", __func__);
+                vunmap(buffer->vaddr);
+                buffer->vaddr = NULL;
+                buffer->vmap_cnt = 0;
+        }
+
+        if (buffer->alloc_vaddr)
+                dma_free_coherent(heap_dev, buffer->len, buffer->alloc_vaddr,
+                                  buffer->dma_addr);
+
+        kfree(buffer->pages);
+        kfree(buffer);
+}
+static const struct dma_buf_ops coherent_heap_buf_ops = {
+        .attach = coherent_heap_attach,
+        .detach = coherent_heap_detach,
+        .map_dma_buf = coherent_heap_map_dma_buf,
+        .unmap_dma_buf = coherent_heap_unmap_dma_buf,
+        .begin_cpu_access = coherent_heap_dma_buf_begin_cpu_access,
+        .end_cpu_access = coherent_heap_dma_buf_end_cpu_access,
+        .mmap = coherent_heap_mmap,
+        .vmap = coherent_heap_vmap,
+        .vunmap = coherent_heap_vunmap,
+        .release = coherent_heap_dma_buf_release,
+};
+static struct dma_buf *coherent_heap_allocate(struct dma_heap *heap,
+                                              unsigned long len,
+                                              u32 fd_flags,
+                                              u64 heap_flags)
+{
+        struct coherent_heap *coh_heap;
+        struct coherent_heap_buffer *buffer;
+        struct device *heap_dev;
+        DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+        size_t size = PAGE_ALIGN(len);
+        pgoff_t pagecount = size >> PAGE_SHIFT;
+        struct dma_buf *dmabuf;
+        int ret = -ENOMEM;
+        pgoff_t pg;
+
+        coh_heap = dma_heap_get_drvdata(heap);
+        if (!coh_heap)
+                return ERR_PTR(-EINVAL);
+
+        heap_dev = dma_heap_get_dev(coh_heap->heap);
+        if (!heap_dev)
+                return ERR_PTR(-ENODEV);
+
+        buffer = kzalloc_obj(*buffer);
+        if (!buffer)
+                return ERR_PTR(-ENOMEM);
+
+        INIT_LIST_HEAD(&buffer->attachments);
+        mutex_init(&buffer->lock);
+        buffer->len = size;
+        buffer->heap = coh_heap;
+        buffer->pagecount = pagecount;
+
+        buffer->alloc_vaddr = dma_alloc_coherent(heap_dev, buffer->len,
+                                                 &buffer->dma_addr, GFP_KERNEL);

You are doing this DMA allocation using a non-DMA pseudo-device (heap_dev). This is why you need to do that dma_coerce_mask_and_coherent(64) nonsense, you are doing a DMA alloc for the CPU itself. This might still work, but only if dma_map_sgtable() can handle swiotlb/iommu for all attaching devices at map time.
The concern is valid. We're allocating via a synthetic device, which ties the allocation to that device's DMA domain. I looked deeper into this while trying to address it.
The approach works because dma_map_sgtable() handles both dma_map_direct and use_dma_iommu cases in __dma_map_sg_attrs(). For each physical address in the sg_table (extracted via sg_phys()), it creates device-specific DMA mappings:
- For direct mapping: it checks if the address is directly accessible
(dma_capable()), and if not, it falls back to swiotlb.
- For IOMMU: it creates IOVA mappings in the device's domain so the device
can reach the buffer's physical addresses.
This means every attached device gets its own device-specific DMA mapping, properly handling cases where the physical addresses are inaccessible or have DMA constraints.
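To illustrate what that looks like from an importer (not part of the patch; my_dev and dmabuf are assumed to come from the surrounding driver), a minimal sketch showing that the sg_table is mapped with the importing device, so its IOMMU/swiotlb handling applies at map time:

#include <linux/dma-buf.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>

static struct sg_table *import_coherent_buf(struct device *my_dev,
                                            struct dma_buf *dmabuf,
                                            struct dma_buf_attachment **out)
{
        struct dma_buf_attachment *attach;
        struct sg_table *sgt;

        attach = dma_buf_attach(dmabuf, my_dev);
        if (IS_ERR(attach))
                return ERR_CAST(attach);

        /* Ends up in coherent_heap_map_dma_buf() -> dma_map_sgtable(),
         * called with the importer's device, not the heap pseudo-device.
         */
        sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
        if (IS_ERR(sgt)) {
                dma_buf_detach(dmabuf, attach);
                return sgt;
        }

        *out = attach;
        return sgt;
}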
While this means it might still "work", it won't always be ideal. Take the case where the consuming device(s) have a 32-bit address restriction: if the allocation were done using the real devices, the backing buffer itself would be allocated in <32-bit memory. Here, however, the allocation could end up in >32-bit memory, since the CPU/synthetic device supports that, and each mapping device would instead get a bounce buffer.
(this example might not be great as we usually know the address of carveout/reserved memory regions, but substitute in whatever restriction makes more sense)
These non-reusable carveouts tend to be made for some specific device, and they are made specifically because that device has some memory restriction. So we might run into the situation above more than one would expect.
Not a blocker here, but just something worth thinking on.
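To make that concrete, a minimal sketch (illustration only, with a hypothetical 32-bit-limited consumer device cdev): allocating against the real device forces the backing memory to respect its mask up front, so no bounce buffering is needed at map time:

#include <linux/dma-mapping.h>

/* Hypothetical helper: allocate on behalf of a 32-bit-limited consumer. */
static void *alloc_for_consumer(struct device *cdev, size_t size,
                                dma_addr_t *dma_addr)
{
        /* The real device advertises its addressing limit... */
        if (dma_set_mask_and_coherent(cdev, DMA_BIT_MASK(32)))
                return NULL;

        /* ...so the coherent buffer must already be reachable below 4 GiB. */
        return dma_alloc_coherent(cdev, size, dma_addr, GFP_KERNEL);
}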
I'm not sure whether other approaches (whatever they may be) would be better, as here we are leveraging a large part of the existing infrastructure.
+        if (!buffer->alloc_vaddr) {
+                ret = -ENOMEM;
+                goto free_buffer;
+        }
+
+        buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages),
+                                      GFP_KERNEL);
+        if (!buffer->pages) {
+                ret = -ENOMEM;
+                goto free_dma;
+        }
+
+        for (pg = 0; pg < pagecount; pg++)
+                buffer->pages[pg] = virt_to_page((char *)buffer->alloc_vaddr +
+                                                 (pg * PAGE_SIZE));

Is any of this valid if the coherent pool in DT was marked "no-map;"? I'm sure the .mmap and .cpu_access functions are not valid in that case. Our (TI) evil vendor tree version of this heap sets a flag in that case and avoids doing anything invalid when the region doesn't have normal backing page structs. This region is treated more like a P2PDMA area in that case.
https://git.ti.com/cgit/ti-linux-kernel/ti-linux-kernel/tree/drivers/dma-buf...
I completely missed the "no-map" case. Thanks for the review and the link! I will address this in the next version, using logic similar to the linked driver's.
Do take note that the linked driver is only part of an evil vendor tree; I do things in that driver that are not correct and would not fly upstream.
For "no-map" I chose to make un-cached mappings for the CPU. This allowed kernel/userspace access without changing cacheability (which can't be done safely on ARM). The issue is that "no-map" really should mean DO NOT MAP. It might be that these carveouts are firewalled or have some other side effect that prevents *any* mapping from the CPU. The safer thing to do would be to simply not allow CPU mappings (vmap/mmap) if "no-map" is set.
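A minimal sketch of that safer option (not from the patch or the vendor tree; the no_map flag is hypothetical and would need to be recorded at allocation time and checked in the vmap path as well):

static int coherent_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
        struct coherent_heap_buffer *buffer = dmabuf->priv;
        struct device *heap_dev = dma_heap_get_dev(buffer->heap->heap);

        /* Hypothetical flag, set when the backing reserved-memory node
         * carries "no-map;": refuse CPU mappings entirely rather than
         * risk an unsafe or firewalled mapping.
         */
        if (buffer->no_map)
                return -EPERM;

        return dma_mmap_coherent(heap_dev, vma, buffer->alloc_vaddr,
                                 buffer->dma_addr, buffer->len);
}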
Andrew
BR, Albert.
Andrew
+        /* create the dmabuf */
+        exp_info.exp_name = dma_heap_get_name(heap);
+        exp_info.ops = &coherent_heap_buf_ops;
+        exp_info.size = buffer->len;
+        exp_info.flags = fd_flags;
+        exp_info.priv = buffer;
+        dmabuf = dma_buf_export(&exp_info);
+        if (IS_ERR(dmabuf)) {
+                ret = PTR_ERR(dmabuf);
+                goto free_pages;
+        }
+
+        return dmabuf;
+
+free_pages:
+        kfree(buffer->pages);
+free_dma:
+        dma_free_coherent(heap_dev, buffer->len, buffer->alloc_vaddr,
+                          buffer->dma_addr);
+free_buffer:
+        kfree(buffer);
+
+        return ERR_PTR(ret);
+}
+static const struct dma_heap_ops coherent_heap_ops = {
+        .allocate = coherent_heap_allocate,
+};
+static int __coherent_heap_register(struct reserved_mem *rmem)
+{
+        struct dma_heap_export_info exp_info;
+        struct coherent_heap *coh_heap;
+        struct device *heap_dev;
+        int ret;
+
+        if (!rmem || !rmem->name)
+                return -EINVAL;
+
+        coh_heap = kzalloc_obj(*coh_heap);
+        if (!coh_heap)
+                return -ENOMEM;
+
+        coh_heap->rmem = rmem;
+        coh_heap->name = kstrdup(rmem->name, GFP_KERNEL);
+        if (!coh_heap->name) {
+                ret = -ENOMEM;
+                goto free_coherent_heap;
+        }
+
+        exp_info.name = coh_heap->name;
+        exp_info.ops = &coherent_heap_ops;
+        exp_info.priv = coh_heap;
+
+        coh_heap->heap = dma_heap_create(&exp_info);
+        if (IS_ERR(coh_heap->heap)) {
+                ret = PTR_ERR(coh_heap->heap);
+                goto free_name;
+        }
+
+        heap_dev = dma_heap_get_dev(coh_heap->heap);
+        ret = dma_coerce_mask_and_coherent(heap_dev, DMA_BIT_MASK(64));
+        if (ret) {
+                pr_err("coherent_heap: failed to set DMA mask (%d)\n", ret);
+                goto destroy_heap;
+        }
+
+        ret = of_reserved_mem_device_init_with_mem(heap_dev, rmem);
+        if (ret) {
+                pr_err("coherent_heap: failed to initialize memory (%d)\n", ret);
+                goto destroy_heap;
+        }
+
+        ret = dma_heap_register(coh_heap->heap);
+        if (ret) {
+                pr_err("coherent_heap: failed to register heap (%d)\n", ret);
+                goto destroy_heap;
+        }
+
+        return 0;
+
+destroy_heap:
+        dma_heap_destroy(coh_heap->heap);
+        coh_heap->heap = NULL;
+free_name:
+        kfree(coh_heap->name);
+free_coherent_heap:
+        kfree(coh_heap);
+
+        return ret;
+}
+static int __init coherent_heap_register(void)
+{
+        struct reserved_mem *rmem;
+        unsigned int i;
+        int ret;
+
+        for (i = 0; (rmem = dma_coherent_get_reserved_region(i)) != NULL; i++) {
+                ret = __coherent_heap_register(rmem);
+                if (ret) {
+                        pr_warn("Failed to add coherent heap %s",
+                                rmem->name ? rmem->name : "unknown");
+                        continue;
+                }
+        }
+
+        return 0;
+}
+module_init(coherent_heap_register);
+
+MODULE_DESCRIPTION("DMA-BUF heap for coherent reserved-memory regions");