Hello all,
This series makes it so the udmabuf will sync the backing buffer with the set of attached devices as required for DMA-BUFs when doing {begin,end}_cpu_access.
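For context, these callbacks are what run when userspace brackets its CPU access with DMA_BUF_IOCTL_SYNC. A minimal sketch of that usage (function and variable names here are illustrative only, error handling omitted):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>

/* Bracket a CPU write to an mmap'ed DMA-buf with the sync ioctl so the
 * exporter's begin_cpu_access/end_cpu_access callbacks actually run.
 */
static void cpu_write(int dmabuf_fd, void *map, size_t len)
{
	struct dma_buf_sync sync = { 0 };

	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* -> begin_cpu_access */

	memset(map, 0xa5, len);				/* CPU touches the buffer */

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);	/* -> end_cpu_access */
}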
Thanks Andrew
Changes for v2:
- fix attachment table use-after-free
- rebased on v6.17-rc1
Andrew Davis (3):
  udmabuf: Keep track of current device mappings
  udmabuf: Sync buffer mappings for attached devices
  udmabuf: Use module_misc_device() to register this device

 drivers/dma-buf/udmabuf.c | 133 +++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 66 deletions(-)
When a device attaches to and maps our buffer, we need to keep track of that mapping/device. This is needed, for instance, to synchronize with these devices when beginning and ending CPU access. Add a list that tracks device mappings as part of {map,unmap}_udmabuf().
Signed-off-by: Andrew Davis <afd@ti.com>
---
 drivers/dma-buf/udmabuf.c | 45 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 40399c26e6be6..cc5c1cc42c7f2 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -42,6 +42,14 @@ struct udmabuf {
 	struct sg_table *sg;
 	struct miscdevice *device;
 	pgoff_t *offsets;
+	struct list_head attachments;
+	struct mutex lock; /* for attachments list */
+};
+
+struct udmabuf_attachment {
+	struct device *dev;
+	struct sg_table *table;
+	struct list_head list;
 };
 
 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
@@ -185,14 +193,44 @@ static void put_sg_table(struct device *dev, struct sg_table *sg,
 static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
 				    enum dma_data_direction direction)
 {
-	return get_sg_table(at->dev, at->dmabuf, direction);
+	struct udmabuf *ubuf = at->dmabuf->priv;
+	struct udmabuf_attachment *a;
+	struct sg_table *table;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return ERR_PTR(-ENOMEM);
+
+	table = get_sg_table(at->dev, at->dmabuf, direction);
+	if (IS_ERR(table)) {
+		kfree(a);
+		return table;
+	}
+
+	a->dev = at->dev;
+	a->table = table;
+
+	mutex_lock(&ubuf->lock);
+	list_add(&a->list, &ubuf->attachments);
+	mutex_unlock(&ubuf->lock);
+
+	return a->table;
 }
 
 static void unmap_udmabuf(struct dma_buf_attachment *at,
 			  struct sg_table *sg,
 			  enum dma_data_direction direction)
 {
-	return put_sg_table(at->dev, sg, direction);
+	struct udmabuf_attachment *a = at->priv;
+	struct udmabuf *ubuf = at->dmabuf->priv;
+
+	mutex_lock(&ubuf->lock);
+	list_del(&a->list);
+	mutex_unlock(&ubuf->lock);
+
+	put_sg_table(at->dev, sg, direction);
+
+	kfree(a);
 }
 
 static void unpin_all_folios(struct udmabuf *ubuf)
@@ -384,6 +422,9 @@ static long udmabuf_create(struct miscdevice *device,
 	if (!ubuf)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&ubuf->attachments);
+	mutex_init(&ubuf->lock);
+
 	pglimit = ((u64)size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
 	for (i = 0; i < head->count; i++) {
 		pgoff_t subpgcnt;
Currently this driver creates an SG table using the CPU as the target device, then performs the dma_sync operations against that table. This is backwards from how DMA-BUFs are supposed to behave. It may have worked for the case where these buffers were only handed back to the same CPU that produced them, as in the QEMU case, and only then because the original author also had the dma_sync operations backwards, syncing for the "device" on begin_cpu. This was noticed and "fixed" in this patch[0].
That meant we were then syncing from the CPU to the CPU using a pseudo-device, the "miscdevice". This in turn caused another issue, since the miscdevice does not have a proper DMA mask (and why should it, the CPU is not a DMA device). The fix for that was an even more egregious hack[1] that declares the CPU coherent with itself and able to access its own memory space.
Unwind all this and perform the correct action by doing the dma_sync operations for each device currently attached to the backing buffer.
[0] commit 1ffe09590121 ("udmabuf: fix dma-buf cpu access")
[1] commit 9e9fa6a9198b ("udmabuf: Set the DMA mask for the udmabuf device (v2)")
Signed-off-by: Andrew Davis <afd@ti.com>
---
 drivers/dma-buf/udmabuf.c | 64 +++++++++++++--------------------------
 1 file changed, 21 insertions(+), 43 deletions(-)
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index cc5c1cc42c7f2..8d71c3d72eb5e 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -39,8 +39,6 @@ struct udmabuf {
 	pgoff_t nr_pinned;
 	struct folio **pinned_folios;
 
-	struct sg_table *sg;
-	struct miscdevice *device;
 	pgoff_t *offsets;
 	struct list_head attachments;
 	struct mutex lock; /* for attachments list */
@@ -272,10 +270,6 @@ static __always_inline void deinit_udmabuf(struct udmabuf *ubuf)
 static void release_udmabuf(struct dma_buf *buf)
 {
 	struct udmabuf *ubuf = buf->priv;
-	struct device *dev = ubuf->device->this_device;
-
-	if (ubuf->sg)
-		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
 
 	deinit_udmabuf(ubuf);
 	kfree(ubuf);
@@ -285,32 +279,27 @@ static int begin_cpu_udmabuf(struct dma_buf *buf,
 			     enum dma_data_direction direction)
 {
 	struct udmabuf *ubuf = buf->priv;
-	struct device *dev = ubuf->device->this_device;
-	int ret = 0;
-
-	if (!ubuf->sg) {
-		ubuf->sg = get_sg_table(dev, buf, direction);
-		if (IS_ERR(ubuf->sg)) {
-			ret = PTR_ERR(ubuf->sg);
-			ubuf->sg = NULL;
-		}
-	} else {
-		dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction);
-	}
+	struct udmabuf_attachment *a;
 
-	return ret;
+	guard(mutex)(&ubuf->lock);
+
+	list_for_each_entry(a, &ubuf->attachments, list)
+		dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
+
+	return 0;
 }
 
 static int end_cpu_udmabuf(struct dma_buf *buf,
 			   enum dma_data_direction direction)
 {
 	struct udmabuf *ubuf = buf->priv;
-	struct device *dev = ubuf->device->this_device;
+	struct udmabuf_attachment *a;
 
-	if (!ubuf->sg)
-		return -EINVAL;
+	guard(mutex)(&ubuf->lock);
+
+	list_for_each_entry(a, &ubuf->attachments, list)
+		dma_sync_sgtable_for_device(a->dev, a->table, direction);
 
-	dma_sync_sgtable_for_device(dev, ubuf->sg, direction);
 	return 0;
 }
 
@@ -346,12 +335,10 @@ static int check_memfd_seals(struct file *memfd)
 	return 0;
 }
 
-static struct dma_buf *export_udmabuf(struct udmabuf *ubuf,
-				      struct miscdevice *device)
+static struct dma_buf *export_udmabuf(struct udmabuf *ubuf)
 {
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 
-	ubuf->device = device;
 	exp_info.ops = &udmabuf_ops;
 	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
 	exp_info.priv = ubuf;
@@ -406,8 +393,7 @@ static long udmabuf_pin_folios(struct udmabuf *ubuf, struct file *memfd,
 	return 0;
 }
 
-static long udmabuf_create(struct miscdevice *device,
-			   struct udmabuf_create_list *head,
+static long udmabuf_create(struct udmabuf_create_list *head,
 			   struct udmabuf_create_item *list)
 {
 	unsigned long max_nr_folios = 0;
@@ -482,7 +468,7 @@ static long udmabuf_create(struct miscdevice *device,
 	}
 
 	flags = head->flags & UDMABUF_FLAGS_CLOEXEC ? O_CLOEXEC : 0;
-	dmabuf = export_udmabuf(ubuf, device);
+	dmabuf = export_udmabuf(ubuf);
 	if (IS_ERR(dmabuf)) {
 		ret = PTR_ERR(dmabuf);
 		goto err;
@@ -508,7 +494,7 @@ static long udmabuf_create(struct miscdevice *device,
 	return ret;
 }
 
-static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
+static long udmabuf_ioctl_create(unsigned long arg)
 {
 	struct udmabuf_create create;
 	struct udmabuf_create_list head;
@@ -524,10 +510,10 @@ static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
 	list.offset = create.offset;
 	list.size = create.size;
 
-	return udmabuf_create(filp->private_data, &head, &list);
+	return udmabuf_create(&head, &list);
 }
 
-static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
+static long udmabuf_ioctl_create_list(unsigned long arg)
 {
 	struct udmabuf_create_list head;
 	struct udmabuf_create_item *list;
@@ -543,7 +529,7 @@ static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
 	if (IS_ERR(list))
 		return PTR_ERR(list);
 
-	ret = udmabuf_create(filp->private_data, &head, list);
+	ret = udmabuf_create(&head, list);
 	kfree(list);
 	return ret;
 }
@@ -555,10 +541,10 @@ static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
 
 	switch (ioctl) {
 	case UDMABUF_CREATE:
-		ret = udmabuf_ioctl_create(filp, arg);
+		ret = udmabuf_ioctl_create(arg);
 		break;
 	case UDMABUF_CREATE_LIST:
-		ret = udmabuf_ioctl_create_list(filp, arg);
+		ret = udmabuf_ioctl_create_list(arg);
 		break;
 	default:
 		ret = -ENOTTY;
@@ -591,14 +577,6 @@ static int __init udmabuf_dev_init(void)
 		return ret;
 	}
 
-	ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device,
-					   DMA_BIT_MASK(64));
-	if (ret < 0) {
-		pr_err("Could not setup DMA mask for udmabuf device\n");
-		misc_deregister(&udmabuf_misc);
-		return ret;
-	}
-
 	return 0;
 }
Now that we do not need to call dma_coerce_mask_and_coherent() on our miscdevice, use the module_misc_device() helper for registration and module init/exit.
While here, add a module description and license; modules built with W=1 warn if these are missing.
Signed-off-by: Andrew Davis <afd@ti.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 drivers/dma-buf/udmabuf.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 8d71c3d72eb5e..d888e4d667c67 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -566,26 +566,8 @@ static struct miscdevice udmabuf_misc = {
 	.name = "udmabuf",
 	.fops = &udmabuf_fops,
 };
-
-static int __init udmabuf_dev_init(void)
-{
-	int ret;
-
-	ret = misc_register(&udmabuf_misc);
-	if (ret < 0) {
-		pr_err("Could not initialize udmabuf device\n");
-		return ret;
-	}
-
-	return 0;
-}
-
-static void __exit udmabuf_dev_exit(void)
-{
-	misc_deregister(&udmabuf_misc);
-}
-
-module_init(udmabuf_dev_init)
-module_exit(udmabuf_dev_exit)
+module_misc_device(udmabuf_misc);
 
 MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
+MODULE_DESCRIPTION("Userspace memfd to DMA-BUF misc Driver");
+MODULE_LICENSE("GPL");
On 14.08.25 18:10, Andrew Davis wrote:
Hello all,
This series makes it so the udmabuf will sync the backing buffer with the set of attached devices as required for DMA-BUFs when doing {begin,end}_cpu_access.
Yeah, the reason why we didn't do that is that this doesn't even work 100% reliably in theory. So this patchset might make your use case work, but it is a bit questionable in general.
udmabuf is about turning a file descriptor created by memfd_create() into a DMA-buf. Mapping that memory can happen through the memfd as well, so it is perfectly valid to skip the DMA-buf begin_cpu_access and end_cpu_access callbacks.
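I.e. userspace can do roughly the following and then read/write the pages through the memfd mapping without ever issuing DMA_BUF_IOCTL_SYNC on the DMA-buf fd (sketch only, error handling omitted, names are illustrative, size assumed page aligned):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/udmabuf.h>

/* Turn a sealed memfd into a DMA-buf via /dev/udmabuf; the very same
 * pages stay reachable through a plain mmap() of the memfd.
 */
static int memfd_to_dmabuf(size_t size, void **cpu_map)
{
	struct udmabuf_create create = { 0 };
	int memfd, devfd;

	memfd = memfd_create("buf", MFD_ALLOW_SEALING);
	ftruncate(memfd, size);
	fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);	/* required by udmabuf */

	devfd = open("/dev/udmabuf", O_RDWR);
	create.memfd  = memfd;
	create.offset = 0;
	create.size   = size;

	/* CPU-side mapping that never goes near the DMA-buf sync ioctls */
	*cpu_map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);

	return ioctl(devfd, UDMABUF_CREATE, &create);	/* returns the DMA-buf fd */
}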
In addition to that, when CONFIG_DMABUF_DEBUG is enabled the DMA-buf code mangles the page addresses in the sg table to prevent importers from abusing them. That makes dma_sync_sgtable_for_cpu() and dma_sync_sgtable_for_device() crash on the exporter side.
That's the reason why the DMA-buf heaps use a copy of the sg table when calling dma_sync_sgtable_for_cpu()/dma_sync_sgtable_for_device().
It's basically a hack and should be removed, but for that we would need to change all clients, which is tons of work.
Regards, Christian.
Thanks Andrew
Changes for v2:
- fix attachment table use-after-free
- rebased on v6.17-rc1
Andrew Davis (3):
  udmabuf: Keep track of current device mappings
  udmabuf: Sync buffer mappings for attached devices
  udmabuf: Use module_misc_device() to register this device

 drivers/dma-buf/udmabuf.c | 133 +++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 66 deletions(-)
On 8/15/25 4:41 AM, Christian König wrote:
On 14.08.25 18:10, Andrew Davis wrote:
Hello all,
This series makes it so the udmabuf will sync the backing buffer with the set of attached devices as required for DMA-BUFs when doing {begin,end}_cpu_access.
Yeah, the reason why we didn't do that is that this doesn't even work 100% reliably in theory. So this patchset might make your use case work, but it is a bit questionable in general.
udmabuf is about turning a file descriptor created by memfd_create() into a DMA-buf. Mapping that memory can happen through the memfd as well, so it is perfectly valid to skip the DMA-buf begin_cpu_access and end_cpu_access callbacks.
If someone maps the memory backing the DMA-buf outside of the DMA API then we cannot really control that, but if they do map it with the DMA API then it is *not* valid to skip the begin_cpu_access and end_cpu_access callbacks. And that is the case I am addressing here.
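Concretely, I mean the normal in-kernel importer flow, roughly like this (sketch only, error handling and reservation locking elided; importer_example() and dev are illustrative stand-ins):

#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/dma-direction.h>

/* Importer maps the buffer for DMA and later hands it back to the CPU.
 * The exporter's {begin,end}_cpu_access callbacks are the only hook
 * where this device's mapping can be synced.
 */
static void importer_example(struct dma_buf *dmabuf, struct device *dev)
{
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	attach = dma_buf_attach(dmabuf, dev);
	sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL);

	/* ... device DMA into the buffer using sgt ... */

	dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
	/* ... CPU reads the results ... */
	dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);

	dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL);
	dma_buf_detach(dmabuf, attach);
}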
Right now we are not syncing the mapping for any attached device; we just zap the buffer from the CPU caches using some hacky loopback and hope that is enough for the devices :/
In addition to that, when CONFIG_DMABUF_DEBUG is enabled the DMA-buf code mangles the page addresses in the sg table to prevent importers from abusing them. That makes dma_sync_sgtable_for_cpu() and dma_sync_sgtable_for_device() crash on the exporter side.
I was not aware of this mangle_sg_table() hack; it must have been added while I was not looking :)
Seems very broken TBH. Taking a quick look, I see you call it on this line[0], then just a couple of lines later you use that same mangled page_link to walk the SG table[1].
If anyone enables CONFIG_DMABUF_DEBUG and tries to attach/map a non-contiguous DMA-BUF with a chained sg, I don't see how that doesn't crash out.
That's the reason why the DMA-buf heaps use a copy of the sg table when calling dma_sync_sgtable_for_cpu()/dma_sync_sgtable_for_device().
Could you point me to where the heaps use a copy of the SG table? I see them using the exact same SG table for dma_sync_sgtable_for_*() that was created when mapping the device attachments.
Thanks, Andrew
[0] https://github.com/torvalds/linux/blob/master/drivers/dma-buf/dma-buf.c#L114... [1] https://github.com/torvalds/linux/blob/master/drivers/dma-buf/dma-buf.c#L115...
It's basically a hack and should be removed, but for that we would need to change all clients, which is tons of work.
Regards, Christian.
Thanks Andrew
Changes for v2:
- fix attachment table use-after-free
- rebased on v6.17-rc1
Andrew Davis (3):
  udmabuf: Keep track of current device mappings
  udmabuf: Sync buffer mappings for attached devices
  udmabuf: Use module_misc_device() to register this device

 drivers/dma-buf/udmabuf.c | 133 +++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 66 deletions(-)
On 15.08.25 21:40, Andrew Davis wrote:
On 8/15/25 4:41 AM, Christian König wrote:
On 14.08.25 18:10, Andrew Davis wrote:
Hello all,
This series makes it so the udmabuf will sync the backing buffer with the set of attached devices as required for DMA-BUFs when doing {begin,end}_cpu_access.
Yeah, the reason why we didn't do that is that this doesn't even work 100% reliably in theory. So this patchset might make your use case work, but it is a bit questionable in general.
udmabuf is about turning a file descriptor created by memfd_create() into a DMA-buf. Mapping that memory can happen through the memfd as well, so it is perfectly valid to skip the DMA-buf begin_cpu_access and end_cpu_access callbacks.
If someone maps the memory backing the DMA-buf outside of the DMA API then we cannot really control that, but if they do map it with the DMA API then it is *not* valid to skip the begin_cpu_access and end_cpu_access callbacks. And that is the case I am addressing here.
Good argument, but that needs quite some documentation then. udmabuf.c could use some general documentation anyway.
Right now we are not syncing the mapping for any attached device; we just zap the buffer from the CPU caches using some hacky loopback and hope that is enough for the devices :/
Yeah that is just pretty horrible.
In addition to that, when CONFIG_DMABUF_DEBUG is enabled the DMA-buf code mangles the page addresses in the sg table to prevent importers from abusing them. That makes dma_sync_sgtable_for_cpu() and dma_sync_sgtable_for_device() crash on the exporter side.
I was not aware of this mangle_sg_table() hack; it must have been added while I was not looking :)
Seems very broken TBH. Taking a quick look, I see you call it on this line[0], then just a couple of lines later you use that same mangled page_link to walk the SG table[1].
sg_next() skips over the chain entries; only page entries are mangled. But I completely agree that this is as hackish as it can get.
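From memory the mangling boils down to roughly the following (simplified; mangle_pages() is a stand-in name and the exact mask may differ), which is also why the DMA addresses keep working while sg_page() does not:

#include <linux/scatterlist.h>

/* Roughly what CONFIG_DMABUF_DEBUG does to the table handed to the
 * importer: flip the upper bits of page_link so sg_page() is useless,
 * while keeping the low SG_* chain/end bits intact so sg_next() and
 * the DMA address/length accessors still work. Undone again on unmap.
 */
static void mangle_pages(struct sg_table *sgt)
{
	struct scatterlist *sg;
	int i;

	for_each_sgtable_sg(sgt, sg, i)
		sg->page_link ^= ~0xffUL;
}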
We just had quite a number of harsh problems and even CVEs because importers didn't get that they absolutely shouldn't touch the underlying pages of a mapping.
Allowing userspace to read or write freed memory, or to mess up the page count, is not funny at all.
If anyone enables CONFIG_DMABUF_DEBUG and tries to attach/map a non-contiguous DMA-BUF with a chained sg, I don't see how that doesn't crash out.
That's the reason why the DMA-buf heaps use a copy of the sg table when calling dma_sync_sgtable_for_cpu()/dma_sync_sgtable_for_device().
Could you point me to where the heaps use a copy of the SG table? I see them using the exact same SG table for dma_sync_sgtable_for_*() that was created when mapping the device attachments.
See dup_sg_table() in system_heap.c.
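From memory it essentially just clones the page entries into a per-attachment table, roughly like this (simplified, error handling trimmed, so not a verbatim copy of system_heap.c):

#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/err.h>

/* Clone the page entries of the heap's sg_table into a fresh table for
 * one attachment, so the heap's own copy is never handed out.
 */
static struct sg_table *dup_sg_table(struct sg_table *table)
{
	struct sg_table *new_table;
	struct scatterlist *sg, *new_sg;
	int i;

	new_table = kzalloc(sizeof(*new_table), GFP_KERNEL);
	if (!new_table)
		return ERR_PTR(-ENOMEM);

	if (sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL)) {
		kfree(new_table);
		return ERR_PTR(-ENOMEM);
	}

	new_sg = new_table->sgl;
	for_each_sgtable_sg(table, sg, i) {
		sg_set_page(new_sg, sg_page(sg), sg->length, sg->offset);
		new_sg = sg_next(new_sg);
	}

	return new_table;
}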
Apart from stopping the use of sg_table in DMA-buf altogether, what we could potentially do is improve the mangling. E.g. just allocate a new sg_table, copy over all the DMA addresses, and keep the page_link pointing to the original one.
Regards, Christian.
Thanks, Andrew
[0] https://github.com/torvalds/linux/blob/master/drivers/dma-buf/dma-buf.c#L114... [1] https://github.com/torvalds/linux/blob/master/drivers/dma-buf/dma-buf.c#L115...
It's basically a hack and should be removed, but for that we would need to change all clients, which is tons of work.
Regards, Christian.
Thanks Andrew
Changes for v2:
- fix attachment table use-after-free
- rebased on v6.17-rc1
Andrew Davis (3):
  udmabuf: Keep track of current device mappings
  udmabuf: Sync buffer mappings for attached devices
  udmabuf: Use module_misc_device() to register this device

 drivers/dma-buf/udmabuf.c | 133 +++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 66 deletions(-)