On 5/14/25 13:02, wangtao wrote:
-----Original Message----- From: Christian König christian.koenig@amd.com Sent: Tuesday, May 13, 2025 9:18 PM To: wangtao tao.wangtao@honor.com; sumit.semwal@linaro.org; benjamin.gaignard@collabora.com; Brian.Starkey@arm.com; jstultz@google.com; tjmercier@google.com Cc: linux-media@vger.kernel.org; dri-devel@lists.freedesktop.org; linaro- mm-sig@lists.linaro.org; linux-kernel@vger.kernel.org; wangbintian(BintianWang) bintian.wang@honor.com; yipengxiang yipengxiang@honor.com; liulu.liu@honor.com; feng.han@honor.com Subject: Re: [PATCH 2/2] dmabuf/heaps: implement DMA_BUF_IOCTL_RW_FILE for system_heap
On 5/13/25 14:30, wangtao wrote:
-----Original Message----- From: Christian König christian.koenig@amd.com Sent: Tuesday, May 13, 2025 7:32 PM To: wangtao tao.wangtao@honor.com; sumit.semwal@linaro.org; benjamin.gaignard@collabora.com; Brian.Starkey@arm.com; jstultz@google.com; tjmercier@google.com Cc: linux-media@vger.kernel.org; dri-devel@lists.freedesktop.org; linaro- mm-sig@lists.linaro.org; linux-kernel@vger.kernel.org; wangbintian(BintianWang) bintian.wang@honor.com; yipengxiang yipengxiang@honor.com; liulu.liu@honor.com; feng.han@honor.com Subject: Re: [PATCH 2/2] dmabuf/heaps: implement DMA_BUF_IOCTL_RW_FILE for system_heap
On 5/13/25 11:28, wangtao wrote:
Support direct file I/O operations for system_heap dma-buf objects. Implementation includes:
- Convert sg_table to bio_vec
That is usually illegal for DMA-bufs.
[wangtao] The term 'convert' is misleading in this context. The appropriate
phrasing should be: Construct bio_vec from sg_table.
Well it doesn't matter what you call it. Touching the page inside an sg table of a DMA-buf is illegal, we even have code to actively prevent that.
[wangtao] For a driver using DMA-buf: Don't touch pages in the sg_table. But the system heap exporter (sg_table owner) should be allowed to use them.
Good point that might be possible.
If a driver takes ownership via dma_buf_map_attachment or similar calls, the exporter must stop using the sg_table. User-space programs should call DMA_BUF_IOCTL_RW_FILE only when the DMA-buf is not attached. The exporter must check ownership (e.g., ensure no map_dma_buf/vmap is active) and block new calls during operations. I'll add these checks in patch v2.
Once more: This approach was already rejected multiple times! Please use udmabuf instead!
The hack you came up here is simply not necessary.
[wangtao] Many people need DMA-buf direct I/O. I tried it 2 years ago. My method is simpler, uses less CPU/power, and performs better:
I don't think that this is a valid argument.
- Speed: 3418 MB/s vs. 2073 MB/s (udmabuf) at 1GHz CPU.
- udmabuf wastes half its CPU time on __get_user_pages.
- Creating 32x32MB DMA-bufs + reading 1GB file takes 346 ms vs. 1145 ms for udmabuf (10x slower) vs. 1503 ms for DMA-buf normal.
Why would using udmabuf be slower here?
udmabuf is slightly faster but not enough. Switching to udmabuf is easy for small apps but hard in complex systems without major benefits.
Yeah, but your approach here is a rather clear hack. Using udmabuf is much more cleaner and generally accepted by everybody now.
As far as I can see I have to reject your approach here.
Regards, Christian.
Regards, Christian.
Appreciate your feedback.
Regards, Christian.
- Set IOCB_DIRECT when O_DIRECT is supported
- Invoke vfs_iocb_iter_read()/vfs_iocb_iter_write() for actual I/O
Performance metrics (UFS 4.0 device @4GB/s, Arm64 CPU @1GHz):
| Metric             |    1MB |    8MB |   64MB |  1024MB |  3072MB |
|--------------------|-------:|-------:|-------:|--------:|--------:|
| Buffer Read (us)   |   1658 |   9028 |  69295 | 1019783 | 2978179 |
| Direct Read (us)   |    707 |   2647 |  18689 |  299627 |  937758 |
| Buffer Rate (MB/s) |    603 |    886 |    924 |    1004 |    1032 |
| Direct Rate (MB/s) |   1414 |   3022 |   3425 |    3418 |    3276 |
Signed-off-by: wangtao tao.wangtao@honor.com
drivers/dma-buf/heaps/system_heap.c | 118 ++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+)
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 26d5dc89ea16..f7b71b9843aa 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -20,6 +20,8 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/bvec.h> +#include <linux/uio.h>
static struct dma_heap *sys_heap;
@@ -281,6 +283,121 @@ static void system_heap_vunmap(struct
dma_buf
*dmabuf, struct iosys_map *map)
iosys_map_clear(map); }
+static struct bio_vec *system_heap_init_bvec(struct
system_heap_buffer *buffer,
size_t offset, size_t len, int *nr_segs) {
- struct sg_table *sgt = &buffer->sg_table;
- struct scatterlist *sg;
- size_t length = 0;
- unsigned int i, k = 0;
- struct bio_vec *bvec;
- size_t sg_left;
- size_t sg_offset;
- size_t sg_len;
- bvec = kvcalloc(sgt->nents, sizeof(*bvec), GFP_KERNEL);
- if (!bvec)
return NULL;
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
length += sg->length;
if (length <= offset)
continue;
sg_left = length - offset;
sg_offset = sg->offset + sg->length - sg_left;
sg_len = min(sg_left, len);
bvec[k].bv_page = sg_page(sg);
bvec[k].bv_len = sg_len;
bvec[k].bv_offset = sg_offset;
k++;
offset += sg_len;
len -= sg_len;
if (len <= 0)
break;
- }
- *nr_segs = k;
- return bvec;
+}
+static int system_heap_rw_file(struct system_heap_buffer *buffer, +bool
is_read,
bool direct_io, struct file *filp, loff_t file_offset,
size_t buf_offset, size_t len)
+{
- struct bio_vec *bvec;
- int nr_segs = 0;
- struct iov_iter iter;
- struct kiocb kiocb;
- ssize_t ret = 0;
- if (direct_io) {
if (!(filp->f_mode & FMODE_CAN_ODIRECT))
return -EINVAL;
- }
- bvec = system_heap_init_bvec(buffer, buf_offset, len, &nr_segs);
- if (!bvec)
return -ENOMEM;
- iov_iter_bvec(&iter, is_read ? ITER_DEST : ITER_SOURCE, bvec,
nr_segs, len);
- init_sync_kiocb(&kiocb, filp);
- kiocb.ki_pos = file_offset;
- if (direct_io)
kiocb.ki_flags |= IOCB_DIRECT;
- while (kiocb.ki_pos < file_offset + len) {
if (is_read)
ret = vfs_iocb_iter_read(filp, &kiocb, &iter);
else
ret = vfs_iocb_iter_write(filp, &kiocb, &iter);
if (ret <= 0)
break;
- }
- kvfree(bvec);
- return ret < 0 ? ret : 0;
+}
+static int system_heap_dma_buf_rw_file(struct dma_buf *dmabuf,
struct dma_buf_rw_file *back)
+{
- struct system_heap_buffer *buffer = dmabuf->priv;
- int ret = 0;
- __u32 op = back->flags & DMA_BUF_RW_FLAGS_OP_MASK;
- bool direct_io = back->flags & DMA_BUF_RW_FLAGS_DIRECT;
- struct file *filp;
- if (op != DMA_BUF_RW_FLAGS_READ && op !=
DMA_BUF_RW_FLAGS_WRITE)
return -EINVAL;
- if (direct_io) {
if (!PAGE_ALIGNED(back->file_offset) ||
!PAGE_ALIGNED(back->buf_offset) ||
!PAGE_ALIGNED(back->buf_len))
return -EINVAL;
- }
- if (!back->buf_len || back->buf_len > dmabuf->size ||
back->buf_offset >= dmabuf->size ||
back->buf_offset + back->buf_len > dmabuf->size)
return -EINVAL;
- if (back->file_offset + back->buf_len < back->file_offset)
return -EINVAL;
- filp = fget(back->fd);
- if (!filp)
return -EBADF;
- mutex_lock(&buffer->lock);
- ret = system_heap_rw_file(buffer, op ==
DMA_BUF_RW_FLAGS_READ, direct_io,
filp, back->file_offset, back->buf_offset, back-
buf_len);
- mutex_unlock(&buffer->lock);
- fput(filp);
- return ret;
+}
static void system_heap_dma_buf_release(struct dma_buf *dmabuf) { struct system_heap_buffer *buffer = dmabuf->priv; @@ -308,6
+425,7
@@ static const struct dma_buf_ops system_heap_buf_ops = { .mmap = system_heap_mmap, .vmap = system_heap_vmap, .vunmap = system_heap_vunmap,
- .rw_file = system_heap_dma_buf_rw_file, .release = system_heap_dma_buf_release, };