The double `by' is duplicated in the comment, remove one.
Signed-off-by: Jason Wang <wangborong(a)cdjrlc.com>
---
drivers/dma-buf/heaps/cma_heap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index 28fb04eccdd0..cd386ce639f3 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -316,7 +316,7 @@ static struct dma_buf *cma_heap_allocate(struct dma_heap *heap,
kunmap_atomic(vaddr);
/*
* Avoid wasting time zeroing memory if the process
- * has been killed by by SIGKILL
+ * has been killed by SIGKILL
*/
if (fatal_signal_pending(current))
goto free_cma;
--
2.35.1
From: Randy Li <ayaka(a)soulik.info>
This module is still at an early stage; I wrote it to show what
APIs we need here.
Let me explain why we need such a module here.
If you don't allocate buffers from a V4L2 M2M device, this module
may not be very useful. I am sure most users won't know that a
device would require them to allocate buffers from a DMA-Heap and then
import those buffers into a V4L2 queue.
Then the question goes back to why DMA-Heap. From the Android's
description, we know it is about the copyright's DRM.
When we allocate a buffer in a DMA-Heap, it may register that buffer
in the trusted execution environment so that the firmware which is running
there, or which can only be accessed from there, can use that buffer later.
The answer above leads to another thing which is not done in this
version: the DMA mapping. On some platforms, a DMA-Heap
corresponds to an IOMMU device as well. For the general case, we would
be better off assuming the device mapping should be done by each device
itself. The problem here is that we only know alloc_devs in those DMAbuf
methods, which are DMA-heaps in my design; the device from the queue
is not enough, since a plane may request another IOMMU device or table
for mapping.
Signed-off-by: Randy Li <ayaka(a)soulik.info>
---
drivers/media/common/videobuf2/Kconfig | 6 +
drivers/media/common/videobuf2/Makefile | 1 +
.../common/videobuf2/videobuf2-dma-heap.c | 350 ++++++++++++++++++
include/media/videobuf2-dma-heap.h | 30 ++
4 files changed, 387 insertions(+)
create mode 100644 drivers/media/common/videobuf2/videobuf2-dma-heap.c
create mode 100644 include/media/videobuf2-dma-heap.h
diff --git a/drivers/media/common/videobuf2/Kconfig b/drivers/media/common/videobuf2/Kconfig
index d2223a12c95f..02235077f07e 100644
--- a/drivers/media/common/videobuf2/Kconfig
+++ b/drivers/media/common/videobuf2/Kconfig
@@ -30,3 +30,9 @@ config VIDEOBUF2_DMA_SG
config VIDEOBUF2_DVB
tristate
select VIDEOBUF2_CORE
+
+config VIDEOBUF2_DMA_HEAP
+ tristate
+ select VIDEOBUF2_CORE
+ select VIDEOBUF2_MEMOPS
+ select DMABUF_HEAPS
diff --git a/drivers/media/common/videobuf2/Makefile b/drivers/media/common/videobuf2/Makefile
index a6fe3f304685..7fe65f93117f 100644
--- a/drivers/media/common/videobuf2/Makefile
+++ b/drivers/media/common/videobuf2/Makefile
@@ -10,6 +10,7 @@ endif
# (e. g. LC_ALL=C sort Makefile)
obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-common.o
obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o
+obj-$(CONFIG_VIDEOBUF2_DMA_HEAP) += videobuf2-dma-heap.o
obj-$(CONFIG_VIDEOBUF2_DMA_SG) += videobuf2-dma-sg.o
obj-$(CONFIG_VIDEOBUF2_DVB) += videobuf2-dvb.o
obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-heap.c b/drivers/media/common/videobuf2/videobuf2-dma-heap.c
new file mode 100644
index 000000000000..377b82ab8f5a
--- /dev/null
+++ b/drivers/media/common/videobuf2/videobuf2-dma-heap.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2022 Randy Li <ayaka(a)soulik.info>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include <media/videobuf2-v4l2.h>
+#include <media/videobuf2-memops.h>
+#include <media/videobuf2-dma-heap.h>
+
+/*
+ * Private per-plane state of the DMA-heap vb2 allocator. The same structure
+ * is used for buffers allocated by the alloc callback and for dma-bufs
+ * imported through the attach callback.
+ */
+struct vb2_dmaheap_buf {
+ /* device handed to the alloc/attach callbacks */
+ struct device *dev;
+ /* kernel virtual address of the buffer, NULL while not mapped */
+ void *vaddr;
+ /* plane size in bytes as passed to the attach callback */
+ unsigned long size;
+ /* dma-buf obtained by the alloc callback (allocated buffers only) */
+ struct dma_buf *dmabuf;
+ /* DMA address; the cookie callback returns a pointer to this field */
+ dma_addr_t dma_addr;
+ /* copy of the queue's dma_attrs taken at allocation time */
+ unsigned long attrs;
+ /* copy of the queue's DMA direction */
+ enum dma_data_direction dma_dir;
+ /* scatterlist returned by dma_buf_map_attachment(), NULL when unpinned */
+ struct sg_table *dma_sgt;
+
+ /* MMAP related */
+ /* installed as vm_private_data by the mmap callback */
+ struct vb2_vmarea_handler handler;
+ /* user count reported by the num_users callback */
+ refcount_t refcount;
+
+ /* DMABUF related */
+ /* attachment created by the attach callback, NULL for allocated buffers */
+ struct dma_buf_attachment *db_attach;
+};
+
+/*********************************************/
+/* callbacks for all buffers */
+/*********************************************/
+
+/*
+ * vb2 .cookie callback.
+ * @vb: vb2 buffer the plane belongs to (not used here).
+ * @buf_priv: the plane's struct vb2_dmaheap_buf.
+ *
+ * Returns a pointer to the plane's dma_addr field, which is what
+ * vb2_dmaheap_plane_dma_addr() dereferences.
+ *
+ * Kept static like the other callbacks: it is only reached through
+ * vb2_dmaheap_memops and has no prototype in videobuf2-dma-heap.h.
+ */
+static void *vb2_dmaheap_cookie(struct vb2_buffer *vb, void *buf_priv)
+{
+	struct vb2_dmaheap_buf *buf = buf_priv;
+
+	return &buf->dma_addr;
+}
+
+/*
+ * vb2 .vaddr callback.
+ * @vb: vb2 buffer the plane belongs to (not used here).
+ * @buf_priv: the plane's struct vb2_dmaheap_buf.
+ *
+ * Returns the cached kernel address if there is one. Otherwise, for an
+ * attached (imported) dma-buf, tries dma_buf_vmap() and caches the result.
+ * Returns NULL when no mapping is available.
+ */
+static void *vb2_dmaheap_vaddr(struct vb2_buffer *vb, void *buf_priv)
+{
+	struct vb2_dmaheap_buf *heap_buf = buf_priv;
+	struct iosys_map mapping;
+
+	/* Map lazily: only when nothing is cached and an attachment exists. */
+	if (!heap_buf->vaddr && heap_buf->db_attach &&
+	    dma_buf_vmap(heap_buf->db_attach->dmabuf, &mapping) == 0)
+		heap_buf->vaddr = mapping.vaddr;
+
+	return heap_buf->vaddr;
+}
+
+/* vb2 .num_users callback: report the current value of the buffer refcount. */
+static unsigned int vb2_dmaheap_num_users(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *heap_buf = buf_priv;
+	unsigned int users = refcount_read(&heap_buf->refcount);
+
+	return users;
+}
+
+/*
+ * vb2 .prepare callback.
+ *
+ * Buffers without an attachment (i.e. not imported) end CPU access on their
+ * backing dma-buf; attached buffers are left untouched.
+ */
+static void vb2_dmaheap_prepare(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *heap_buf = buf_priv;
+
+	/* TODO: DMABUF exporter will flush the cache for us */
+	if (!heap_buf->db_attach)
+		dma_buf_end_cpu_access(heap_buf->dmabuf, heap_buf->dma_dir);
+}
+
+/*
+ * vb2 .finish callback.
+ *
+ * Buffers without an attachment (i.e. not imported) begin CPU access on
+ * their backing dma-buf; attached buffers are left untouched.
+ */
+static void vb2_dmaheap_finish(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *heap_buf = buf_priv;
+
+	/* TODO: DMABUF exporter will flush the cache for us */
+	if (!heap_buf->db_attach)
+		dma_buf_begin_cpu_access(heap_buf->dmabuf, heap_buf->dma_dir);
+}
+
+/*********************************************/
+/* callbacks for MMAP buffers */
+/*********************************************/
+
+/*
+ * vb2 .put callback, also installed as the vmarea handler's put hook.
+ * @buf_priv: the struct vb2_dmaheap_buf to release.
+ *
+ * Drops one user reference. When the last reference goes away, the dma-buf
+ * reference and the device reference are released and the structure is
+ * freed.
+ *
+ * Kept static like the other callbacks: it is only reached through
+ * function pointers and has no prototype in videobuf2-dma-heap.h.
+ */
+static void vb2_dmaheap_put(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *buf = buf_priv;
+
+	if (!refcount_dec_and_test(&buf->refcount))
+		return;
+
+	dma_buf_put(buf->dmabuf);
+	put_device(buf->dev);
+	kfree(buf);
+}
+
+/*
+ * vb2 .alloc callback: allocate one plane from the DMA-heap named after @dev.
+ * @vb: vb2 buffer the plane belongs to; its queue supplies dma_attrs/dma_dir.
+ * @dev: allocation device; its name is used to look up the heap.
+ * @size: plane size in bytes.
+ *
+ * Returns the new struct vb2_dmaheap_buf, or an ERR_PTR():
+ * -EINVAL if @dev is NULL, unnamed or does not name a heap, -ENOMEM if the
+ * bookkeeping structure cannot be allocated, or the error reported by the
+ * heap allocation / dma_buf_get().
+ */
+static void *vb2_dmaheap_alloc(struct vb2_buffer *vb,
+			       struct device *dev,
+			       unsigned long size)
+{
+	struct vb2_queue *q = vb->vb2_queue;
+	struct dma_heap *heap;
+	struct vb2_dmaheap_buf *buf;
+	const char *heap_name;
+	int fd;
+	int ret;
+
+	if (WARN_ON(!dev))
+		return ERR_PTR(-EINVAL);
+
+	heap_name = dev_name(dev);
+	if (!heap_name)
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * NOTE(review): nothing here releases the heap returned by
+	 * dma_heap_find(); confirm whether it returns a counted reference.
+	 */
+	heap = dma_heap_find(heap_name);
+	if (!heap) {
+		dev_err(dev, "is not a DMA-heap device\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	/* Prevent the device from being released while the buffer is used */
+	buf->dev = get_device(dev);
+	buf->attrs = q->dma_attrs;
+	buf->dma_dir = q->dma_dir;
+	buf->size = size;
+
+	/*
+	 * TODO: heap flags
+	 *
+	 * NOTE(review): the return value is treated as a file descriptor.
+	 * It is not closed after dma_buf_get(), so it presumably stays
+	 * installed in the calling process - confirm and close it if so.
+	 */
+	fd = dma_heap_buffer_alloc(heap, size, 0, 0);
+	if (fd < 0) {
+		dev_err(dev, "failed to allocate %lu bytes from DMA-heap: %d\n",
+			size, fd);
+		ret = fd;
+		goto err_free_buf;
+	}
+
+	/* dma_buf_get() reports failure through ERR_PTR(), never NULL. */
+	buf->dmabuf = dma_buf_get(fd);
+	if (IS_ERR(buf->dmabuf)) {
+		ret = PTR_ERR(buf->dmabuf);
+		dev_err(dev, "failed to get dma-buf from fd %d: %d\n", fd, ret);
+		goto err_free_buf;
+	}
+
+	/* FIXME */
+	buf->dma_addr = 0;
+
+	/*
+	 * vaddr stays NULL (from kzalloc): a struct dma_buf pointer is not a
+	 * kernel mapping of the buffer contents and must not be stored here.
+	 */
+
+	buf->handler.refcount = &buf->refcount;
+	buf->handler.put = vb2_dmaheap_put;
+	buf->handler.arg = buf;
+
+	refcount_set(&buf->refcount, 1);
+
+	return buf;
+
+err_free_buf:
+	put_device(buf->dev);
+	kfree(buf);
+	return ERR_PTR(ret);
+}
+
+/*
+ * vb2 .mmap callback: map an allocated buffer into a userspace VMA.
+ * @buf_priv: the plane's struct vb2_dmaheap_buf.
+ * @vma: the VMA describing the requested userspace mapping.
+ *
+ * Delegates the mapping to dma_buf_mmap() on the backing dma-buf, then hooks
+ * the VMA up to the vb2 common vm_ops so that the buffer refcount follows the
+ * mapping's lifetime. Returns 0 on success, -EINVAL if @buf_priv is NULL, or
+ * the error returned by dma_buf_mmap().
+ */
+static int vb2_dmaheap_mmap(void *buf_priv, struct vm_area_struct *vma)
+{
+ struct vb2_dmaheap_buf *buf = buf_priv;
+ int ret;
+
+ if (!buf) {
+ printk(KERN_ERR "No buffer to map\n");
+ return -EINVAL;
+ }
+
+ vma->vm_flags &= ~VM_PFNMAP;
+
+ /* map the whole dma-buf starting at page offset 0 */
+ ret = dma_buf_mmap(buf->dmabuf, vma, 0);
+ if (ret) {
+ pr_err("Remapping memory failed, error: %d\n", ret);
+ return ret;
+ }
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ /*
+ * NOTE(review): vm_private_data and vm_ops are overwritten after
+ * dma_buf_mmap() has run; confirm the exporter does not depend on
+ * values it installed there itself.
+ */
+ vma->vm_private_data = &buf->handler;
+ vma->vm_ops = &vb2_common_vm_ops;
+
+ /* call open() by hand for this initial mapping */
+ vma->vm_ops->open(vma);
+
+ pr_debug("%s: mapped memid 0x%08lx at 0x%08lx, size %ld\n",
+ __func__, (unsigned long)buf->dma_addr, vma->vm_start,
+ buf->size);
+
+ return 0;
+}
+
+/*********************************************/
+/* DMABUF ops for exporters */
+/*********************************************/
+
+/*
+ * vb2 .get_dmabuf callback: hand out the dma-buf backing an allocated plane.
+ * @vb: vb2 buffer the plane belongs to (not used here).
+ * @buf_priv: the plane's struct vb2_dmaheap_buf.
+ * @flags: file flags requested by the caller (not used here).
+ *
+ * Returns the dma-buf with an extra reference held for the caller, or NULL
+ * if the plane has no backing dma-buf.
+ *
+ * The vb2 core passes the returned dma-buf to dma_buf_fd() and drops it with
+ * dma_buf_put() on failure, so it consumes one reference. Returning the
+ * buffer's own reference without taking a new one would let the exported fd
+ * and vb2_dmaheap_put() both drop the same reference.
+ */
+static struct dma_buf *vb2_dmaheap_get_dmabuf(struct vb2_buffer *vb,
+					      void *buf_priv,
+					      unsigned long flags)
+{
+	struct vb2_dmaheap_buf *buf = buf_priv;
+	struct dma_buf *dbuf = buf->dmabuf;
+
+	if (!dbuf)
+		return NULL;
+
+	get_dma_buf(dbuf);
+
+	return dbuf;
+}
+
+/*********************************************/
+/* callbacks for DMABUF buffers */
+/*********************************************/
+
+/*
+ * vb2 .map_dmabuf callback: pin an attached dma-buf for DMA.
+ * @mem_priv: the plane's struct vb2_dmaheap_buf.
+ *
+ * Maps the attachment, records the scatterlist and takes the DMA address of
+ * its first entry. Returns 0 on success (or if already pinned) and -EINVAL
+ * if the buffer is not attached or the mapping fails.
+ */
+static int vb2_dmaheap_map_dmabuf(void *mem_priv)
+{
+	struct vb2_dmaheap_buf *heap_buf = mem_priv;
+	struct sg_table *table;
+
+	if (WARN_ON(!heap_buf->db_attach)) {
+		pr_err("trying to pin a non attached buffer\n");
+		return -EINVAL;
+	}
+
+	if (WARN_ON(heap_buf->dma_sgt)) {
+		pr_err("dmabuf buffer is already pinned\n");
+		return 0;
+	}
+
+	/* get the associated scatterlist for this buffer */
+	table = dma_buf_map_attachment(heap_buf->db_attach, heap_buf->dma_dir);
+	if (IS_ERR(table)) {
+		pr_err("Error getting dmabuf scatterlist\n");
+		return -EINVAL;
+	}
+
+	heap_buf->dma_sgt = table;
+	heap_buf->dma_addr = sg_dma_address(table->sgl);
+	/* forget any cached kernel address for this plane */
+	heap_buf->vaddr = NULL;
+
+	return 0;
+}
+
+/*
+ * vb2 .unmap_dmabuf callback: unpin an attached dma-buf.
+ * @mem_priv: the plane's struct vb2_dmaheap_buf.
+ *
+ * Drops the kernel mapping if one is cached, unmaps the attachment and
+ * clears the recorded DMA address and scatterlist. Warns and returns early
+ * if the buffer is not attached or not pinned.
+ */
+static void vb2_dmaheap_unmap_dmabuf(void *mem_priv)
+{
+	struct vb2_dmaheap_buf *heap_buf = mem_priv;
+	struct sg_table *table;
+
+	if (WARN_ON(!heap_buf->db_attach)) {
+		pr_err("trying to unpin a not attached buffer\n");
+		return;
+	}
+
+	table = heap_buf->dma_sgt;
+	if (WARN_ON(!table)) {
+		pr_err("dmabuf buffer is already unpinned\n");
+		return;
+	}
+
+	if (heap_buf->vaddr) {
+		struct iosys_map mapping = IOSYS_MAP_INIT_VADDR(heap_buf->vaddr);
+
+		dma_buf_vunmap(heap_buf->db_attach->dmabuf, &mapping);
+		heap_buf->vaddr = NULL;
+	}
+
+	dma_buf_unmap_attachment(heap_buf->db_attach, table, heap_buf->dma_dir);
+
+	heap_buf->dma_sgt = NULL;
+	heap_buf->dma_addr = 0;
+}
+
+/*
+ * vb2 .detach_dmabuf callback: undo vb2_dmaheap_attach_dmabuf().
+ * @mem_priv: the plane's struct vb2_dmaheap_buf; freed by this call.
+ */
+static void vb2_dmaheap_detach_dmabuf(void *mem_priv)
+{
+	struct vb2_dmaheap_buf *heap_buf = mem_priv;
+	struct dma_buf_attachment *attach;
+
+	/* if vb2 works correctly you should never detach mapped buffer */
+	if (WARN_ON(heap_buf->dma_addr))
+		vb2_dmaheap_unmap_dmabuf(heap_buf);
+
+	/* detach this attachment */
+	attach = heap_buf->db_attach;
+	dma_buf_detach(attach->dmabuf, attach);
+
+	kfree(heap_buf);
+}
+
+/*
+ * vb2 .attach_dmabuf callback: import a dma-buf for one plane.
+ * @vb: vb2 buffer the plane belongs to; its queue supplies the DMA direction.
+ * @dev: device the dma-buf is attached to.
+ * @dbuf: the dma-buf to import.
+ * @size: minimum size in bytes the plane needs.
+ *
+ * Returns a new struct vb2_dmaheap_buf, or an ERR_PTR(): -EFAULT if @dbuf is
+ * smaller than @size, -EINVAL if @dev is NULL or the exporter name differs
+ * from the device name, -ENOMEM on allocation failure, or the error from
+ * dma_buf_attach().
+ */
+static void *vb2_dmaheap_attach_dmabuf(struct vb2_buffer *vb, struct device *dev,
+				       struct dma_buf *dbuf, unsigned long size)
+{
+	struct vb2_dmaheap_buf *heap_buf;
+	struct dma_buf_attachment *attach;
+
+	if (size > dbuf->size)
+		return ERR_PTR(-EFAULT);
+
+	if (WARN_ON(!dev))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * TODO: A better way to check whether the buffer is coming
+	 * from this heap or this heap could accept this buffer
+	 */
+	if (strcmp(dbuf->exp_name, dev_name(dev)) != 0)
+		return ERR_PTR(-EINVAL);
+
+	heap_buf = kzalloc(sizeof(*heap_buf), GFP_KERNEL);
+	if (!heap_buf)
+		return ERR_PTR(-ENOMEM);
+
+	heap_buf->dev = dev;
+
+	/* create attachment for the dmabuf with the user device */
+	attach = dma_buf_attach(dbuf, heap_buf->dev);
+	if (IS_ERR(attach)) {
+		pr_err("failed to attach dmabuf\n");
+		kfree(heap_buf);
+		return attach;
+	}
+
+	heap_buf->db_attach = attach;
+	heap_buf->size = size;
+	heap_buf->dma_dir = vb->vb2_queue->dma_dir;
+
+	return heap_buf;
+}
+
+/*
+ * Memory operations table that drivers install in their vb2 queue to back
+ * buffers with a DMA-heap.
+ *
+ * Exported because VIDEOBUF2_DMA_HEAP is tristate: drivers built as modules
+ * can only reference this table if the symbol is exported.
+ */
+const struct vb2_mem_ops vb2_dmaheap_memops = {
+	.alloc		= vb2_dmaheap_alloc,
+	.put		= vb2_dmaheap_put,
+	.get_dmabuf	= vb2_dmaheap_get_dmabuf,
+	.cookie		= vb2_dmaheap_cookie,
+	.vaddr		= vb2_dmaheap_vaddr,
+	.prepare	= vb2_dmaheap_prepare,
+	.finish		= vb2_dmaheap_finish,
+	.map_dmabuf	= vb2_dmaheap_map_dmabuf,
+	.unmap_dmabuf	= vb2_dmaheap_unmap_dmabuf,
+	.attach_dmabuf	= vb2_dmaheap_attach_dmabuf,
+	.detach_dmabuf	= vb2_dmaheap_detach_dmabuf,
+	.num_users	= vb2_dmaheap_num_users,
+	.mmap		= vb2_dmaheap_mmap,
+};
+EXPORT_SYMBOL_GPL(vb2_dmaheap_memops);
+
+MODULE_DESCRIPTION("DMA-Heap memory handling routines for videobuf2");
+MODULE_AUTHOR("Randy Li <ayaka(a)soulik.info>");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(DMA_BUF);
diff --git a/include/media/videobuf2-dma-heap.h b/include/media/videobuf2-dma-heap.h
new file mode 100644
index 000000000000..fa057f67d6e9
--- /dev/null
+++ b/include/media/videobuf2-dma-heap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2022 Randy Li <ayaka(a)soulik.info>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _MEDIA_VIDEOBUF2_DMA_HEAP_H
+#define _MEDIA_VIDEOBUF2_DMA_HEAP_H
+
+#include <media/videobuf2-v4l2.h>
+#include <linux/dma-mapping.h>
+
+/*
+ * Return the DMA address of plane @plane_no of @vb.
+ *
+ * Reads the dma_addr_t that the plane cookie points to.
+ * NOTE(review): the result of vb2_plane_cookie() is dereferenced without a
+ * NULL check - confirm callers only pass valid, allocated planes.
+ */
+static inline dma_addr_t
+vb2_dmaheap_plane_dma_addr(struct vb2_buffer *vb, unsigned int plane_no)
+{
+	const dma_addr_t *cookie = vb2_plane_cookie(vb, plane_no);
+
+	return *cookie;
+}
+
+extern const struct vb2_mem_ops vb2_dmaheap_memops;
+#endif
--
2.17.1
Doing TLB invalidation causes performance regressions, like:
[424.370996] i915 0000:00:02.0: [drm] *ERROR* rcs0 TLB invalidation did not complete in 4ms!
As reported at:
https://gitlab.freedesktop.org/drm/intel/-/issues/6424
as this is an expensive operation. So, reduce the need of it by:
- checking if the engine is awake;
- checking if the engine is not wedged;
- batching operations.
Additionally, add a workaround for a known hardware issue on some GPUs.
In order to double-check that this series won't be introducing any regressions,
I used this new IGT test:
https://patchwork.freedesktop.org/patch/495684/?series=106757&rev=1
Checking the results for 3 different patchsets, on Broadwell:
1) On the top of drm-tip (2022y-07m-14d-08h-35m-36) - e. g. with TLB
invalidation and serialization patches:
$ sudo build/tests/gem_exec_tlb|grep Subtest
Subtest close-clear: SUCCESS (10.490s)
Subtest madv-clear: SUCCESS (10.484s)
Subtest u-unmap-clear: SUCCESS (10.527s)
Subtest u-shrink-clear: SUCCESS (10.506s)
Subtest close-dumb: SUCCESS (10.165s)
Subtest madv-dumb: SUCCESS (10.177s)
Subtest u-unmap-dumb: SUCCESS (10.172s)
Subtest u-shrink-dumb: SUCCESS (10.172s)
2) With the new version of the batch TLB invalidation patches from this series:
$ sudo build/tests/gem_exec_tlb|grep Subtest
Subtest close-clear: SUCCESS (10.483s)
Subtest madv-clear: SUCCESS (10.495s)
Subtest u-unmap-clear: SUCCESS (10.545s)
Subtest u-shrink-clear: SUCCESS (10.508s)
Subtest close-dumb: SUCCESS (10.172s)
Subtest madv-dumb: SUCCESS (10.169s)
Subtest u-unmap-dumb: SUCCESS (10.174s)
Subtest u-shrink-dumb: SUCCESS (10.176s)
3) Changing the TLB invalidation routine to do nothing[1]:
$ sudo ~/freedesktop-igt/build/tests/gem_exec_tlb|grep Subtest
(gem_exec_tlb:1958) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1958) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1958) CRITICAL: Found deadbeef in a new (clear) buffer after 3 tries!
(gem_exec_tlb:1956) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1956) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1956) CRITICAL: Found deadbeef in a new (clear) buffer after 89 tries!
(gem_exec_tlb:1957) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1957) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1957) CRITICAL: Found deadbeef in a new (clear) buffer after 256 tries!
(gem_exec_tlb:1960) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1960) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1960) CRITICAL: Found deadbeef in a new (clear) buffer after 845 tries!
(gem_exec_tlb:1961) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1961) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1961) CRITICAL: Found deadbeef in a new (clear) buffer after 1138 tries!
(gem_exec_tlb:1954) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1954) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1954) CRITICAL: Found deadbeef in a new (clear) buffer after 1359 tries!
(gem_exec_tlb:1955) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1955) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1955) CRITICAL: Found deadbeef in a new (clear) buffer after 1794 tries!
(gem_exec_tlb:1959) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1959) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1959) CRITICAL: Found deadbeef in a new (clear) buffer after 2139 tries!
Dynamic subtest smem0 failed.
**** DEBUG ****
(gem_exec_tlb:1944) DEBUG: 2M hole:200000 contains poison:6b6b6b6b
(gem_exec_tlb:1944) DEBUG: Running writer for 200000 at 300000 on bcs0
(gem_exec_tlb:1944) DEBUG: Closing hole:200000 on rcs0, sample:deadbeef
(gem_exec_tlb:1944) DEBUG: Rechecking hole:200000, sample:6b6b6b6b
**** END ****
Subtest close-clear: FAIL (10.434s)
Subtest madv-clear: SUCCESS (10.479s)
Subtest u-unmap-clear: SUCCESS (10.512s)
In summary, the test properly detects failures when TLB cache invalidation doesn't happen,
as shown at result (3). It also shows that both current drm-tip and drm-tip with this series
applied don't have TLB invalidation cache issues.
[1] I applied this patch on the top of drm-tip:
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 68c2b0d8f187..0aefcd7be5e9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -930,0 +931,3 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+ // HACK: don't do TLB invalidations!!!
+ return;
+
Regards,
Mauro
Chris Wilson (4):
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
Mauro Carvalho Chehab (2):
drm/i915/gt: document with_intel_gt_pm_if_awake()
drm/i915/gt: describe the new tlb parameter at i915_vma_resource
.../gpu/drm/i915/gem/i915_gem_object_types.h | 3 +-
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 25 +++---
drivers/gpu/drm/i915/gt/intel_gt.c | 77 +++++++++++++++----
drivers/gpu/drm/i915/gt/intel_gt.h | 12 ++-
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 11 +++
drivers/gpu/drm/i915/gt/intel_gt_types.h | 18 ++++-
drivers/gpu/drm/i915/gt/intel_ppgtt.c | 8 +-
drivers/gpu/drm/i915/i915_vma.c | 33 ++++++--
drivers/gpu/drm/i915/i915_vma.h | 1 +
drivers/gpu/drm/i915/i915_vma_resource.c | 9 ++-
drivers/gpu/drm/i915/i915_vma_resource.h | 6 +-
11 files changed, 163 insertions(+), 40 deletions(-)
--
2.36.1
TLB invalidation is a slow operation. It should not be done lightly, as it
causes performance regressions, like this:
[178.821002] i915 0000:00:02.0: [drm] *ERROR* rcs0 TLB invalidation did not complete in 4ms!
This series contains:
1) some patches that make TLB invalidation happen only on
active, non-wedged engines, doing cache invalidation in batches
and only when GT objects are exposed to userspace:
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Only invalidate TLBs exposed to user manipulation
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
drm/i915/gt: Move TLB invalidation to its own file
2) It fixes two bugs, being the first a workaround:
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915: Invalidate the TLBs on each GT
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
3) It adds GuC support. Besides providing TLB invalidation on some
additional hardware, this should also help serializing GuC operations
with TLB invalidation:
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
drm/i915/guc: Define CTB based TLB invalidation routines
drm/i915: Add platform macro for selective tlb flush
drm/i915: Define GuC Based TLB invalidation routines
drm/i915: Add generic interface for tlb invalidation for XeHP
drm/i915: Use selective tlb invalidations where supported
4) It adds the corresponding kernel-doc markups for the kAPI
used for TLB invalidation.
While I could have split this into smaller pieces, I'm opting to send
them altogether, in order for CI trybot to better verify what issues
will be closed with this series.
---
v2:
- no changes. Just rebased on the top of drm-tip: 2022y-07m-14d-08h-35m-36s,
as CI trybot was having troubles applying it. Hopefully, it will now work.
Chris Wilson (7):
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915/gt: Only invalidate TLBs exposed to user manipulation
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
drm/i915/gt: Move TLB invalidation to its own file
drm/i915: Invalidate the TLBs on each GT
Mauro Carvalho Chehab (8):
drm/i915/gt: document with_intel_gt_pm_if_awake()
drm/i915/gt: describe the new tlb parameter at i915_vma_resource
drm/i915/guc: use kernel-doc for enum intel_guc_tlb_inval_mode
drm/i915/guc: document the TLB invalidation struct members
drm/i915: document tlb field at struct drm_i915_gem_object
drm/i915/gt: document TLB cache invalidation functions
drm/i915/guc: describe enum intel_guc_tlb_invalidation_type
drm/i915/guc: document TLB cache invalidation functions
Piotr Piórkowski (1):
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
Prathap Kumar Valsan (5):
drm/i915/guc: Define CTB based TLB invalidation routines
drm/i915: Add platform macro for selective tlb flush
drm/i915: Define GuC Based TLB invalidation routines
drm/i915: Add generic interface for tlb invalidation for XeHP
drm/i915: Use selective tlb invalidations where supported
drivers/gpu/drm/i915/Makefile | 1 +
.../gpu/drm/i915/gem/i915_gem_object_types.h | 6 +-
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 28 +-
drivers/gpu/drm/i915/gt/intel_engine.h | 1 +
drivers/gpu/drm/i915/gt/intel_gt.c | 125 +-------
drivers/gpu/drm/i915/gt/intel_gt.h | 2 -
.../gpu/drm/i915/gt/intel_gt_buffer_pool.h | 3 +-
drivers/gpu/drm/i915/gt/intel_gt_defines.h | 11 +
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 10 +
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 +
drivers/gpu/drm/i915/gt/intel_gt_types.h | 22 +-
drivers/gpu/drm/i915/gt/intel_ppgtt.c | 8 +-
drivers/gpu/drm/i915/gt/intel_tlb.c | 295 ++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_tlb.h | 30 ++
.../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 54 ++++
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 232 ++++++++++++++
drivers/gpu/drm/i915/gt/uc/intel_guc.h | 36 +++
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +-
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 9 +
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 +++++-
drivers/gpu/drm/i915/i915_drv.h | 4 +-
drivers/gpu/drm/i915/i915_pci.c | 1 +
drivers/gpu/drm/i915/i915_vma.c | 46 ++-
drivers/gpu/drm/i915/i915_vma.h | 2 +
drivers/gpu/drm/i915/i915_vma_resource.c | 9 +-
drivers/gpu/drm/i915/i915_vma_resource.h | 6 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
27 files changed, 910 insertions(+), 155 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_defines.h
create mode 100644 drivers/gpu/drm/i915/gt/intel_tlb.c
create mode 100644 drivers/gpu/drm/i915/gt/intel_tlb.h
--
2.36.1
On 20/07/2022 08:13, Mauro Carvalho Chehab wrote:
> On Mon, 18 Jul 2022 14:52:05 +0100
> Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com> wrote:
>
>>
>> On 14/07/2022 13:06, Mauro Carvalho Chehab wrote:
>>> From: Chris Wilson <chris.p.wilson(a)intel.com>
>>>
>>> Invalidate TLB in patch, in order to reduce performance regressions.
>>
>> "in batches"?
>
> Yeah. Will fix it.
>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
>>> index d8b94d638559..2da6c82a8bd2 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
>>> @@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
>>> void ppgtt_unbind_vma(struct i915_address_space *vm,
>>> struct i915_vma_resource *vma_res)
>>> {
>>> - if (vma_res->allocated)
>>> - vm->clear_range(vm, vma_res->start, vma_res->vma_size);
>>> + if (!vma_res->allocated)
>>> + return;
>>> +
>>> + vm->clear_range(vm, vma_res->start, vma_res->vma_size);
>>> + if (vma_res->tlb)
>>> + vma_invalidate_tlb(vm, *vma_res->tlb);
>>
>> The patch is about more than batching? If there is a security hole in
>> this area (unbind) with the current code?
>
> No, I don't think there's a security hole. The rationale for this is
> not due to it.
In this case the obvious question is why these changes are in the patch
which declares itself to be about batching invalidations? Because...
> Since commit 2f6b90da9192 ("drm/i915: Use vma resources for async unbinding"),
> VMA unbind can happen either sync or async.
>
> So, the logic needs to do TLB invalidate on two places. After this
> patch, the code at __i915_vma_evict is:
>
> struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
> {
> ...
> if (async)
> unbind_fence = i915_vma_resource_unbind(vma_res,
> &vma->obj->mm.tlb);
> else
> unbind_fence = i915_vma_resource_unbind(vma_res, NULL);
>
> vma->resource = NULL;
>
> atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
> &vma->flags);
>
> i915_vma_detach(vma);
>
> if (!async) {
> if (unbind_fence) {
> dma_fence_wait(unbind_fence, false);
> dma_fence_put(unbind_fence);
> unbind_fence = NULL;
> }
> vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb);
> }
> ...
>
> So, basically, if !async, __i915_vma_evict() will do TLB cache invalidation.
>
> However, when async is used, the actual page release will happen later,
> at this function:
>
> void ppgtt_unbind_vma(struct i915_address_space *vm,
> struct i915_vma_resource *vma_res)
> {
> if (!vma_res->allocated)
> return;
>
> vm->clear_range(vm, vma_res->start, vma_res->vma_size);
> if (vma_res->tlb)
> vma_invalidate_tlb(vm, *vma_res->tlb);
> }
.. frankly I don't follow since I don't see any page release happening
in here. Just PTE clearing.
I am explaining why it looks to me that the patch is doing two things.
Implementing batching _and_ adding invalidation points at VMA unbind
sites, while so far we had it at backing store release only. Maybe I am
wrong and perhaps I am too slow to pick up on the explanation here.
So if the patch is doing two things please split it up.
I am further confused by the invalidation call site in evict and in
unbind - why there can't be one logical site since the logical sequence
is evict -> unbind.
Regards,
Tvrtko
Hello,
This series moves all drivers to a dynamic dma-buf locking specification.
From now on all dma-buf importers are made responsible for holding
dma-buf's reservation lock around all operations performed over dma-bufs.
This common locking convention allows us to utilize reservation lock more
broadly around kernel without fearing of potential dead locks.
This patchset passes all i915 selftests. It was also tested using VirtIO,
Panfrost, Lima and Tegra drivers. I tested cases of display+GPU,
display+V4L and GPU+V4L dma-buf sharing, which covers majority of kernel
drivers since rest of the drivers share same or similar code paths.
This is a continuation of [1] where Christian König asked to factor out
the dma-buf locking changes into separate series.
[1] https://lore.kernel.org/dri-devel/20220526235040.678984-1-dmitry.osipenko@c…
Dmitry Osipenko (6):
dma-buf: Add _unlocked postfix to function names
drm/gem: Take reservation lock for vmap/vunmap operations
dma-buf: Move all dma-bufs to dynamic locking specification
dma-buf: Acquire wait-wound context on attachment
media: videobuf2: Stop using internal dma-buf lock
dma-buf: Remove internal lock
drivers/dma-buf/dma-buf.c | 198 +++++++++++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +-
drivers/gpu/drm/armada/armada_gem.c | 14 +-
drivers/gpu/drm/drm_client.c | 4 +-
drivers/gpu/drm/drm_gem.c | 28 +++
drivers/gpu/drm/drm_gem_cma_helper.c | 6 +-
drivers/gpu/drm/drm_gem_framebuffer_helper.c | 6 +-
drivers/gpu/drm/drm_gem_shmem_helper.c | 6 +-
drivers/gpu/drm/drm_prime.c | 12 +-
drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 6 +-
drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 20 +-
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 +-
.../drm/i915/gem/selftests/i915_gem_dmabuf.c | 20 +-
drivers/gpu/drm/i915/i915_gem_evict.c | 2 +-
drivers/gpu/drm/i915/i915_gem_ww.c | 26 ++-
drivers/gpu/drm/i915/i915_gem_ww.h | 15 +-
drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 8 +-
drivers/gpu/drm/qxl/qxl_object.c | 17 +-
drivers/gpu/drm/qxl/qxl_prime.c | 4 +-
drivers/gpu/drm/tegra/gem.c | 27 +--
drivers/infiniband/core/umem_dmabuf.c | 11 +-
.../common/videobuf2/videobuf2-dma-contig.c | 26 +--
.../media/common/videobuf2/videobuf2-dma-sg.c | 23 +-
.../common/videobuf2/videobuf2-vmalloc.c | 17 +-
.../platform/nvidia/tegra-vde/dmabuf-cache.c | 12 +-
drivers/misc/fastrpc.c | 12 +-
drivers/xen/gntdev-dmabuf.c | 14 +-
include/drm/drm_gem.h | 3 +
include/linux/dma-buf.h | 49 ++---
32 files changed, 347 insertions(+), 257 deletions(-)
--
2.36.1
Hello,
I found a bug in the MOST USB driver.
When the driver fails at
mdev->conf = kcalloc(num_endpoints, sizeof(*mdev->conf), GFP_KERNEL);
I got the following warning message:
[ 15.406256] kobject: '(null)' (ffff8881068f8000): is not
initialized, yet kobject_put() is being called.
[ 15.406986] WARNING: CPU: 3 PID: 396 at lib/kobject.c:720
kobject_put+0x6e/0x1c0
[ 15.410120] RIP: 0010:kobject_put+0x6e/0x1c0
[ 15.410470] Code: 01 75 29 4c 89 f8 48 c1 e8 03 80 3c 28 00 74 08
4c 89 ff e8 14 2e 73 ff 49 8b 37 48 c7 c7 c0 fc de 85 4c 89 fa e8 e2
61 21 ff <0f> 0b 49 8d 5f 38 48 89 df be 04 00 00 00 e8 df 2e 73 ff b8
ff ff
[ 15.416529] Call Trace:
[ 15.416896] hdm_probe+0xf3d/0x1090 [most_usb]
Since I'm not familiar with the driver, I ask for your help to solve
the warning.
regards,
Zheyu Ma