The double `by' is duplicated in the comment, remove one.
Signed-off-by: Jason Wang <wangborong(a)cdjrlc.com>
---
drivers/dma-buf/heaps/cma_heap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
index 28fb04eccdd0..cd386ce639f3 100644
--- a/drivers/dma-buf/heaps/cma_heap.c
+++ b/drivers/dma-buf/heaps/cma_heap.c
@@ -316,7 +316,7 @@ static struct dma_buf *cma_heap_allocate(struct dma_heap *heap,
kunmap_atomic(vaddr);
/*
* Avoid wasting time zeroing memory if the process
- * has been killed by by SIGKILL
+ * has been killed by SIGKILL
*/
if (fatal_signal_pending(current))
goto free_cma;
--
2.35.1
From: Randy Li <ayaka(a)soulik.info>
This module is still at an early stage; I wrote it to show which
APIs we need here.
Let me explain why we need such a module here.
If you do not allocate buffers from a V4L2 M2M device, this module
may not be very useful. I am sure that most users won't know that a
device may require them to allocate buffers from a DMA-Heap and then
import those buffers into a V4L2 queue.
Then the question goes back to why DMA-Heap. From Android's
description, we know it is about DRM (copyright protection).
When we allocate a buffer in a DMA-Heap, it may register that buffer
in the trusted execution environment, so that the firmware which is running
there, or which can only be accessed from there, can use that buffer later.
The answer above leads to another thing which is not done in this
version: the DMA mapping. On some platforms a DMA-Heap is
responsible for an IOMMU device as well, but for the general case we had
better assume that the device mapping should be done by each device
itself. The problem is that we only know alloc_devs in those DMABUF
methods, which are DMA-heaps in my design; the device from the queue
is not enough, as a plane may require another IOMMU device or table
for mapping.
Signed-off-by: Randy Li <ayaka(a)soulik.info>
---
drivers/media/common/videobuf2/Kconfig | 6 +
drivers/media/common/videobuf2/Makefile | 1 +
.../common/videobuf2/videobuf2-dma-heap.c | 350 ++++++++++++++++++
include/media/videobuf2-dma-heap.h | 30 ++
4 files changed, 387 insertions(+)
create mode 100644 drivers/media/common/videobuf2/videobuf2-dma-heap.c
create mode 100644 include/media/videobuf2-dma-heap.h
diff --git a/drivers/media/common/videobuf2/Kconfig b/drivers/media/common/videobuf2/Kconfig
index d2223a12c95f..02235077f07e 100644
--- a/drivers/media/common/videobuf2/Kconfig
+++ b/drivers/media/common/videobuf2/Kconfig
@@ -30,3 +30,9 @@ config VIDEOBUF2_DMA_SG
config VIDEOBUF2_DVB
tristate
select VIDEOBUF2_CORE
+
+config VIDEOBUF2_DMA_HEAP
+ tristate
+ select VIDEOBUF2_CORE
+ select VIDEOBUF2_MEMOPS
+ select DMABUF_HEAPS
diff --git a/drivers/media/common/videobuf2/Makefile b/drivers/media/common/videobuf2/Makefile
index a6fe3f304685..7fe65f93117f 100644
--- a/drivers/media/common/videobuf2/Makefile
+++ b/drivers/media/common/videobuf2/Makefile
@@ -10,6 +10,7 @@ endif
# (e. g. LC_ALL=C sort Makefile)
obj-$(CONFIG_VIDEOBUF2_CORE) += videobuf2-common.o
obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o
+obj-$(CONFIG_VIDEOBUF2_DMA_HEAP) += videobuf2-dma-heap.o
obj-$(CONFIG_VIDEOBUF2_DMA_SG) += videobuf2-dma-sg.o
obj-$(CONFIG_VIDEOBUF2_DVB) += videobuf2-dvb.o
obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-heap.c b/drivers/media/common/videobuf2/videobuf2-dma-heap.c
new file mode 100644
index 000000000000..377b82ab8f5a
--- /dev/null
+++ b/drivers/media/common/videobuf2/videobuf2-dma-heap.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2022 Randy Li <ayaka(a)soulik.info>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/refcount.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include <media/videobuf2-v4l2.h>
+#include <media/videobuf2-memops.h>
+#include <media/videobuf2-dma-heap.h>
+
+/*
+ * Per-plane private state of the DMA-heap vb2 allocator. The same
+ * structure is used for buffers allocated by vb2_dmaheap_alloc() and for
+ * DMABUFs attached through vb2_dmaheap_attach_dmabuf().
+ */
+struct vb2_dmaheap_buf {
+ struct device *dev;
+ void *vaddr; /* cached kernel address, NULL when none is known */
+ unsigned long size;
+ struct dma_buf *dmabuf; /* backing dma-buf of an allocated buffer */
+ dma_addr_t dma_addr; /* value exposed through the cookie op */
+ unsigned long attrs;
+ enum dma_data_direction dma_dir;
+ struct sg_table *dma_sgt; /* non-NULL only while an attachment is mapped */
+
+ /* MMAP related */
+ struct vb2_vmarea_handler handler;
+ refcount_t refcount;
+
+ /* DMABUF related */
+ struct dma_buf_attachment *db_attach;
+};
+
+/*********************************************/
+/* callbacks for all buffers */
+/*********************************************/
+
+/* vb2 .cookie op: hand back a pointer to the plane's stored DMA address. */
+void *vb2_dmaheap_cookie(struct vb2_buffer *vb, void *buf_priv)
+{
+	struct vb2_dmaheap_buf *priv = buf_priv;
+	dma_addr_t *cookie = &priv->dma_addr;
+
+	return cookie;
+}
+
+/*
+ * vb2 .vaddr op: return the cached kernel address. When none is cached
+ * and the buffer is an attached DMABUF, try dma_buf_vmap() and cache the
+ * address it yields. Returns NULL when no kernel address is available.
+ */
+static void *vb2_dmaheap_vaddr(struct vb2_buffer *vb, void *buf_priv)
+{
+	struct vb2_dmaheap_buf *priv = buf_priv;
+	struct iosys_map kmap;
+
+	if (!priv->vaddr && priv->db_attach &&
+	    dma_buf_vmap(priv->db_attach->dmabuf, &kmap) == 0)
+		priv->vaddr = kmap.vaddr;
+
+	return priv->vaddr;
+}
+
+/* vb2 .num_users op: report the current value of the buffer refcount. */
+static unsigned int vb2_dmaheap_num_users(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *priv = buf_priv;
+	unsigned int users = refcount_read(&priv->refcount);
+
+	return users;
+}
+
+/*
+ * vb2 .prepare op: for buffers that are not attached DMABUFs, end CPU
+ * access on the backing dma-buf. Attached DMABUFs are left alone.
+ */
+static void vb2_dmaheap_prepare(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *priv = buf_priv;
+
+	/* TODO: DMABUF exporter will flush the cache for us */
+	if (!priv->db_attach)
+		dma_buf_end_cpu_access(priv->dmabuf, priv->dma_dir);
+}
+
+/*
+ * vb2 .finish op: for buffers that are not attached DMABUFs, begin CPU
+ * access on the backing dma-buf. Attached DMABUFs are left alone.
+ */
+static void vb2_dmaheap_finish(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *priv = buf_priv;
+
+	/* TODO: DMABUF exporter will flush the cache for us */
+	if (!priv->db_attach)
+		dma_buf_begin_cpu_access(priv->dmabuf, priv->dma_dir);
+}
+
+/*********************************************/
+/* callbacks for MMAP buffers */
+/*********************************************/
+
+/*
+ * vb2 .put op, also used as the vmarea handler's put callback: drop one
+ * reference and, once the count reaches zero, release the dma-buf, the
+ * device reference and the state itself.
+ */
+void vb2_dmaheap_put(void *buf_priv)
+{
+	struct vb2_dmaheap_buf *priv = buf_priv;
+
+	if (refcount_dec_and_test(&priv->refcount)) {
+		dma_buf_put(priv->dmabuf);
+		put_device(priv->dev);
+		kfree(priv);
+	}
+}
+
+/*
+ * vb2 .alloc op: allocate @size bytes for one plane from the DMA-heap
+ * whose name equals dev_name(@dev).
+ *
+ * Returns the new buffer state with its refcount set to 1, or an
+ * ERR_PTR(): -EINVAL when @dev is NULL, has no name or names no heap,
+ * -ENOMEM when the state cannot be allocated, otherwise the error
+ * reported by the heap allocation or by dma_buf_get().
+ */
+static void *vb2_dmaheap_alloc(struct vb2_buffer *vb,
+			       struct device *dev,
+			       unsigned long size)
+{
+	struct vb2_queue *q = vb->vb2_queue;
+	struct dma_heap *heap;
+	struct vb2_dmaheap_buf *buf;
+	const char *heap_name;
+	int ret;
+
+	if (WARN_ON(!dev))
+		return ERR_PTR(-EINVAL);
+
+	heap_name = dev_name(dev);
+	if (!heap_name)
+		return ERR_PTR(-EINVAL);
+
+	/* NOTE(review): the heap looked up here is never released - confirm */
+	heap = dma_heap_find(heap_name);
+	if (!heap) {
+		dev_err(dev, "is not a DMA-heap device\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	/* Prevent the device from being released while the buffer is used */
+	buf->dev = get_device(dev);
+	buf->attrs = q->dma_attrs;
+	buf->dma_dir = q->dma_dir;
+	/* recorded so that vb2_dmaheap_mmap() can report the real size */
+	buf->size = size;
+
+	/* TODO: heap flags */
+	ret = dma_heap_buffer_alloc(heap, size, 0, 0);
+	if (ret < 0) {
+		dev_err(dev, "failed to allocate %lu bytes from DMA-heap: %d\n",
+			size, ret);
+		goto err_free;
+	}
+
+	/*
+	 * NOTE(review): ret is used as a file descriptor here and is never
+	 * closed afterwards - confirm whether it leaks into the caller.
+	 */
+	buf->dmabuf = dma_buf_get(ret);
+	if (IS_ERR(buf->dmabuf)) {
+		ret = PTR_ERR(buf->dmabuf);
+		goto err_free;
+	}
+
+	/* FIXME */
+	buf->dma_addr = 0;
+
+	/*
+	 * FIXME: no kernel mapping is created yet. A struct dma_buf pointer
+	 * is not a kernel virtual address, so it must not be published as
+	 * one; buf->vaddr stays NULL (from kzalloc()) whatever
+	 * DMA_ATTR_NO_KERNEL_MAPPING says, and vb2_dmaheap_vaddr() reports
+	 * NULL for buffers allocated here.
+	 */
+
+	buf->handler.refcount = &buf->refcount;
+	buf->handler.put = vb2_dmaheap_put;
+	buf->handler.arg = buf;
+
+	refcount_set(&buf->refcount, 1);
+
+	return buf;
+
+err_free:
+	put_device(buf->dev);
+	kfree(buf);
+	return ERR_PTR(ret);
+}
+
+/*
+ * vb2 .mmap op: map the backing dma-buf into @vma with dma_buf_mmap(),
+ * then install vb2_common_vm_ops with this buffer's handler and call its
+ * open callback.
+ *
+ * Returns 0 on success, -EINVAL when there is no buffer, or the error
+ * returned by dma_buf_mmap().
+ */
+static int vb2_dmaheap_mmap(void *buf_priv, struct vm_area_struct *vma)
+{
+	struct vb2_dmaheap_buf *priv = buf_priv;
+	int err;
+
+	if (priv == NULL) {
+		printk(KERN_ERR "No buffer to map\n");
+		return -EINVAL;
+	}
+
+	vma->vm_flags &= ~VM_PFNMAP;
+
+	err = dma_buf_mmap(priv->dmabuf, vma, 0);
+	if (err != 0) {
+		pr_err("Remapping memory failed, error: %d\n", err);
+		return err;
+	}
+
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_private_data = &priv->handler;
+	vma->vm_ops = &vb2_common_vm_ops;
+
+	vma->vm_ops->open(vma);
+
+	pr_debug("%s: mapped memid 0x%08lx at 0x%08lx, size %ld\n",
+		 __func__, (unsigned long)priv->dma_addr, vma->vm_start,
+		 priv->size);
+
+	return 0;
+}
+
+/*********************************************/
+/* DMABUF ops for exporters */
+/*********************************************/
+
+/*
+ * vb2 .get_dmabuf op: return the dma-buf backing an allocated buffer.
+ *
+ * The caller gets a reference of its own. The reference held in
+ * buf->dmabuf is dropped by vb2_dmaheap_put(), so handing that one out
+ * would let the dma-buf be released twice. @flags is currently unused.
+ */
+static struct dma_buf *vb2_dmaheap_get_dmabuf(struct vb2_buffer *vb,
+					       void *buf_priv,
+					       unsigned long flags)
+{
+	struct vb2_dmaheap_buf *buf = buf_priv;
+	struct dma_buf *dbuf = buf->dmabuf;
+
+	if (dbuf)
+		get_dma_buf(dbuf);
+
+	return dbuf;
+}
+
+/*********************************************/
+/* callbacks for DMABUF buffers */
+/*********************************************/
+
+/*
+ * vb2 .map_dmabuf op: map the attachment and record both the resulting
+ * scatterlist and the DMA address of its first entry.
+ *
+ * Returns 0 on success (also when the buffer was already mapped), -EINVAL
+ * when no dmabuf is attached, or the error from dma_buf_map_attachment().
+ */
+static int vb2_dmaheap_map_dmabuf(void *mem_priv)
+{
+	struct vb2_dmaheap_buf *buf = mem_priv;
+	struct sg_table *sgt;
+
+	if (WARN_ON(!buf->db_attach)) {
+		pr_err("trying to pin a non attached buffer\n");
+		return -EINVAL;
+	}
+
+	if (WARN_ON(buf->dma_sgt)) {
+		pr_err("dmabuf buffer is already pinned\n");
+		return 0;
+	}
+
+	/* get the associated scatterlist for this buffer */
+	sgt = dma_buf_map_attachment(buf->db_attach, buf->dma_dir);
+	if (IS_ERR(sgt)) {
+		pr_err("Error getting dmabuf scatterlist\n");
+		/* report the exporter's error instead of a blanket -EINVAL */
+		return PTR_ERR(sgt);
+	}
+
+	buf->dma_addr = sg_dma_address(sgt->sgl);
+	buf->dma_sgt = sgt;
+	/* forget any cached kernel address; vb2_dmaheap_vaddr() maps on demand */
+	buf->vaddr = NULL;
+
+	return 0;
+}
+
+/*
+ * vb2 .unmap_dmabuf op: undo vb2_dmaheap_map_dmabuf(). Drops the kernel
+ * mapping if one is cached, unmaps the attachment and clears the stored
+ * DMA address and scatterlist. Warns and returns early when the buffer is
+ * not attached or not mapped.
+ */
+static void vb2_dmaheap_unmap_dmabuf(void *mem_priv)
+{
+	struct vb2_dmaheap_buf *priv = mem_priv;
+	struct sg_table *table = priv->dma_sgt;
+
+	if (WARN_ON(!priv->db_attach)) {
+		pr_err("trying to unpin a not attached buffer\n");
+		return;
+	}
+
+	if (WARN_ON(!table)) {
+		pr_err("dmabuf buffer is already unpinned\n");
+		return;
+	}
+
+	if (priv->vaddr) {
+		struct iosys_map kmap = IOSYS_MAP_INIT_VADDR(priv->vaddr);
+
+		dma_buf_vunmap(priv->db_attach->dmabuf, &kmap);
+		priv->vaddr = NULL;
+	}
+
+	dma_buf_unmap_attachment(priv->db_attach, table, priv->dma_dir);
+
+	priv->dma_sgt = NULL;
+	priv->dma_addr = 0;
+}
+
+/*
+ * vb2 .detach_dmabuf op: detach from the dma-buf and free the state.
+ * A buffer that still has a DMA address is unmapped first, with a warning.
+ */
+static void vb2_dmaheap_detach_dmabuf(void *mem_priv)
+{
+	struct vb2_dmaheap_buf *priv = mem_priv;
+	struct dma_buf_attachment *attach;
+
+	/* if vb2 works correctly you should never detach mapped buffer */
+	if (WARN_ON(priv->dma_addr))
+		vb2_dmaheap_unmap_dmabuf(priv);
+
+	/* detach this attachment */
+	attach = priv->db_attach;
+	dma_buf_detach(attach->dmabuf, attach);
+	kfree(priv);
+}
+
+/*
+ * vb2 .attach_dmabuf op: attach @dbuf to @dev for use as a plane of at
+ * least @size bytes.
+ *
+ * Returns the new buffer state, or an ERR_PTR(): -EFAULT when @dbuf is
+ * smaller than @size, -EINVAL when @dev is NULL or the exporter name of
+ * @dbuf differs from dev_name(@dev), -ENOMEM when the state cannot be
+ * allocated, or the error returned by dma_buf_attach().
+ */
+static void *vb2_dmaheap_attach_dmabuf(struct vb2_buffer *vb, struct device *dev,
+				       struct dma_buf *dbuf, unsigned long size)
+{
+	struct vb2_dmaheap_buf *priv;
+	struct dma_buf_attachment *attach;
+
+	if (dbuf->size < size)
+		return ERR_PTR(-EFAULT);
+
+	if (WARN_ON(!dev))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * TODO: A better way to check whether the buffer is coming
+	 * from this heap or this heap could accept this buffer
+	 */
+	if (strcmp(dbuf->exp_name, dev_name(dev)) != 0)
+		return ERR_PTR(-EINVAL);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	priv->dev = dev;
+
+	/* create attachment for the dmabuf with the user device */
+	attach = dma_buf_attach(dbuf, priv->dev);
+	if (IS_ERR(attach)) {
+		pr_err("failed to attach dmabuf\n");
+		kfree(priv);
+		return attach;
+	}
+
+	priv->db_attach = attach;
+	priv->size = size;
+	priv->dma_dir = vb->vb2_queue->dma_dir;
+
+	return priv;
+}
+
+/*
+ * vb2 memory-operations table of the DMA-heap allocator, declared in
+ * <media/videobuf2-dma-heap.h> for use by drivers.
+ *
+ * The symbol is exported because VIDEOBUF2_DMA_HEAP is tristate: when
+ * this file is built as a module, its users in other modules can only
+ * reference the table through an exported symbol.
+ */
+const struct vb2_mem_ops vb2_dmaheap_memops = {
+	.alloc = vb2_dmaheap_alloc,
+	.put = vb2_dmaheap_put,
+	.get_dmabuf = vb2_dmaheap_get_dmabuf,
+	.cookie = vb2_dmaheap_cookie,
+	.vaddr = vb2_dmaheap_vaddr,
+	.prepare = vb2_dmaheap_prepare,
+	.finish = vb2_dmaheap_finish,
+	.map_dmabuf = vb2_dmaheap_map_dmabuf,
+	.unmap_dmabuf = vb2_dmaheap_unmap_dmabuf,
+	.attach_dmabuf = vb2_dmaheap_attach_dmabuf,
+	.detach_dmabuf = vb2_dmaheap_detach_dmabuf,
+	.num_users = vb2_dmaheap_num_users,
+	.mmap = vb2_dmaheap_mmap,
+};
+EXPORT_SYMBOL_GPL(vb2_dmaheap_memops);
+
+MODULE_DESCRIPTION("DMA-Heap memory handling routines for videobuf2");
+MODULE_AUTHOR("Randy Li <ayaka(a)soulik.info>");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(DMA_BUF);
diff --git a/include/media/videobuf2-dma-heap.h b/include/media/videobuf2-dma-heap.h
new file mode 100644
index 000000000000..fa057f67d6e9
--- /dev/null
+++ b/include/media/videobuf2-dma-heap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2022 Randy Li <ayaka(a)soulik.info>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _MEDIA_VIDEOBUF2_DMA_HEAP_H
+#define _MEDIA_VIDEOBUF2_DMA_HEAP_H
+
+#include <media/videobuf2-v4l2.h>
+#include <linux/dma-mapping.h>
+
+/*
+ * Return the DMA address stored behind the cookie of plane @plane_no of
+ * @vb. The cookie is dereferenced without a NULL check.
+ */
+static inline dma_addr_t
+vb2_dmaheap_plane_dma_addr(struct vb2_buffer *vb, unsigned int plane_no)
+{
+	const dma_addr_t *cookie = vb2_plane_cookie(vb, plane_no);
+
+	return *cookie;
+}
+
+extern const struct vb2_mem_ops vb2_dmaheap_memops;
+#endif
--
2.17.1
Doing TLB invalidation causes performance regressions, like:
[424.370996] i915 0000:00:02.0: [drm] *ERROR* rcs0 TLB invalidation did not complete in 4ms!
As reported at:
https://gitlab.freedesktop.org/drm/intel/-/issues/6424
as this is an expensive operation. So, reduce the need of it by:
- checking if the engine is awake;
- checking if the engine is not wedged;
- batching operations.
Additionally, add a workaround for a known hardware issue on some GPUs.
In order to double-check that this series won't be introducing any regressions,
I used this new IGT test:
https://patchwork.freedesktop.org/patch/495684/?series=106757&rev=1
Checking the results for 3 different patchsets, on Broadwell:
1) On the top of drm-tip (2022y-07m-14d-08h-35m-36) - e. g. with TLB
invalidation and serialization patches:
$ sudo build/tests/gem_exec_tlb|grep Subtest
Subtest close-clear: SUCCESS (10.490s)
Subtest madv-clear: SUCCESS (10.484s)
Subtest u-unmap-clear: SUCCESS (10.527s)
Subtest u-shrink-clear: SUCCESS (10.506s)
Subtest close-dumb: SUCCESS (10.165s)
Subtest madv-dumb: SUCCESS (10.177s)
Subtest u-unmap-dumb: SUCCESS (10.172s)
Subtest u-shrink-dumb: SUCCESS (10.172s)
2) With the new version of the batch TLB invalidation patches from this series:
$ sudo build/tests/gem_exec_tlb|grep Subtest
Subtest close-clear: SUCCESS (10.483s)
Subtest madv-clear: SUCCESS (10.495s)
Subtest u-unmap-clear: SUCCESS (10.545s)
Subtest u-shrink-clear: SUCCESS (10.508s)
Subtest close-dumb: SUCCESS (10.172s)
Subtest madv-dumb: SUCCESS (10.169s)
Subtest u-unmap-dumb: SUCCESS (10.174s)
Subtest u-shrink-dumb: SUCCESS (10.176s)
3) Changing the TLB invalidation routine to do nothing[1]:
$ sudo ~/freedesktop-igt/build/tests/gem_exec_tlb|grep Subtest
(gem_exec_tlb:1958) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1958) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1958) CRITICAL: Found deadbeef in a new (clear) buffer after 3 tries!
(gem_exec_tlb:1956) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1956) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1956) CRITICAL: Found deadbeef in a new (clear) buffer after 89 tries!
(gem_exec_tlb:1957) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1957) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1957) CRITICAL: Found deadbeef in a new (clear) buffer after 256 tries!
(gem_exec_tlb:1960) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1960) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1960) CRITICAL: Found deadbeef in a new (clear) buffer after 845 tries!
(gem_exec_tlb:1961) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1961) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1961) CRITICAL: Found deadbeef in a new (clear) buffer after 1138 tries!
(gem_exec_tlb:1954) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1954) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1954) CRITICAL: Found deadbeef in a new (clear) buffer after 1359 tries!
(gem_exec_tlb:1955) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1955) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1955) CRITICAL: Found deadbeef in a new (clear) buffer after 1794 tries!
(gem_exec_tlb:1959) CRITICAL: Test assertion failure function check_bo, file ../tests/i915/gem_exec_tlb.c:384:
(gem_exec_tlb:1959) CRITICAL: Failed assertion: !sq
(gem_exec_tlb:1959) CRITICAL: Found deadbeef in a new (clear) buffer after 2139 tries!
Dynamic subtest smem0 failed.
**** DEBUG ****
(gem_exec_tlb:1944) DEBUG: 2M hole:200000 contains poison:6b6b6b6b
(gem_exec_tlb:1944) DEBUG: Running writer for 200000 at 300000 on bcs0
(gem_exec_tlb:1944) DEBUG: Closing hole:200000 on rcs0, sample:deadbeef
(gem_exec_tlb:1944) DEBUG: Rechecking hole:200000, sample:6b6b6b6b
**** END ****
Subtest close-clear: FAIL (10.434s)
Subtest madv-clear: SUCCESS (10.479s)
Subtest u-unmap-clear: SUCCESS (10.512s)
In summary, the test does properly detect fail when TLB cache invalidation doesn't happen,
as shown at result (3). It also shows that both current drm-tip and drm-tip with this series
applied don't have TLB invalidation cache issues.
[1] I applied this patch on the top of drm-tip:
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 68c2b0d8f187..0aefcd7be5e9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -930,0 +931,3 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+ // HACK: don't do TLB invalidations!!!
+ return;
+
Regards,
Mauro
Chris Wilson (4):
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
Mauro Carvalho Chehab (2):
drm/i915/gt: document with_intel_gt_pm_if_awake()
drm/i915/gt: describe the new tlb parameter at i915_vma_resource
.../gpu/drm/i915/gem/i915_gem_object_types.h | 3 +-
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 25 +++---
drivers/gpu/drm/i915/gt/intel_gt.c | 77 +++++++++++++++----
drivers/gpu/drm/i915/gt/intel_gt.h | 12 ++-
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 11 +++
drivers/gpu/drm/i915/gt/intel_gt_types.h | 18 ++++-
drivers/gpu/drm/i915/gt/intel_ppgtt.c | 8 +-
drivers/gpu/drm/i915/i915_vma.c | 33 ++++++--
drivers/gpu/drm/i915/i915_vma.h | 1 +
drivers/gpu/drm/i915/i915_vma_resource.c | 9 ++-
drivers/gpu/drm/i915/i915_vma_resource.h | 6 +-
11 files changed, 163 insertions(+), 40 deletions(-)
--
2.36.1
TLB invalidation is a slow operation. It should not be done lightly, as it
causes performance regressions, like this:
[178.821002] i915 0000:00:02.0: [drm] *ERROR* rcs0 TLB invalidation did not complete in 4ms!
This series contains:
1) some patches that make TLB invalidation happen only on
active, non-wedged engines, doing cache invalidation in batches
and only when GT objects are exposed to userspace:
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Only invalidate TLBs exposed to user manipulation
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
drm/i915/gt: Move TLB invalidation to its own file
2) It fixes two bugs, being the first a workaround:
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915: Invalidate the TLBs on each GT
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
3) It adds GuC support. Besides providing TLB invalidation on some
additional hardware, this should also help serializing GuC operations
with TLB invalidation:
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
drm/i915/guc: Define CTB based TLB invalidation routines
drm/i915: Add platform macro for selective tlb flush
drm/i915: Define GuC Based TLB invalidation routines
drm/i915: Add generic interface for tlb invalidation for XeHP
drm/i915: Use selective tlb invalidations where supported
4) It adds the corresponding kernel-doc markups for the kAPI
used for TLB invalidation.
While I could have split this into smaller pieces, I'm opting to send
them altogether, in order for CI trybot to better verify what issues
will be closed with this series.
---
v2:
- no changes. Just rebased on the top of drm-tip: 2022y-07m-14d-08h-35m-36s,
as CI trybot was having troubles applying it. Hopefully, it will now work.
Chris Wilson (7):
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915/gt: Only invalidate TLBs exposed to user manipulation
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
drm/i915/gt: Move TLB invalidation to its own file
drm/i915: Invalidate the TLBs on each GT
Mauro Carvalho Chehab (8):
drm/i915/gt: document with_intel_gt_pm_if_awake()
drm/i915/gt: describe the new tlb parameter at i915_vma_resource
drm/i915/guc: use kernel-doc for enum intel_guc_tlb_inval_mode
drm/i915/guc: document the TLB invalidation struct members
drm/i915: document tlb field at struct drm_i915_gem_object
drm/i915/gt: document TLB cache invalidation functions
drm/i915/guc: describe enum intel_guc_tlb_invalidation_type
drm/i915/guc: document TLB cache invalidation functions
Piotr Piórkowski (1):
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
Prathap Kumar Valsan (5):
drm/i915/guc: Define CTB based TLB invalidation routines
drm/i915: Add platform macro for selective tlb flush
drm/i915: Define GuC Based TLB invalidation routines
drm/i915: Add generic interface for tlb invalidation for XeHP
drm/i915: Use selective tlb invalidations where supported
drivers/gpu/drm/i915/Makefile | 1 +
.../gpu/drm/i915/gem/i915_gem_object_types.h | 6 +-
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 28 +-
drivers/gpu/drm/i915/gt/intel_engine.h | 1 +
drivers/gpu/drm/i915/gt/intel_gt.c | 125 +-------
drivers/gpu/drm/i915/gt/intel_gt.h | 2 -
.../gpu/drm/i915/gt/intel_gt_buffer_pool.h | 3 +-
drivers/gpu/drm/i915/gt/intel_gt_defines.h | 11 +
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 10 +
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 +
drivers/gpu/drm/i915/gt/intel_gt_types.h | 22 +-
drivers/gpu/drm/i915/gt/intel_ppgtt.c | 8 +-
drivers/gpu/drm/i915/gt/intel_tlb.c | 295 ++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_tlb.h | 30 ++
.../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 54 ++++
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 232 ++++++++++++++
drivers/gpu/drm/i915/gt/uc/intel_guc.h | 36 +++
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +-
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 9 +
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 +++++-
drivers/gpu/drm/i915/i915_drv.h | 4 +-
drivers/gpu/drm/i915/i915_pci.c | 1 +
drivers/gpu/drm/i915/i915_vma.c | 46 ++-
drivers/gpu/drm/i915/i915_vma.h | 2 +
drivers/gpu/drm/i915/i915_vma_resource.c | 9 +-
drivers/gpu/drm/i915/i915_vma_resource.h | 6 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
27 files changed, 910 insertions(+), 155 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_defines.h
create mode 100644 drivers/gpu/drm/i915/gt/intel_tlb.c
create mode 100644 drivers/gpu/drm/i915/gt/intel_tlb.h
--
2.36.1
On 20/07/2022 08:13, Mauro Carvalho Chehab wrote:
> On Mon, 18 Jul 2022 14:52:05 +0100
> Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com> wrote:
>
>>
>> On 14/07/2022 13:06, Mauro Carvalho Chehab wrote:
>>> From: Chris Wilson <chris.p.wilson(a)intel.com>
>>>
>>> Invalidate TLB in patch, in order to reduce performance regressions.
>>
>> "in batches"?
>
> Yeah. Will fix it.
>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
>>> index d8b94d638559..2da6c82a8bd2 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
>>> @@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
>>> void ppgtt_unbind_vma(struct i915_address_space *vm,
>>> struct i915_vma_resource *vma_res)
>>> {
>>> - if (vma_res->allocated)
>>> - vm->clear_range(vm, vma_res->start, vma_res->vma_size);
>>> + if (!vma_res->allocated)
>>> + return;
>>> +
>>> + vm->clear_range(vm, vma_res->start, vma_res->vma_size);
>>> + if (vma_res->tlb)
>>> + vma_invalidate_tlb(vm, *vma_res->tlb);
>>
>> The patch is about more than batching? If there is a security hole in
>> this area (unbind) with the current code?
>
> No, I don't think there's a security hole. The rationale for this is
> not due to it.
In this case obvious question is why are these changes in the patch
which declares itself to be about batching invalidations? Because...
> Since commit 2f6b90da9192 ("drm/i915: Use vma resources for async unbinding"),
> VMA unbind can happen either sync or async.
>
> So, the logic needs to do TLB invalidate on two places. After this
> patch, the code at __i915_vma_evict is:
>
> struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
> {
> ...
> if (async)
> unbind_fence = i915_vma_resource_unbind(vma_res,
> &vma->obj->mm.tlb);
> else
> unbind_fence = i915_vma_resource_unbind(vma_res, NULL);
>
> vma->resource = NULL;
>
> atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
> &vma->flags);
>
> i915_vma_detach(vma);
>
> if (!async) {
> if (unbind_fence) {
> dma_fence_wait(unbind_fence, false);
> dma_fence_put(unbind_fence);
> unbind_fence = NULL;
> }
> vma_invalidate_tlb(vma->vm, vma->obj->mm.tlb);
> }
> ...
>
> So, basically, if !async, __i915_vma_evict() will do TLB cache invalidation.
>
> However, when async is used, the actual page release will happen later,
> at this function:
>
> void ppgtt_unbind_vma(struct i915_address_space *vm,
> struct i915_vma_resource *vma_res)
> {
> if (!vma_res->allocated)
> return;
>
> vm->clear_range(vm, vma_res->start, vma_res->vma_size);
> if (vma_res->tlb)
> vma_invalidate_tlb(vm, *vma_res->tlb);
> }
.. frankly I don't follow since I don't see any page release happening
in here. Just PTE clearing.
I am explaining why it looks to me that the patch is doing two things.
Implementing batching _and_ adding invalidation points at VMA unbind
sites, while so far we had it at backing store release only. Maybe I am
wrong and perhaps I am too slow to pick up on the explanation here.
So if the patch is doing two things please split it up.
I am further confused by the invalidation call site in evict and in
unbind - why there can't be one logical site since the logical sequence
is evict -> unbind.
Regards,
Tvrtko
Hello,
This series moves all drivers to a dynamic dma-buf locking specification.
From now on all dma-buf importers are made responsible for holding
dma-buf's reservation lock around all operations performed over dma-bufs.
This common locking convention allows us to utilize the reservation lock more
broadly around the kernel without fear of potential deadlocks.
This patchset passes all i915 selftests. It was also tested using VirtIO,
Panfrost, Lima and Tegra drivers. I tested cases of display+GPU,
display+V4L and GPU+V4L dma-buf sharing, which covers the majority of kernel
drivers since the rest of the drivers share the same or similar code paths.
This is a continuation of [1] where Christian König asked to factor out
the dma-buf locking changes into separate series.
[1] https://lore.kernel.org/dri-devel/20220526235040.678984-1-dmitry.osipenko@c…
Dmitry Osipenko (6):
dma-buf: Add _unlocked postfix to function names
drm/gem: Take reservation lock for vmap/vunmap operations
dma-buf: Move all dma-bufs to dynamic locking specification
dma-buf: Acquire wait-wound context on attachment
media: videobuf2: Stop using internal dma-buf lock
dma-buf: Remove internal lock
drivers/dma-buf/dma-buf.c | 198 +++++++++++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +-
drivers/gpu/drm/armada/armada_gem.c | 14 +-
drivers/gpu/drm/drm_client.c | 4 +-
drivers/gpu/drm/drm_gem.c | 28 +++
drivers/gpu/drm/drm_gem_cma_helper.c | 6 +-
drivers/gpu/drm/drm_gem_framebuffer_helper.c | 6 +-
drivers/gpu/drm/drm_gem_shmem_helper.c | 6 +-
drivers/gpu/drm/drm_prime.c | 12 +-
drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 6 +-
drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 20 +-
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 +-
.../drm/i915/gem/selftests/i915_gem_dmabuf.c | 20 +-
drivers/gpu/drm/i915/i915_gem_evict.c | 2 +-
drivers/gpu/drm/i915/i915_gem_ww.c | 26 ++-
drivers/gpu/drm/i915/i915_gem_ww.h | 15 +-
drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 8 +-
drivers/gpu/drm/qxl/qxl_object.c | 17 +-
drivers/gpu/drm/qxl/qxl_prime.c | 4 +-
drivers/gpu/drm/tegra/gem.c | 27 +--
drivers/infiniband/core/umem_dmabuf.c | 11 +-
.../common/videobuf2/videobuf2-dma-contig.c | 26 +--
.../media/common/videobuf2/videobuf2-dma-sg.c | 23 +-
.../common/videobuf2/videobuf2-vmalloc.c | 17 +-
.../platform/nvidia/tegra-vde/dmabuf-cache.c | 12 +-
drivers/misc/fastrpc.c | 12 +-
drivers/xen/gntdev-dmabuf.c | 14 +-
include/drm/drm_gem.h | 3 +
include/linux/dma-buf.h | 49 ++---
32 files changed, 347 insertions(+), 257 deletions(-)
--
2.36.1
Hello,
I found a bug in the MOST USB driver (most_usb).
When the driver fails at
mdev->conf = kcalloc(num_endpoints, sizeof(*mdev->conf), GFP_KERNEL);
I got the following warning message:
[ 15.406256] kobject: '(null)' (ffff8881068f8000): is not
initialized, yet kobject_put() is being called.
[ 15.406986] WARNING: CPU: 3 PID: 396 at lib/kobject.c:720
kobject_put+0x6e/0x1c0
[ 15.410120] RIP: 0010:kobject_put+0x6e/0x1c0
[ 15.410470] Code: 01 75 29 4c 89 f8 48 c1 e8 03 80 3c 28 00 74 08
4c 89 ff e8 14 2e 73 ff 49 8b 37 48 c7 c7 c0 fc de 85 4c 89 fa e8 e2
61 21 ff <0f> 0b 49 8d 5f 38 48 89 df be 04 00 00 00 e8 df 2e 73 ff b8
ff ff
[ 15.416529] Call Trace:
[ 15.416896] hdm_probe+0xf3d/0x1090 [most_usb]
Since I'm not familiar with the driver, I ask for your help to solve
the warning.
regards,
Zheyu Ma
to provid --> to provide
Signed-off-by: Flavio Suligoi <f.suligoi(a)asem.it>
---
v2:
- fix typo in subject
drivers/i2c/busses/i2c-imx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index e9e2db68b9fb..78fb1a4274a6 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -66,7 +66,7 @@
/* IMX I2C registers:
* the I2C register offset is different between SoCs,
- * to provid support for all these chips, split the
+ * to provide support for all these chips, split the
* register offset into a fixed base address and a
* variable shift value, then the full register offset
* will be calculated by
--
2.25.1
to provid --> to provide
Signed-off-by: Flavio Suligoi <f.suligoi(a)asem.it>
---
drivers/i2c/busses/i2c-imx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index e9e2db68b9fb..78fb1a4274a6 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -66,7 +66,7 @@
/* IMX I2C registers:
* the I2C register offset is different between SoCs,
- * to provid support for all these chips, split the
+ * to provide support for all these chips, split the
* register offset into a fixed base address and a
* variable shift value, then the full register offset
* will be calculated by
--
2.25.1
TLB invalidation is a slow operation. It should not be done lightly, as it
causes performance regressions, like this:
[178.821002] i915 0000:00:02.0: [drm] *ERROR* rcs0 TLB invalidation did not complete in 4ms!
This series contains:
1) some patches that make TLB invalidation happen only on
active, non-wedged engines, doing cache invalidation in batch
and only when GT objects are exposed to userspace:
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Only invalidate TLBs exposed to user manipulation
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
drm/i915/gt: Move TLB invalidation to its own file
2) It fixes two bugs, the first being a workaround:
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915: Invalidate the TLBs on each GT
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
3) It adds GuC support. Besides providing TLB invalidation on some
additional hardware, this should also help serializing GuC operations
with TLB invalidation:
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
drm/i915/guc: Define CTB based TLB invalidation routines
drm/i915: Add platform macro for selective tlb flush
drm/i915: Define GuC Based TLB invalidation routines
drm/i915: Add generic interface for tlb invalidation for XeHP
drm/i915: Use selective tlb invalidations where supported
4) It adds the corresponding kernel-doc markups for the kAPI
used for TLB invalidation.
While I could have split this into smaller pieces, I'm opting to send
them altogether, in order for CI trybot to better verify what issues
will be closed with this series.
---
Chris Wilson (7):
drm/i915/gt: Ignore TLB invalidations on idle engines
drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations
drm/i915/gt: Only invalidate TLBs exposed to user manipulation
drm/i915/gt: Skip TLB invalidations once wedged
drm/i915/gt: Batch TLB invalidations
drm/i915/gt: Move TLB invalidation to its own file
drm/i915: Invalidate the TLBs on each GT
Mauro Carvalho Chehab (8):
drm/i915/gt: document with_intel_gt_pm_if_awake()
drm/i915/gt: describe the new tlb parameter at i915_vma_resource
drm/i915/guc: use kernel-doc for enum intel_guc_tlb_inval_mode
drm/i915/guc: document the TLB invalidation struct members
drm/i915: document tlb field at struct drm_i915_gem_object
drm/i915/gt: document TLB cache invalidation functions
drm/i915/guc: describe enum intel_guc_tlb_invalidation_type
drm/i915/guc: document TLB cache invalidation functions
Piotr Piórkowski (1):
drm/i915/guc: Introduce TLB_INVALIDATION_ALL action
Prathap Kumar Valsan (5):
drm/i915/guc: Define CTB based TLB invalidation routines
drm/i915: Add platform macro for selective tlb flush
drm/i915: Define GuC Based TLB invalidation routines
drm/i915: Add generic interface for tlb invalidation for XeHP
drm/i915: Use selective tlb invalidations where supported
drivers/gpu/drm/i915/Makefile | 1 +
.../gpu/drm/i915/gem/i915_gem_object_types.h | 6 +-
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 28 +-
drivers/gpu/drm/i915/gt/intel_engine.h | 1 +
drivers/gpu/drm/i915/gt/intel_gt.c | 125 +-------
drivers/gpu/drm/i915/gt/intel_gt.h | 2 -
.../gpu/drm/i915/gt/intel_gt_buffer_pool.h | 3 +-
drivers/gpu/drm/i915/gt/intel_gt_defines.h | 11 +
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 10 +
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 +
drivers/gpu/drm/i915/gt/intel_gt_types.h | 22 +-
drivers/gpu/drm/i915/gt/intel_ppgtt.c | 8 +-
drivers/gpu/drm/i915/gt/intel_tlb.c | 295 ++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_tlb.h | 30 ++
.../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 54 ++++
drivers/gpu/drm/i915/gt/uc/intel_guc.c | 232 ++++++++++++++
drivers/gpu/drm/i915/gt/uc/intel_guc.h | 36 +++
drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 24 +-
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 9 +
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 +++++-
drivers/gpu/drm/i915/i915_drv.h | 4 +-
drivers/gpu/drm/i915/i915_pci.c | 1 +
drivers/gpu/drm/i915/i915_vma.c | 46 ++-
drivers/gpu/drm/i915/i915_vma.h | 2 +
drivers/gpu/drm/i915/i915_vma_resource.c | 9 +-
drivers/gpu/drm/i915/i915_vma_resource.h | 6 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
27 files changed, 910 insertions(+), 155 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_defines.h
create mode 100644 drivers/gpu/drm/i915/gt/intel_tlb.c
create mode 100644 drivers/gpu/drm/i915/gt/intel_tlb.h
--
2.36.1
This series fixes almost all fixable issues when building the html docs at
linux-next (next-20220608):
- Address some broken cross-references;
- Fix kernel-doc warnings;
- Fix bad tags on ReST files.
With this series applied, plus other pending patches that should hopefully
be merged in time for the next merge window, htmldocs build will produce
just 4 warnings with Sphinx 2.4.4.
Sphinx >=3 will produce some extra false-positive warnings due to conflicts
between structs and functions sharing the same name. Hopefully this will
be fixed either on a new Sphinx 5.x version or Sphinx 6.0.
Mauro Carvalho Chehab (21):
docs: networking: update netdevices.rst reference
docs: update vmalloced-kernel-stacks.rst reference
docs: update vmemmap_dedup.rst reference
docs: zh_CN: page_migration: fix reference to mm index.rst
dt-bindings: arm: update arm,coresight-cpu-debug.yaml reference
x86/sgx: fix kernel-doc markups
fscache: fix kernel-doc documentation
fs: namei: address some kernel-doc issues
drm/scheduler: fix a kernel-doc warning
drm/scheduler: add a missing kernel-doc parameter
kfence: fix a kernel-doc parameter
genalloc: add a description for start_addr parameter
textsearch: document list inside struct ts_ops
dcache: fix a kernel-doc warning
docs: ext4: blockmap.rst: fix a broken table
docs: PCI: pci-vntb-function.rst: Properly include ascii artwork
docs: PCI: pci-vntb-howto.rst: fix a title markup
docs: virt: kvm: fix a title markup at api.rst
docs: ABI: sysfs-bus-nvdimm
docs: leds: index.rst: add leds-qcom-lpg to it
Documentation: coresight: fix binding wildcards
Documentation/ABI/testing/sysfs-bus-nvdimm | 2 ++
Documentation/PCI/endpoint/pci-vntb-function.rst | 2 +-
Documentation/PCI/endpoint/pci-vntb-howto.rst | 2 +-
Documentation/filesystems/ext4/blockmap.rst | 2 +-
Documentation/leds/index.rst | 1 +
Documentation/trace/coresight/coresight-cpu-debug.rst | 2 +-
Documentation/trace/coresight/coresight.rst | 2 +-
Documentation/translations/zh_CN/mm/page_migration.rst | 2 +-
.../translations/zh_CN/mm/vmalloced-kernel-stacks.rst | 2 +-
Documentation/virt/kvm/api.rst | 6 +++---
arch/x86/include/uapi/asm/sgx.h | 10 ++++++++--
drivers/gpu/drm/scheduler/sched_main.c | 1 +
drivers/net/can/can327.c | 2 +-
fs/namei.c | 3 +++
include/drm/gpu_scheduler.h | 1 +
include/linux/dcache.h | 2 +-
include/linux/fscache.h | 4 ++--
include/linux/genalloc.h | 1 +
include/linux/kfence.h | 1 +
include/linux/textsearch.h | 1 +
mm/hugetlb_vmemmap.h | 2 +-
21 files changed, 34 insertions(+), 17 deletions(-)
--
2.36.1
Hello,
This patchset introduces memory shrinker for the VirtIO-GPU DRM driver
and adds memory purging and eviction support to VirtIO-GPU driver.
The new dma-buf locking convention is introduced here as well.
During OOM, the shrinker will release BOs that are marked as "not needed"
by userspace using the new madvise IOCTL, it will also evict idling BOs
to SWAP. The userspace in this case is the Mesa VirGL driver, it will mark
the cached BOs as "not needed", allowing kernel driver to release memory
of the cached shmem BOs on lowmem situations, preventing OOM kills.
The Panfrost driver is switched to use generic memory shrinker.
This patchset includes improvements and fixes for various things that
I found while I was working on the shrinker.
The Mesa and IGT patches will be kept on hold until this kernel series
will be approved and merged.
This patchset was tested using Qemu and crosvm, including both cases of
IOMMU off/on.
Mesa: https://gitlab.freedesktop.org/digetx/mesa/-/commits/virgl-madvise
IGT: https://gitlab.freedesktop.org/digetx/igt-gpu-tools/-/commits/virtio-madvise
     https://gitlab.freedesktop.org/digetx/igt-gpu-tools/-/commits/panfrost-madv…
Changelog:
v6: - Added new VirtIO-related fix patch that previously was sent separately
and didn't get much attention:
drm/gem: Properly annotate WW context on drm_gem_lock_reservations() error
- Added new patch that fixes mapping of imported dma-bufs for
Tegra DRM and other affected drivers. It's also handy to have it
for switching to the new dma-buf locking convention scheme:
drm/gem: Move mapping of imported dma-bufs to drm_gem_mmap_obj()
- Added new patch that fixes shrinker list corruption for stable Panfrost
driver:
drm/panfrost: Fix shrinker list corruption by madvise IOCTL
- Added new minor patch-fix for drm-shmem:
drm/shmem-helper: Add missing vunmap on error
- Added fixes tag to the "Put mapping ..." patch like was suggested by
Steven Price.
- Added new VirtIO-GPU driver improvement patch:
drm/virtio: Return proper error codes instead of -1
- Reworked shrinker patches like was suggested by Daniel Vetter:
- Introduced the new locking convention for dma-bufs. Tested on
VirtIO-GPU, Panfrost, Lima, Tegra and Intel selftests.
- Dropped separate purge() callback. Now single evict() does
everything.
- Dropped swap_in() callback from drm-shmem objects. DRM drivers
now could and should restore only the required mappings.
- Dropped dynamic counting of evictable pages. This simplifies
code in exchange for *potentially* burning more CPU time on OOM.
v5: - Added new for-stable patch "drm/panfrost: Put mapping instead of
shmem obj on panfrost_mmu_map_fault_addr() error" that corrects GEM's
refcounting in case of error.
- The drm_gem_shmem_v[un]map() now takes a separate vmap_lock for
imported GEMs to avoid recursive locking of DMA reservations.
This addresses v4 comment from Thomas Zimmermann about the potential
deadlocking of vmapping.
- Added ack from Thomas Zimmermann to "drm/shmem-helper: Correct
doc-comment of drm_gem_shmem_get_sg_table()" patch.
- Dropped explicit shmem states from the generic shrinker patch as
was requested by Thomas Zimmermann.
- Improved variable names and comments of the generic shrinker code.
- Extended drm_gem_shmem_print_info() with the shrinker-state info in
the "drm/virtio: Support memory shrinking" patch.
- Moved evict()/swap_in()/purge() callbacks from drm_gem_object_funcs
to drm_gem_shmem_object in the generic shrinker patch, for more
consistency.
- Corrected bisectability of the patches that was broken in v4
by accident.
- The virtio_gpu_plane_prepare_fb() now uses drm_gem_shmem_pin() instead
of drm_gem_shmem_set_unpurgeable_and_unevictable() and does it only for
shmem BOs in the "drm/virtio: Support memory shrinking" patch.
- Made more functions private to drm_gem_shmem_helper.c as was requested
by Thomas Zimmermann. This minimizes number of the public shmem helpers.
v4: - Corrected minor W=1 warnings reported by kernel test robot for v3.
- Renamed DRM_GEM_SHMEM_PAGES_STATE_ACTIVE/INACTIVE to PINNED/UNPINNED,
for more clarity.
v3: - Hardened shrinker's count() with usage of READ_ONCE() since we don't
use atomic type for counting and technically compiler is free to
re-fetch counter's variable.
- "Correct drm_gem_shmem_get_sg_table() error handling" now uses
PTR_ERR_OR_ZERO(), fixing typo that was made in v2.
- Removed obsoleted shrinker from the Panfrost driver, which I missed to
do in v2 by accident and Alyssa Rosenzweig managed to notice it.
- CCed stable kernels in all patches that make fixes, even the minor ones,
like was suggested by Emil Velikov and added his r-b to the patches.
- Added t-b from Steven Price to the Panfrost's shrinker patch.
- Corrected doc-comment of drm_gem_shmem_object.madv, like was suggested
by Steven Price. Comment now says that madv=1 means "object is purged"
instead of saying that value is unused.
- Added more doc-comments to the new shmem shrinker API.
- The "Improve DMA API usage for shmem BOs" patch got more improvements
by removing the obsoleted drm_dev_set_unique() quirk and its comment.
- Added patch that makes Virtio-GPU driver to use common dev_is_pci()
helper, which was suggested by Robin Murphy.
- Added new "drm/shmem-helper: Take GEM reservation lock instead of
drm_gem_shmem locks" patch, which was suggested by Daniel Vetter.
- Added new "drm/virtio: Simplify error handling of
virtio_gpu_object_create()" patch.
- Improved "Correct doc-comment of drm_gem_shmem_get_sg_table()" patch,
like was suggested by Daniel Vetter, by saying that function returns
ERR_PTR() and not errno.
- virtio_gpu_purge_object() is fenced properly now, turned out
virtio_gpu_notify() doesn't do fencing as I was supposing before.
Stress testing of memory eviction revealed that.
- Added new patch that corrects virtio_gpu_plane_cleanup_fb() to use
appropriate atomic plane state.
- SHMEM shrinker got eviction support.
- VirtIO-GPU driver now supports memory eviction. It's enabled for a
non-blob GEMs only, i.e. for VirGL. The blobs don't support dynamic
attaching/detaching of guest's memory, so it's not trivial to enable
them.
- Added patch that removes obsoleted drm_gem_shmem_purge()
- Added patch that makes drm_gem_shmem_get_pages() private.
- Added patch that fixes lockup on dma_resv_reserve_fences() error.
v2: - Improved shrinker by using a more fine-grained locking to reduce
contention during scan of objects and dropped locking from the
'counting' callback by tracking count of shrinkable pages. This
was suggested by Rob Clark in the comment to v1.
- Factored out common shrinker code into drm_gem_shmem_helper.c
and switched Panfrost driver to use the new common memory shrinker.
This was proposed by Thomas Zimmermann in his prototype series that
he shared with us in the comment to v1. Note that I only compile-tested
the Panfrost driver.
- Shrinker now takes object_name_lock during scan to prevent racing
with dma-buf exporting.
- Shrinker now takes vmap_lock during scan to prevent racing with shmem
vmap/unmap code.
- Added "Correct doc-comment of drm_gem_shmem_get_sg_table()" patch,
which I sent out previously as a standalone change, since the
drm_gem_shmem_helper.c is now touched by this patchset anyways and
it doesn't hurt to group all the patches together.
Dmitry Osipenko (22):
drm/gem: Properly annotate WW context on drm_gem_lock_reservations()
error
drm/gem: Move mapping of imported dma-bufs to drm_gem_mmap_obj()
drm/panfrost: Put mapping instead of shmem obj on
panfrost_mmu_map_fault_addr() error
drm/panfrost: Fix shrinker list corruption by madvise IOCTL
drm/virtio: Correct drm_gem_shmem_get_sg_table() error handling
drm/virtio: Check whether transferred 2D BO is shmem
drm/virtio: Unlock reservations on virtio_gpu_object_shmem_init()
error
drm/virtio: Unlock reservations on dma_resv_reserve_fences() error
drm/virtio: Use appropriate atomic state in
virtio_gpu_plane_cleanup_fb()
drm/shmem-helper: Add missing vunmap on error
drm/shmem-helper: Correct doc-comment of drm_gem_shmem_get_sg_table()
drm/virtio: Simplify error handling of virtio_gpu_object_create()
drm/virtio: Improve DMA API usage for shmem BOs
dma-buf: Introduce new locking convention
drm/shmem-helper: Don't use vmap_use_count for dma-bufs
drm/shmem-helper: Use reservation lock
drm/shmem-helper: Add generic memory shrinker
drm/gem: Add drm_gem_pin_unlocked()
drm/virtio: Support memory shrinking
drm/virtio: Use dev_is_pci()
drm/virtio: Return proper error codes instead of -1
drm/panfrost: Switch to generic memory shrinker
drivers/dma-buf/dma-buf.c | 270 ++++---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +-
drivers/gpu/drm/drm_client.c | 4 +-
drivers/gpu/drm/drm_gem.c | 69 +-
drivers/gpu/drm/drm_gem_framebuffer_helper.c | 6 +-
drivers/gpu/drm/drm_gem_shmem_helper.c | 718 ++++++++++++++----
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 10 +-
drivers/gpu/drm/lima/lima_gem.c | 8 +-
drivers/gpu/drm/lima/lima_sched.c | 4 +-
drivers/gpu/drm/panfrost/Makefile | 1 -
drivers/gpu/drm/panfrost/panfrost_device.h | 4 -
drivers/gpu/drm/panfrost/panfrost_drv.c | 26 +-
drivers/gpu/drm/panfrost/panfrost_gem.c | 33 +-
drivers/gpu/drm/panfrost/panfrost_gem.h | 9 -
.../gpu/drm/panfrost/panfrost_gem_shrinker.c | 122 ---
drivers/gpu/drm/panfrost/panfrost_job.c | 18 +-
drivers/gpu/drm/panfrost/panfrost_mmu.c | 21 +-
drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 6 +-
drivers/gpu/drm/qxl/qxl_object.c | 17 +-
drivers/gpu/drm/qxl/qxl_prime.c | 4 +-
drivers/gpu/drm/tegra/gem.c | 4 +
drivers/gpu/drm/virtio/virtgpu_drv.c | 53 +-
drivers/gpu/drm/virtio/virtgpu_drv.h | 23 +-
drivers/gpu/drm/virtio/virtgpu_gem.c | 59 +-
drivers/gpu/drm/virtio/virtgpu_ioctl.c | 37 +
drivers/gpu/drm/virtio/virtgpu_kms.c | 16 +-
drivers/gpu/drm/virtio/virtgpu_object.c | 203 +++--
drivers/gpu/drm/virtio/virtgpu_plane.c | 28 +-
drivers/gpu/drm/virtio/virtgpu_vq.c | 61 +-
.../common/videobuf2/videobuf2-dma-contig.c | 11 +-
.../media/common/videobuf2/videobuf2-dma-sg.c | 11 +-
.../common/videobuf2/videobuf2-vmalloc.c | 11 +-
include/drm/drm_device.h | 4 +
include/drm/drm_gem.h | 6 +
include/drm/drm_gem_shmem_helper.h | 99 ++-
include/linux/dma-buf.h | 14 +-
include/uapi/drm/virtgpu_drm.h | 14 +
37 files changed, 1349 insertions(+), 661 deletions(-)
delete mode 100644 drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
--
2.35.3