From: Thomas Zimmermann <tzimmermann(a)suse.de>
[ Upstream commit b57aa47d39e94dc47403a745e2024664e544078c ]
Add drm_gem_is_imported() that tests if a GEM object's buffer has
been imported. Update the GEM code accordingly.
GEM code usually tests for imports if import_attach has been set
in struct drm_gem_object. But attaching a dma-buf on import requires
a DMA-capable importer device, which is not the case for many serial
busses like USB or I2C. The new helper tests if a GEM object's dma-buf
has been created from the GEM object.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Reviewed-by: Anusha Srivatsa <asrivats(a)redhat.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250226172457.217725-2-tzimm…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/drm_gem.c | 4 ++--
include/drm/drm_gem.h | 14 ++++++++++++++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 44a948b80ee14..deb93f78ce344 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -322,7 +322,7 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
return -ENOENT;
/* Don't allow imported objects to be mapped */
- if (obj->import_attach) {
+ if (drm_gem_is_imported(obj)) {
ret = -EINVAL;
goto out;
}
@@ -1155,7 +1155,7 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
drm_vma_node_start(&obj->vma_node));
drm_printf_indent(p, indent, "size=%zu\n", obj->size);
drm_printf_indent(p, indent, "imported=%s\n",
- str_yes_no(obj->import_attach));
+ str_yes_no(drm_gem_is_imported(obj)));
if (obj->funcs->print_info)
obj->funcs->print_info(p, indent, obj);
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 7c2ec139c464a..fbfccb96dd17b 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -35,6 +35,7 @@
*/
#include <linux/kref.h>
+#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -557,6 +558,19 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje
return (obj->handle_count > 1) || obj->dma_buf;
}
+/**
+ * drm_gem_is_imported() - Tests if GEM object's buffer has been imported
+ * @obj: the GEM object
+ *
+ * Returns:
+ * True if the GEM object's buffer has been imported, false otherwise
+ */
+static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
+{
+ /* The dma-buf's priv field points to the original GEM object. */
+ return obj->dma_buf && (obj->dma_buf->priv != obj);
+}
+
#ifdef CONFIG_LOCKDEP
/**
* drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
--
2.39.5
From: Thomas Zimmermann <tzimmermann(a)suse.de>
[ Upstream commit b57aa47d39e94dc47403a745e2024664e544078c ]
Add drm_gem_is_imported() that tests if a GEM object's buffer has
been imported. Update the GEM code accordingly.
GEM code usually tests for imports if import_attach has been set
in struct drm_gem_object. But attaching a dma-buf on import requires
a DMA-capable importer device, which is not the case for many serial
busses like USB or I2C. The new helper tests if a GEM object's dma-buf
has been created from the GEM object.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Reviewed-by: Anusha Srivatsa <asrivats(a)redhat.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250226172457.217725-2-tzimm…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/drm_gem.c | 4 ++--
include/drm/drm_gem.h | 14 ++++++++++++++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 149b8e25da5bb..426d0867882df 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -322,7 +322,7 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
return -ENOENT;
/* Don't allow imported objects to be mapped */
- if (obj->import_attach) {
+ if (drm_gem_is_imported(obj)) {
ret = -EINVAL;
goto out;
}
@@ -1152,7 +1152,7 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
drm_vma_node_start(&obj->vma_node));
drm_printf_indent(p, indent, "size=%zu\n", obj->size);
drm_printf_indent(p, indent, "imported=%s\n",
- str_yes_no(obj->import_attach));
+ str_yes_no(drm_gem_is_imported(obj)));
if (obj->funcs->print_info)
obj->funcs->print_info(p, indent, obj);
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index d8b86df2ec0da..70c0f8c83629d 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -35,6 +35,7 @@
*/
#include <linux/kref.h>
+#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -570,6 +571,19 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje
return (obj->handle_count > 1) || obj->dma_buf;
}
+/**
+ * drm_gem_is_imported() - Tests if GEM object's buffer has been imported
+ * @obj: the GEM object
+ *
+ * Returns:
+ * True if the GEM object's buffer has been imported, false otherwise
+ */
+static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
+{
+ /* The dma-buf's priv field points to the original GEM object. */
+ return obj->dma_buf && (obj->dma_buf->priv != obj);
+}
+
#ifdef CONFIG_LOCKDEP
/**
* drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
--
2.39.5
From: Thomas Zimmermann <tzimmermann(a)suse.de>
[ Upstream commit 8260731ccad0451207b45844bb66eb161a209218 ]
Test struct drm_gem_object.import_attach to detect imported objects.
During object cleanup, the dma_buf field might be NULL. Testing it in
an object's free callback then incorrectly does a cleanup as for native
objects. This happens for calls to drm_mode_destroy_dumb_ioctl(), which
clears the dma_buf field in drm_gem_object_exported_dma_buf_free().
v3:
- only test for import_attach (Boris)
v2:
- use import_attach.dmabuf instead of dma_buf (Christian)
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: b57aa47d39e9 ("drm/gem: Test for imported GEM buffers with helper")
Reported-by: Andy Yan <andyshrk(a)163.com>
Closes: https://lore.kernel.org/dri-devel/38d09d34.4354.196379aa560.Coremail.andysh…
Tested-by: Andy Yan <andyshrk(a)163.com>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Anusha Srivatsa <asrivats(a)redhat.com>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Simona Vetter <simona(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Reviewed-by: Boris Brezillon <boris.brezillon(a)collabora.com>
Reviewed-by: Simona Vetter <simona.vetter(a)ffwll.ch>
Link: https://lore.kernel.org/r/20250416065820.26076-1-tzimmermann@suse.de
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
include/drm/drm_gem.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 2bf893eabb4b2..bcd54020d6ba5 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -585,8 +585,7 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje
*/
static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
{
- /* The dma-buf's priv field points to the original GEM object. */
- return obj->dma_buf && (obj->dma_buf->priv != obj);
+ return !!obj->import_attach;
}
#ifdef CONFIG_LOCKDEP
--
2.39.5
From: Thomas Zimmermann <tzimmermann(a)suse.de>
[ Upstream commit b57aa47d39e94dc47403a745e2024664e544078c ]
Add drm_gem_is_imported() that tests if a GEM object's buffer has
been imported. Update the GEM code accordingly.
GEM code usually tests for imports if import_attach has been set
in struct drm_gem_object. But attaching a dma-buf on import requires
a DMA-capable importer device, which is not the case for many serial
busses like USB or I2C. The new helper tests if a GEM object's dma-buf
has been created from the GEM object.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Reviewed-by: Anusha Srivatsa <asrivats(a)redhat.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250226172457.217725-2-tzimm…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/drm_gem.c | 4 ++--
include/drm/drm_gem.h | 14 ++++++++++++++
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index ee811764c3df4..c6240bab3fa55 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -348,7 +348,7 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
return -ENOENT;
/* Don't allow imported objects to be mapped */
- if (obj->import_attach) {
+ if (drm_gem_is_imported(obj)) {
ret = -EINVAL;
goto out;
}
@@ -1178,7 +1178,7 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
drm_vma_node_start(&obj->vma_node));
drm_printf_indent(p, indent, "size=%zu\n", obj->size);
drm_printf_indent(p, indent, "imported=%s\n",
- str_yes_no(obj->import_attach));
+ str_yes_no(drm_gem_is_imported(obj)));
if (obj->funcs->print_info)
obj->funcs->print_info(p, indent, obj);
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index fdae947682cd0..2bf893eabb4b2 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -35,6 +35,7 @@
*/
#include <linux/kref.h>
+#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -575,6 +576,19 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje
return (obj->handle_count > 1) || obj->dma_buf;
}
+/**
+ * drm_gem_is_imported() - Tests if GEM object's buffer has been imported
+ * @obj: the GEM object
+ *
+ * Returns:
+ * True if the GEM object's buffer has been imported, false otherwise
+ */
+static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
+{
+ /* The dma-buf's priv field points to the original GEM object. */
+ return obj->dma_buf && (obj->dma_buf->priv != obj);
+}
+
#ifdef CONFIG_LOCKDEP
/**
* drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
--
2.39.5
From: Christian König <christian.koenig(a)amd.com>
[ Upstream commit bd22e44ad415ac22e3a4f9a983d2a085f6cb4427 ]
Limiting the number of available VMIDs to enforce isolation causes some
issues with gang submit and applying certain HW workarounds which
require multiple VMIDs to work correctly.
So instead start to track all submissions to the relevant engines in a
per partition data structure and use the dma_fences of the submissions
to enforce isolation similar to what a VMID limit does.
v2: use ~0l for jobs without isolation to distinguish them from kernel
submissions, which use NULL for the owner. Add a warning when we
are OOM.
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 98 +++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 43 ++++------
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 16 +++-
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 19 +++++
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1 +
6 files changed, 155 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 98f0c12df12bc..9a61f5fe3245a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1187,9 +1187,15 @@ struct amdgpu_device {
bool debug_enable_ras_aca;
bool debug_exp_resets;
- bool enforce_isolation[MAX_XCP];
- /* Added this mutex for cleaner shader isolation between GFX and compute processes */
+ /* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;
+ bool enforce_isolation[MAX_XCP];
+ struct amdgpu_isolation {
+ void *owner;
+ struct dma_fence *spearhead;
+ struct amdgpu_sync active;
+ struct amdgpu_sync prev;
+ } isolation[MAX_XCP];
struct amdgpu_init_level *init_lvl;
};
@@ -1470,6 +1476,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 71e8a76180ad6..e298b48488c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4232,6 +4232,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.reset_sem_mutex);
/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
mutex_init(&adev->enforce_isolation_mutex);
+ for (i = 0; i < MAX_XCP; ++i) {
+ adev->isolation[i].spearhead = dma_fence_get_stub();
+ amdgpu_sync_create(&adev->isolation[i].active);
+ amdgpu_sync_create(&adev->isolation[i].prev);
+ }
mutex_init(&adev->gfx.kfd_sch_mutex);
amdgpu_device_init_apu_flags(adev);
@@ -4731,7 +4736,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
- int idx;
+ int i, idx;
bool px;
amdgpu_device_ip_fini(adev);
@@ -4739,6 +4744,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+ for (i = 0; i < MAX_XCP; ++i) {
+ dma_fence_put(adev->isolation[i].spearhead);
+ amdgpu_sync_free(&adev->isolation[i].active);
+ amdgpu_sync_free(&adev->isolation[i].prev);
+ }
amdgpu_reset_fini(adev);
@@ -6875,6 +6885,92 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
return NULL;
}
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ struct drm_sched_fence *f = job->base.s_fence;
+ struct dma_fence *dep;
+ void *owner;
+ int r;
+
+ /*
+ * For now enforce isolation only for the GFX block since we only need
+ * the cleaner shader on those rings.
+ */
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+ ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ return NULL;
+
+ /*
+ * All submissions where enforce isolation is false are handled as if
+ * they come from a single client. Use ~0l as the owner to distinct it
+ * from kernel submissions where the owner is NULL.
+ */
+ owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+
+ /*
+ * The "spearhead" submission is the first one which changes the
+ * ownership to its client. We always need to wait for it to be
+ * pushed to the HW before proceeding with anything.
+ */
+ if (&f->scheduled != isolation->spearhead &&
+ !dma_fence_is_signaled(isolation->spearhead)) {
+ dep = isolation->spearhead;
+ goto out_grab_ref;
+ }
+
+ if (isolation->owner != owner) {
+
+ /*
+ * Wait for any gang to be assembled before switching to a
+ * different owner or otherwise we could deadlock the
+ * submissions.
+ */
+ if (!job->gang_submit) {
+ dep = amdgpu_device_get_gang(adev);
+ if (!dma_fence_is_signaled(dep))
+ goto out_return_dep;
+ dma_fence_put(dep);
+ }
+
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(&f->scheduled);
+ amdgpu_sync_move(&isolation->active, &isolation->prev);
+ isolation->owner = owner;
+ }
+
+ /*
+ * Specifying the ring here helps to pipeline submissions even when
+ * isolation is enabled. If that is not desired for testing NULL can be
+ * used instead of the ring to enforce a CPU round trip while switching
+ * between clients.
+ */
+ dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+ r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+ if (r)
+ DRM_WARN("OOM tracking isolation\n");
+
+out_grab_ref:
+ dma_fence_get(dep);
+out_return_dep:
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ return dep;
+}
+
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 8e712a11aba5d..9008b7388e897 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -287,40 +287,27 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
(*id)->flushed_updates < updates ||
!(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
- !dma_fence_is_signaled((*id)->last_flush))) {
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->owner != vm->immediate.fence_context ||
+ (!adev->vm_manager.concurrent_flush && needs_flush)) {
struct dma_fence *tmp;
- /* Wait for the gang to be assembled before using a
- * reserved VMID or otherwise the gang could deadlock.
+ /* Don't use per engine and per process VMID at the
+ * same time
*/
- tmp = amdgpu_device_get_gang(adev);
- if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
+ if (adev->vm_manager.concurrent_flush)
+ ring = NULL;
+
+ /* to prevent one context starved by another context */
+ (*id)->pd_gpu_addr = 0;
+ tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+ if (tmp) {
*id = NULL;
- *fence = tmp;
+ *fence = dma_fence_get(tmp);
return 0;
}
- dma_fence_put(tmp);
-
- /* Make sure the id is owned by the gang before proceeding */
- if (!job->gang_submit ||
- (*id)->owner != vm->immediate.fence_context) {
-
- /* Don't use per engine and per process VMID at the
- * same time
- */
- if (adev->vm_manager.concurrent_flush)
- ring = NULL;
-
- /* to prevent one context starved by another context */
- (*id)->pd_gpu_addr = 0;
- tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
- if (tmp) {
- *id = NULL;
- *fence = dma_fence_get(tmp);
- return 0;
- }
- }
- needs_flush = true;
}
/* Good we can use this VMID. Remember this submission as
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 100f044759435..685c61a05af85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -342,17 +342,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
int r;
r = drm_sched_entity_error(s_entity);
if (r)
goto error;
- if (job->gang_submit)
+ if (job->gang_submit) {
fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+ if (fence)
+ return fence;
+ }
+
+ fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+ if (fence)
+ return fence;
- if (!fence && job->vm && !job->vmid) {
+ if (job->vm && !job->vmid) {
r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
if (r) {
dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
@@ -365,9 +372,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
*/
if (!fence)
job->vm = NULL;
+ return fence;
}
- return fence;
+ return NULL;
error:
dma_fence_set_error(&job->base.s_fence->finished, r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c586ab4c911bf..d75715b3f1870 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -399,6 +399,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
return 0;
}
+/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+ unsigned int i;
+
+ amdgpu_sync_free(dst);
+
+ for (i = 0; i < HASH_SIZE(src->fences); ++i)
+ hlist_move_list(&src->fences[i], &dst->fences[i]);
+}
+
/**
* amdgpu_sync_push_to_job - push fences into job
* @sync: sync object to get the fences from
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index e3272dce798d7..a91a8eaf808b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -56,6 +56,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
--
2.39.5
Until CONFIG_DMABUF_SYSFS_STATS was added [1] it was only possible to
perform per-buffer accounting with debugfs which is not suitable for
production environments. Eventually we discovered the overhead with
per-buffer sysfs file creation/removal was significantly impacting
allocation and free times, and exacerbated kernfs lock contention. [2]
dma_buf_stats_setup() is responsible for 39% of single-page buffer
creation duration, or 74% of single-page dma_buf_export() duration when
stressing dmabuf allocations and frees.
I prototyped a change from per-buffer to per-exporter statistics with an
RCU-protected list of exporter allocations that accommodates most (but
not all) of our use-cases and avoids almost all of the sysfs overhead.
While that adds less overhead than per-buffer sysfs, and less even than
the maintenance of the dmabuf debugfs_list, it's still *additional*
overhead on top of the debugfs_list and doesn't give us per-buffer info.
This series uses the existing dmabuf debugfs_list to implement a BPF
dmabuf iterator, which adds no overhead to buffer allocation/free and
provides per-buffer info. The list has been moved outside of
CONFIG_DEBUG_FS scope so that it is always populated. The BPF program
loaded by userspace that extracts per-buffer information gets to define
its own interface which avoids the lack of ABI stability with debugfs.
As this is a replacement for our use of CONFIG_DMABUF_SYSFS_STATS, the
last patch is an RFC for removing it from the kernel. Please see my
suggestion there regarding the timeline for that.
[1] https://lore.kernel.org/linux-media/20201210044400.1080308-1-hridya@google.…
[2] https://lore.kernel.org/all/20220516171315.2400578-1-tjmercier@google.com
v1: https://lore.kernel.org/all/20250414225227.3642618-1-tjmercier@google.com
v1 -> v2:
Make the DMA buffer list independent of CONFIG_DEBUG_FS per Christian König
Add CONFIG_DMA_SHARED_BUFFER check to kernel/bpf/Makefile per kernel test robot
Use BTF_ID_LIST_SINGLE instead of BTF_ID_LIST_GLOBAL_SINGLE per Song Liu
Fixup comment style, mixing code/declarations, and use ASSERT_OK_FD in selftest per Song Liu
Add BPF_ITER_RESCHED feature to bpf_dmabuf_reg_info per Alexei Starovoitov
Add open-coded iterator and selftest per Alexei Starovoitov
Add a second test buffer from the system dmabuf heap to selftests
Use the BPF program we'll use in production for selftest per Alexei Starovoitov
https://r.android.com/c/platform/system/bpfprogs/+/3616123/2/dmabufIter.c
https://r.android.com/c/platform/system/memory/libmeminfo/+/3614259/1/libdm…
T.J. Mercier (6):
dma-buf: Rename and expose debugfs symbols
bpf: Add dmabuf iterator
bpf: Add open coded dmabuf iterator
selftests/bpf: Add test for dmabuf_iter
selftests/bpf: Add test for open coded dmabuf_iter
RFC: dma-buf: Remove DMA-BUF statistics
.../ABI/testing/sysfs-kernel-dmabuf-buffers | 24 --
Documentation/driver-api/dma-buf.rst | 5 -
drivers/dma-buf/Kconfig | 15 -
drivers/dma-buf/Makefile | 1 -
drivers/dma-buf/dma-buf-sysfs-stats.c | 202 --------------
drivers/dma-buf/dma-buf-sysfs-stats.h | 35 ---
drivers/dma-buf/dma-buf.c | 58 +---
include/linux/dma-buf.h | 6 +-
kernel/bpf/Makefile | 3 +
kernel/bpf/dmabuf_iter.c | 177 ++++++++++++
kernel/bpf/helpers.c | 5 +
.../testing/selftests/bpf/bpf_experimental.h | 5 +
tools/testing/selftests/bpf/config | 3 +
.../selftests/bpf/prog_tests/dmabuf_iter.c | 258 ++++++++++++++++++
.../testing/selftests/bpf/progs/dmabuf_iter.c | 91 ++++++
15 files changed, 561 insertions(+), 327 deletions(-)
delete mode 100644 Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers
delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.c
delete mode 100644 drivers/dma-buf/dma-buf-sysfs-stats.h
create mode 100644 kernel/bpf/dmabuf_iter.c
create mode 100644 tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
create mode 100644 tools/testing/selftests/bpf/progs/dmabuf_iter.c
base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
--
2.49.0.906.g1f30a19c02-goog