Acquire GEM handles in drm_framebuffer_init() and release them in
the corresponding drm_framebuffer_cleanup(). This ties the handle's
lifetime to that of the framebuffer. Not all GEM buffer objects have
GEM handles; if no handle is set, no refcounting takes place. This is
the case for some fbdev emulation buffers. It is not a problem, as
these GEM objects do not use dma-bufs and drivers will not release
them while fbdev emulation is running.
As all drivers use drm_framebuffer_init(), they will now all hold
dma-buf references as fixed in commit 5307dce878d4 ("drm/gem: Acquire
references on GEM handles for framebuffers").
In the GEM framebuffer helpers, restore the original ref counting
on buffer objects. As the handle-refcounting helpers are no longer
called from outside the DRM core, unexport the symbols.
Gma500 (unnecessarily) clears the framebuffer's GEM-object pointer
before calling drm_framebuffer_cleanup(). Remove these lines to make
it consistent with the rest of the drivers. Gma500 is one of the fbdev
emulations whose buffers have no GEM handle, so the change to gma500
is rather cosmetic.
Tested on i915, amdgpu (by Bert) and gma500. Also tested on i915
plus udl for the original problem with dma-buf sharing.
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: 5307dce878d4 ("drm/gem: Acquire references on GEM handles for framebuffers")
Reported-by: Bert Karwatzki <spasswolf(a)web.de>
Closes: https://lore.kernel.org/dri-devel/20250703115915.3096-1-spasswolf@web.de/
Tested-by: Bert Karwatzki <spasswolf(a)web.de>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Anusha Srivatsa <asrivats(a)redhat.com>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: linux-media(a)vger.kernel.org
Cc: dri-devel(a)lists.freedesktop.org
Cc: linaro-mm-sig(a)lists.linaro.org
Cc: <stable(a)vger.kernel.org>
---
drivers/gpu/drm/drm_framebuffer.c | 23 +++++++-
drivers/gpu/drm/drm_gem.c | 59 +++++++++++++-------
drivers/gpu/drm/drm_gem_framebuffer_helper.c | 16 +++---
drivers/gpu/drm/drm_internal.h | 4 +-
drivers/gpu/drm/gma500/fbdev.c | 2 -
5 files changed, 69 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index b781601946db..e4a10dd053fc 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -862,11 +862,17 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free);
int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
const struct drm_framebuffer_funcs *funcs)
{
+ unsigned int i;
int ret;
if (WARN_ON_ONCE(fb->dev != dev || !fb->format))
return -EINVAL;
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->obj[i])
+ drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]);
+ }
+
INIT_LIST_HEAD(&fb->filp_head);
fb->funcs = funcs;
@@ -875,7 +881,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB,
false, drm_framebuffer_free);
if (ret)
- goto out;
+ goto err;
mutex_lock(&dev->mode_config.fb_lock);
dev->mode_config.num_fb++;
@@ -883,7 +889,14 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
mutex_unlock(&dev->mode_config.fb_lock);
drm_mode_object_register(dev, &fb->base);
-out:
+
+ return 0;
+
+err:
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->obj[i])
+ drm_gem_object_handle_put_if_exists_unlocked(fb->obj[i]);
+ }
return ret;
}
EXPORT_SYMBOL(drm_framebuffer_init);
@@ -960,6 +973,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private);
void drm_framebuffer_cleanup(struct drm_framebuffer *fb)
{
struct drm_device *dev = fb->dev;
+ unsigned int i;
+
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->obj[i])
+ drm_gem_object_handle_put_if_exists_unlocked(fb->obj[i]);
+ }
mutex_lock(&dev->mode_config.fb_lock);
list_del(&fb->head);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index bc505d938b3e..9d8b9e6b7d25 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -224,23 +224,27 @@ static void drm_gem_object_handle_get(struct drm_gem_object *obj)
}
/**
- * drm_gem_object_handle_get_unlocked - acquire reference on user-space handles
+ * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any
* @obj: GEM object
*
- * Acquires a reference on the GEM buffer object's handle. Required
- * to keep the GEM object alive. Call drm_gem_object_handle_put_unlocked()
- * to release the reference.
+ * Acquires a reference on the GEM buffer object's handle. Required to keep
+ * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked()
+ * to release the reference. Does nothing if the buffer object has no handle.
*/
-void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj)
+void drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
guard(mutex)(&dev->object_name_lock);
- drm_WARN_ON(dev, !obj->handle_count); /* first ref taken in create-tail helper */
- drm_gem_object_handle_get(obj);
+ /*
+ * First ref taken during GEM object creation, if any. Some
+ * drivers set up internal framebuffers with GEM objects that
+ * do not have a GEM handle. Hence, this counter can be zero.
+ */
+ if (obj->handle_count)
+ drm_gem_object_handle_get(obj);
}
-EXPORT_SYMBOL(drm_gem_object_handle_get_unlocked);
/**
* drm_gem_object_handle_free - release resources bound to userspace handles
@@ -272,21 +276,11 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj)
}
}
-/**
- * drm_gem_object_handle_put_unlocked - releases reference on user-space handles
- * @obj: GEM object
- *
- * Releases a reference on the GEM buffer object's handle. Possibly releases
- * the GEM buffer object and associated dma-buf objects.
- */
-void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
+static void drm_gem_object_handle_put_unlocked_tail(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
bool final = false;
- if (WARN_ON(READ_ONCE(obj->handle_count) == 0))
- return;
-
/*
* Must bump handle count first as this may be the last
* ref, in which case the object would disappear before we
@@ -304,7 +298,32 @@ void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
if (final)
drm_gem_object_put(obj);
}
-EXPORT_SYMBOL(drm_gem_object_handle_put_unlocked);
+
+static void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+
+ if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0))
+ return;
+
+ drm_gem_object_handle_put_unlocked_tail(obj);
+}
+
+/**
+ * drm_gem_object_handle_put_if_exists_unlocked - releases reference on user-space handle, if any
+ * @obj: GEM object
+ *
+ * Releases a reference on the GEM buffer object's handle. Possibly releases
+ * the GEM buffer object and associated dma-buf objects. Does nothing if the
+ * buffer object has no handle.
+ */
+void drm_gem_object_handle_put_if_exists_unlocked(struct drm_gem_object *obj)
+{
+ if (!obj->handle_count)
+ return;
+
+ drm_gem_object_handle_put_unlocked_tail(obj);
+}
/*
* Called at device or object close to release the file's
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index c60d0044d036..618ce725cd75 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -100,7 +100,7 @@ void drm_gem_fb_destroy(struct drm_framebuffer *fb)
unsigned int i;
for (i = 0; i < fb->format->num_planes; i++)
- drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ drm_gem_object_put(fb->obj[i]);
drm_framebuffer_cleanup(fb);
kfree(fb);
@@ -183,10 +183,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev,
if (!objs[i]) {
drm_dbg_kms(dev, "Failed to lookup GEM object\n");
ret = -ENOENT;
- goto err_gem_object_handle_put_unlocked;
+ goto err_gem_object_put;
}
- drm_gem_object_handle_get_unlocked(objs[i]);
- drm_gem_object_put(objs[i]);
min_size = (height - 1) * mode_cmd->pitches[i]
+ drm_format_info_min_pitch(info, i, width)
@@ -196,22 +194,22 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev,
drm_dbg_kms(dev,
"GEM object size (%zu) smaller than minimum size (%u) for plane %d\n",
objs[i]->size, min_size, i);
- drm_gem_object_handle_put_unlocked(objs[i]);
+ drm_gem_object_put(objs[i]);
ret = -EINVAL;
- goto err_gem_object_handle_put_unlocked;
+ goto err_gem_object_put;
}
}
ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs);
if (ret)
- goto err_gem_object_handle_put_unlocked;
+ goto err_gem_object_put;
return 0;
-err_gem_object_handle_put_unlocked:
+err_gem_object_put:
while (i > 0) {
--i;
- drm_gem_object_handle_put_unlocked(objs[i]);
+ drm_gem_object_put(objs[i]);
}
return ret;
}
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index f7b414a813ae..9233019f54a8 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -161,8 +161,8 @@ void drm_sysfs_lease_event(struct drm_device *dev);
/* drm_gem.c */
int drm_gem_init(struct drm_device *dev);
-void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj);
-void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj);
+void drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj);
+void drm_gem_object_handle_put_if_exists_unlocked(struct drm_gem_object *obj);
int drm_gem_handle_create_tail(struct drm_file *file_priv,
struct drm_gem_object *obj,
u32 *handlep);
diff --git a/drivers/gpu/drm/gma500/fbdev.c b/drivers/gpu/drm/gma500/fbdev.c
index 8edefea2ef59..afd252108cfa 100644
--- a/drivers/gpu/drm/gma500/fbdev.c
+++ b/drivers/gpu/drm/gma500/fbdev.c
@@ -121,7 +121,6 @@ static void psb_fbdev_fb_destroy(struct fb_info *info)
drm_fb_helper_fini(fb_helper);
drm_framebuffer_unregister_private(fb);
- fb->obj[0] = NULL;
drm_framebuffer_cleanup(fb);
kfree(fb);
@@ -243,7 +242,6 @@ int psb_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
err_drm_framebuffer_unregister_private:
drm_framebuffer_unregister_private(fb);
- fb->obj[0] = NULL;
drm_framebuffer_cleanup(fb);
kfree(fb);
err_drm_gem_object_put:
--
2.50.0
Hi Philipp,
On 01/07/25 10:21, Philipp Stanner wrote:
> Since its inception, the GPU scheduler can leak memory if the driver
> calls drm_sched_fini() while there are still jobs in flight.
>
> The simplest way to solve this in a backwards compatible manner is by
> adding a new callback, drm_sched_backend_ops.cancel_job(), which
> instructs the driver to signal the hardware fence associated with the
> job. Afterwards, the scheduler can savely use the established free_job()
s/savely/safely
> callback for freeing the job.
>
> Implement the new backend_ops callback cancel_job().
>
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
> Link: https://lore.kernel.org/dri-devel/20250418113211.69956-1-tvrtko.ursulin@iga…
> Signed-off-by: Philipp Stanner <phasta(a)kernel.org>
Reviewed-by: Maíra Canal <mcanal(a)igalia.com>
Best Regards,
- Maíra
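
For readers following the thread, here is a rough sketch of what a
driver-side cancel_job() implementation could look like. All names
below (my_job, hw_fence, my_cancel_job) are invented for illustration
and are not taken from the series or any real driver:

#include <linux/container_of.h>
#include <linux/dma-fence.h>
#include <drm/gpu_scheduler.h>

struct my_job {
	struct drm_sched_job base;
	struct dma_fence *hw_fence;	/* fence returned from run_job() */
};

static void my_cancel_job(struct drm_sched_job *sched_job)
{
	struct my_job *job = container_of(sched_job, struct my_job, base);

	/*
	 * The hardware will never complete this job: signal its fence
	 * with an error so the scheduler can afterwards free the job
	 * through the established free_job() path.
	 */
	dma_fence_set_error(job->hw_fence, -ECANCELED);
	dma_fence_signal(job->hw_fence);
}

static const struct drm_sched_backend_ops my_sched_ops = {
	/* .run_job, .timedout_job and .free_job as before, plus: */
	.cancel_job = my_cancel_job,
};

The key point is only that the hardware fence gets signalled with an
error; what the driver tears down beforehand is up to it.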
Hi
On 03.07.25 at 19:23, Bert Karwatzki wrote:
> On Thursday, 2025-07-03 at 18:09 +0200, Thomas Zimmermann wrote:
>> Hi,
>>
>> before I give up on the issue, could you please test the attached patch?
>>
>> Best regards
>> Thomas
>>
>>
>> --
>> Thomas Zimmermann
>> Graphics Driver Developer
>> SUSE Software Solutions Germany GmbH
>> Frankenstrasse 146, 90461 Nuernberg, Germany
>> GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
>> HRB 36809 (AG Nuernberg)
> I applied the patch on top of next-20250703
>
> $ git log --oneline
> 18ee3ed3cb60 (HEAD -> drm_gem_object_handle_put) drm/amdgpu: Provide custom framebuffer destroy function
> 8d6c58332c7a (tag: next-20250703, origin/master, origin/HEAD, master) Add linux-next specific files for 20250703
>
> and it solves the issue for me (i.e. no warnings).
Great, thanks for testing. If nothing else, that's the minimal workaround.
Here's another patch, which should solve the problem for all drivers.
Could you please revert the old fix and apply the new one and test again?
Best regards
Thomas
>
> Bert Karwatzki
--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Frankenstrasse 146, 90461 Nuernberg, Germany
GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
HRB 36809 (AG Nuernberg)
[Note: it would be really useful to Cc all relevant maintainers]
On Fri, Jun 27, 2025 at 04:10:27PM +0100, Pavel Begunkov wrote:
> This series implements it for read/write io_uring requests. The uAPI
> looks similar to normal registered buffers, the user will need to
> register a dmabuf in io_uring first and then use it as any other
> registered buffer. On registration the user also specifies a file
> to map the dmabuf for.
Just commenting from the in-kernel POV here, where the interface
feels wrong.
You can't just expose 'the DMA device' through the file operations,
because there can be, and often is, more than one. Similarly, stuffing
a dma_addr_t into an iovec is rather dangerous.
The model that should work much better is to have file operations
to attach to / detach from a dma_buf, and then have an iter that
specifies a dmabuf and offsets into it. That way the code behind the
file operations can forward the attachment to all the needed
devices (including devices added or removed while the buffer remains
attached to the file) and can pick the right dma address for each device.
I also remember some discussion that new dma-buf importers should
use the dynamic importer model for long-term imports, but as I'm
anything but an expert in that area I'll let the dma-buf folks
speak.
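
To make that model a little more concrete, here is a very rough sketch
of what such an interface could look like; every name below is invented
for illustration and nothing like it exists in the kernel today:

#include <linux/dma-buf.h>
#include <linux/fs.h>
#include <linux/types.h>

/*
 * File-level attach/detach: the code behind the file forwards the
 * attachment to all devices it drives (even ones that come and go
 * while the buffer stays attached) and keeps the per-device DMA
 * addresses to itself.
 */
struct dmabuf_file_ops {
	int  (*attach_dmabuf)(struct file *file, struct dma_buf *dmabuf);
	void (*detach_dmabuf)(struct file *file, struct dma_buf *dmabuf);
};

/*
 * Instead of stuffing a dma_addr_t into an iovec, the I/O path would
 * carry the dma-buf plus an offset/length into it; each backend then
 * resolves the correct address for its own device.
 */
struct dmabuf_iov {
	struct dma_buf	*dmabuf;
	u64		offset;
	u64		len;
};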
On Friday, 6 June 2025 at 08:28:23 CEST, Tomeu Vizoso wrote:
> This uses the SHMEM DRM helpers and we map right away to the CPU and NPU
> sides, as all buffers are expected to be accessed from both.
>
> v2:
> - Sync the IOMMUs for the other cores when mapping and unmapping.
>
> v3:
> - Make use of GPL-2.0-only for the copyright notice (Jeff Hugo)
>
> v6:
> - Use mutexes guard (Markus Elfring)
>
> v7:
> - Assign its own IOMMU domain to each client, for isolation (Daniel
> Stone and Robin Murphy)
>
> Reviewed-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
> Signed-off-by: Tomeu Vizoso <tomeu(a)tomeuvizoso.net>
> ---
> diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..61b7f970a6885aa13784daa1222611a02aa10dee
> --- /dev/null
> +++ b/drivers/accel/rocket/rocket_gem.c
> @@ -0,0 +1,115 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright 2024-2025 Tomeu Vizoso <tomeu(a)tomeuvizoso.net> */
> +
> +#include <drm/drm_device.h>
> +#include <drm/drm_utils.h>
> +#include <drm/rocket_accel.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/iommu.h>
> +
> +#include "rocket_device.h"
> +#include "rocket_drv.h"
> +#include "rocket_gem.h"
> +
> +static void rocket_gem_bo_free(struct drm_gem_object *obj)
> +{
> + struct rocket_device *rdev = to_rocket_device(obj->dev);
> + struct rocket_gem_object *bo = to_rocket_bo(obj);
> + size_t unmapped;
> +
> + drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
This should probably be
drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1);
as pages_use_count is of type refcount_t since
commit 051b6646d36d ("drm/shmem-helper: Use refcount_t for pages_use_count")
Heiko
On Fri, Jun 6, 2025 at 1:29 AM Tomeu Vizoso <tomeu(a)tomeuvizoso.net> wrote:
>
> Using the DRM GPU scheduler infrastructure, with a scheduler for each
> core.
>
> Userspace can decide for a series of tasks to be executed sequentially
> in the same core, so SRAM locality can be taken advantage of.
>
> The job submission code was initially based on Panfrost.
>
> v2:
> - Remove hardcoded number of cores
> - Misc. style fixes (Jeffrey Hugo)
> - Repack IOCTL struct (Jeffrey Hugo)
>
> v3:
> - Adapt to a split of the register block in the DT bindings (Nicolas
> Frattaroli)
> - Make use of GPL-2.0-only for the copyright notice (Jeff Hugo)
> - Use drm_* logging functions (Thomas Zimmermann)
> - Rename reg i/o macros (Thomas Zimmermann)
> - Add padding to ioctls and check for zero (Jeff Hugo)
> - Improve error handling (Nicolas Frattaroli)
>
> v6:
> - Use mutexes guard (Markus Elfring)
> - Use u64_to_user_ptr (Jeff Hugo)
> - Drop rocket_fence (Rob Herring)
>
> v7:
> - Assign its own IOMMU domain to each client, for isolation (Daniel
> Stone and Robin Murphy)
>
> Signed-off-by: Tomeu Vizoso <tomeu(a)tomeuvizoso.net>
> ---
[...]
> --- a/include/uapi/drm/rocket_accel.h
> +++ b/include/uapi/drm/rocket_accel.h
> @@ -12,8 +12,10 @@ extern "C" {
> #endif
>
> #define DRM_ROCKET_CREATE_BO 0x00
> +#define DRM_ROCKET_SUBMIT 0x01
>
> #define DRM_IOCTL_ROCKET_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo)
> +#define DRM_IOCTL_ROCKET_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_SUBMIT, struct drm_rocket_submit)
>
> /**
> * struct drm_rocket_create_bo - ioctl argument for creating Rocket BOs.
> @@ -37,6 +39,68 @@ struct drm_rocket_create_bo {
> __u64 offset;
> };
>
> +/**
> + * struct drm_rocket_task - A task to be run on the NPU
> + *
> + * A task is the smallest unit of work that can be run on the NPU.
> + */
> +struct drm_rocket_task {
> + /** Input: DMA address to NPU mapping of register command buffer */
> + __u64 regcmd;
> +
> + /** Input: Number of commands in the register command buffer */
> + __u32 regcmd_count;
> +
> + /** Reserved, must be zero. */
> + __u32 reserved;
> +};
> +
> +/**
> + * struct drm_rocket_job - A job to be run on the NPU
> + *
> + * The kernel will schedule the execution of this job taking into account its
> + * dependencies with other jobs. All tasks in the same job will be executed
> + * sequentially on the same core, to benefit from memory residency in SRAM.
> + */
> +struct drm_rocket_job {
> + /** Input: Pointer to an array of struct drm_rocket_task. */
> + __u64 tasks;
> +
> + /** Input: Pointer to a u32 array of the BOs that are read by the job. */
> + __u64 in_bo_handles;
> +
> + /** Input: Pointer to a u32 array of the BOs that are written to by the job. */
> + __u64 out_bo_handles;
> +
> + /** Input: Number of tasks passed in. */
> + __u32 task_count;
> +
> + /** Input: Number of input BO handles passed in (size is that times 4). */
> + __u32 in_bo_handle_count;
> +
> + /** Input: Number of output BO handles passed in (size is that times 4). */
> + __u32 out_bo_handle_count;
> +
> + /** Reserved, must be zero. */
> + __u32 reserved;
> +};
> +
> +/**
> + * struct drm_rocket_submit - ioctl argument for submitting commands to the NPU.
> + *
> + * The kernel will schedule the execution of these jobs in dependency order.
> + */
> +struct drm_rocket_submit {
> + /** Input: Pointer to an array of struct drm_rocket_job. */
> + __u64 jobs;
> +
> + /** Input: Number of jobs passed in. */
> + __u32 job_count;
Isn't there a problem if you need to expand drm_rocket_job beyond
using the one reserved field? You can't add to the struct, because
then the kernel doesn't know its size here. So you would have to
modify drm_rocket_submit just to extend drm_rocket_job. Maybe better
if you plan for that now rather than later by making the size
explicit (see the sketch after the quoted struct below).
Though etnaviv at least has similar issues.
Rob
> +
> + /** Reserved, must be zero. */
> + __u32 reserved;
> +};
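
One way to make the size explicit, sketched purely for illustration
(the job_struct_size field name is invented here and would take the
place of the reserved field; it is not a concrete proposal):

struct drm_rocket_submit {
	/** Input: Pointer to an array of struct drm_rocket_job. */
	__u64 jobs;

	/** Input: Number of jobs passed in. */
	__u32 job_count;

	/** Input: sizeof(struct drm_rocket_job) as built by userspace. */
	__u32 job_struct_size;
};

The kernel would then copy min(job_struct_size, sizeof(struct
drm_rocket_job)) bytes per array element and treat any missing tail as
zero, so an older userspace keeps working after the job struct grows.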