On memory-constrained deployments where a system resource manager process can make well-founded trade-off decisions, it is useful to be able to tell panthor's GEM shrinker to immediately run a reclaim on a specific process.
Make it possible to do this by adding a new sysfs file, called "mem_reclaim", which kicks off a reclaim for private BOs owned by the process whose TGID is written to the file.
Processes may trigger an explicit reclaim on themselves, but triggering it on other processes requires the CAP_SYS_RESOURCE capability.
Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
---
 Documentation/ABI/testing/sysfs-driver-panthor-mem | 17 ++++++
 drivers/gpu/drm/panthor/panthor_drv.c              | 67 ++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_gem.c              |  4 +-
 drivers/gpu/drm/panthor/panthor_gem.h              |  1 +
 drivers/gpu/drm/panthor/panthor_mmu.c              | 33 +++++++++++
 drivers/gpu/drm/panthor/panthor_mmu.h              |  2 +
 6 files changed, 122 insertions(+), 2 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-driver-panthor-mem b/Documentation/ABI/testing/sysfs-driver-panthor-mem
new file mode 100644
index 000000000000..6639394abed2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-panthor-mem
@@ -0,0 +1,17 @@
+What:		/sys/bus/platform/drivers/panthor/.../mem_reclaim
+Date:		May 2026
+Contact:	Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
+Description:
+		(WO) Writing to this file will trigger a GPU memory reclaim run for all
+		panthor GPU contexts associated with the TGID written to it. The write
+		completes when the reclaim operation has finished.
+
+		To run a reclaim on a TGID other than its own, a process requires the
+		CAP_SYS_RESOURCE capability.
+
+		Possible error codes:
+		* -ERANGE: given TGID is too large/small for the TGID type.
+		* -EINVAL: given TGID could not be parsed.
+		* -EPERM: insufficient permissions to run a reclaim on given TGID.
+		* -EINTR: interrupted by signal.
+		* -ESRCH: given TGID is not using panthor, and might not exist at all.
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 66996c9147c2..7d19b8785ea3 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1858,8 +1858,75 @@ static ssize_t profiling_store(struct device *dev,
static DEVICE_ATTR_RW(profiling);
+/**
+ * panthor_run_on_pfiles_of_tgid - Run function on each panthor_file of process
+ * @ptdev: pointer to the &struct panthor_device
+ * @tgid: TGID of the process to look for, as seen in the caller's PID namespace
+ * @func: function pointer to run on every &struct panthor_file opened by @tgid
+ *
+ * Walks the list of all panthor DRM files with the filelist mutex held. For
+ * every file whose owner PID still has a task and resolves to @tgid, runs
+ * @func with the associated &struct panthor_file.
+ *
+ * Returns: negative errno on error, number of files matching @tgid otherwise.
+ */
+static int panthor_run_on_pfiles_of_tgid(struct panthor_device *ptdev, pid_t tgid,
+					 void (*func)(struct panthor_file *pf))
+{
+	struct drm_file *file;
+	int found = 0;
+
+	scoped_cond_guard(mutex_intr, return -EINTR, &ptdev->base.filelist_mutex) {
+		list_for_each_entry(file, &ptdev->base.filelist, lhead) {
+			struct pid *file_pid;
+			bool match;
+
+			rcu_read_lock();
+			file_pid = rcu_dereference(file->pid);
+			match = pid_task(file_pid, PIDTYPE_TGID) && pid_vnr(file_pid) == tgid;
+			rcu_read_unlock();
+			if (!match)
+				continue;
+
+			found++;
+			/* func needs to run outside RCU lock */
+			func(file->driver_priv);
+		}
+	}
+
+	return found;
+}
+
+static ssize_t mem_reclaim_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t len)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+	pid_t tgid;
+	int ret;
+
+	ret = kstrtoint(buf, 0, &tgid);
+	if (ret)
+		return ret;
+
+	/* @buf holds a userspace TGID: compare IDs in the writer's PID namespace */
+	if (tgid != task_tgid_vnr(current) && !capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+
+	ret = panthor_run_on_pfiles_of_tgid(ptdev, tgid, panthor_mmu_force_reclaim);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -ESRCH;
+
+	return len;
+}
+
+static DEVICE_ATTR_WO(mem_reclaim);
+
 static struct attribute *panthor_attrs[] = {
 	&dev_attr_profiling.attr,
+	&dev_attr_mem_reclaim.attr,
 	NULL,
 };
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 80e82238f3c5..5780fd51170c 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -1473,8 +1473,8 @@ static bool panthor_gem_try_evict_no_resv_wait(struct drm_gem_object *obj, return ret == 0; }
-static bool panthor_gem_try_evict(struct drm_gem_object *obj, - struct ww_acquire_ctx *ticket) +bool panthor_gem_try_evict(struct drm_gem_object *obj, + struct ww_acquire_ctx *ticket) { struct panthor_gem_object *bo = to_panthor_bo(obj);
diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index ae0491d0b121..593d8e7b7aaf 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -254,6 +254,7 @@ panthor_gem_get_dev_sgt(struct panthor_gem_object *bo); int panthor_gem_pin(struct panthor_gem_object *bo); void panthor_gem_unpin(struct panthor_gem_object *bo); int panthor_gem_swapin_locked(struct panthor_gem_object *bo); +bool panthor_gem_try_evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket); void panthor_gem_update_reclaim_state_locked(struct panthor_gem_object *bo, enum panthor_gem_reclaim_state *old_state); int panthor_gem_shrinker_init(struct panthor_device *ptdev); diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index b81388b35a58..e185787f5657 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -3199,6 +3199,39 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev, return freed; }
+/**
+ * panthor_mmu_force_reclaim - Run a reclaim on all VMs associated with a file
+ * @pfile: pointer to the &struct panthor_file to reclaim memory on
+ *
+ * Moves every VM of @pfile with a non-empty reclaim LRU to the head of the
+ * device's reclaim VM list, then runs a private BO reclaim pass sized to the
+ * sum of those LRU counts, i.e. tries to swap out as much as possible.
+ */
+void panthor_mmu_force_reclaim(struct panthor_file *pfile)
+{
+	struct panthor_device *ptdev = pfile->ptdev;
+	unsigned long idx, nr_left, nr_freed;
+	unsigned int nr_to_scan = 0;
+	struct panthor_vm *vm;
+	size_t bytes_freed;
+
+	mutex_lock(&ptdev->reclaim.lock);
+	xa_for_each(&pfile->vms->xa, idx, vm) {
+		/* A VM with an empty reclaim LRU has nothing left to evict */
+		if (vm->reclaim.lru.count) {
+			nr_to_scan += vm->reclaim.lru.count;
+			list_move(&vm->reclaim.lru_node, &ptdev->reclaim.vms);
+		}
+	}
+	mutex_unlock(&ptdev->reclaim.lock);
+
+	nr_freed = panthor_mmu_reclaim_priv_bos(ptdev, nr_to_scan, &nr_left,
+						&bytes_freed, panthor_gem_try_evict);
+	drm_dbg(&ptdev->base,
+		"Reclaimed %lu pages (%zu bytes) out of %u scanned, with %lu remaining\n",
+		nr_freed, bytes_freed, nr_to_scan, nr_left);
+}
+
 /**
  * panthor_mmu_unplug() - Unplug the MMU logic
  * @ptdev: Device.
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
index 12b18b5f90e1..34adca4b4e95 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.h
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -11,6 +11,7 @@
 struct drm_exec;
 struct drm_sched_job;
 struct drm_memory_stats;
+struct panthor_file;
 struct panthor_gem_object;
 struct panthor_heap_pool;
 struct panthor_vm;
@@ -23,6 +24,7 @@ void panthor_mmu_pre_reset(struct panthor_device *ptdev);
 void panthor_mmu_post_reset(struct panthor_device *ptdev);
 void panthor_mmu_suspend(struct panthor_device *ptdev);
 void panthor_mmu_resume(struct panthor_device *ptdev);
+void panthor_mmu_force_reclaim(struct panthor_file *pfile);
int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, u64 offset, u64 size, u64 va, u32 flags);