This should allow amdkfd_fences to outlive the amdgpu module.
v2: implement Felix's suggestion to lock the fence while signaling it.
Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 6 +++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 39 ++++++++----------- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +- 4 files changed, 27 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8bdfcde2029b..6254cef04213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +void amdkfd_fence_signal(struct dma_fence *f); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, @@ -210,6 +211,11 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; }
+static inline +void amdkfd_fence_signal(struct dma_fence *f) +{ +} + static inline struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 09c919f72b6c..f76c3c52a2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (!svm_range_schedule_evict_svm_bo(fence)) return true; } - return false; -} - -/** - * amdkfd_fence_release - callback that fence can be freed - * - * @f: dma_fence - * - * This function is called when the reference count becomes zero. - * Drops the mm_struct reference and RCU schedules freeing up the fence. - */ -static void amdkfd_fence_release(struct dma_fence *f) -{ - struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); - - /* Unconditionally signal the fence. The process is getting - * terminated. - */ - if (WARN_ON(!fence)) - return; /* Not an amdgpu_amdkfd_fence */ - mmdrop(fence->mm); - kfree_rcu(f, rcu); + fence->mm = NULL; + return false; }
/** @@ -174,9 +154,22 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; }
+void amdkfd_fence_signal(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + unsigned long flags; + + dma_fence_lock_irqsafe(f, flags); + if (fence->mm) { + mmdrop(fence->mm); + fence->mm = NULL; + } + dma_fence_signal_locked(f); + dma_fence_unlock_irqrestore(f, flags); +} + static const struct dma_fence_ops amdkfd_fence_ops = { .get_driver_name = amdkfd_fence_get_driver_name, .get_timeline_name = amdkfd_fence_get_timeline_name, .enable_signaling = amdkfd_fence_enable_signaling, - .release = amdkfd_fence_release, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a085faac9fe1..8fac70b839ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1173,7 +1173,7 @@ static void kfd_process_wq_release(struct work_struct *work) synchronize_rcu(); ef = rcu_access_pointer(p->ef); if (ef) - dma_fence_signal(ef); + amdkfd_fence_signal(ef);
kfd_process_remove_sysfs(p); kfd_debugfs_remove_process(p); @@ -1990,7 +1990,6 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node, static int signal_eviction_fence(struct kfd_process *p) { struct dma_fence *ef; - int ret;
rcu_read_lock(); ef = dma_fence_get_rcu_safe(&p->ef); @@ -1998,10 +1997,10 @@ static int signal_eviction_fence(struct kfd_process *p) if (!ef) return -EINVAL;
- ret = dma_fence_signal(ef); + amdkfd_fence_signal(ef); dma_fence_put(ef);
- return ret; + return 0; }
static void evict_process_worker(struct work_struct *work) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c30dfb8ec236..566950702b7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) /* We're not in the eviction worker. Signal the fence. */ - dma_fence_signal(&svm_bo->eviction_fence->base); + amdkfd_fence_signal(&svm_bo->eviction_fence->base); dma_fence_put(&svm_bo->eviction_fence->base); amdgpu_bo_unref(&svm_bo->bo); kfree(svm_bo); @@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mmap_read_unlock(mm); mmput(mm);
- dma_fence_signal(&svm_bo->eviction_fence->base); + amdkfd_fence_signal(&svm_bo->eviction_fence->base);
/* This is the last reference to svm_bo, after svm_range_vram_node_free * has been called in svm_migrate_vram_to_ram