> diff --git a/block/bio.c b/block/bio.c
> index 7b13bdf72de0..8793f1ee559d 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -843,6 +843,11 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
> bio_clone_blkg_association(bio, bio_src);
> }
>
> + if (bio_flagged(bio_src, BIO_DMA_TOKEN)) {
> + bio->dma_token = bio_src->dma_token;
> + bio_set_flag(bio, BIO_DMA_TOKEN);
> + }
Historically __bio_clone itself does not clone the payload, just the
bio. But we got rid of the callers that want to clone a bio but not
the payload long time ago.
I'd suggest a prep patch that moves assigning bi_io_vec from
bio_alloc_clone and bio_init_clone into __bio_clone, and given that they
are the same field that'll take care of the dma token as well.
Alternatively do it in an if/else that the compiler will hopefully
optimize away.
> @@ -1349,6 +1366,10 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
> bio_iov_bvec_set(bio, iter);
> iov_iter_advance(iter, bio->bi_iter.bi_size);
> return 0;
> + } else if (iov_iter_is_dma_token(iter)) {
No else after a return please.
> +++ b/block/blk-merge.c
> @@ -328,6 +328,29 @@ int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
> unsigned nsegs = 0, bytes = 0, gaps = 0;
> struct bvec_iter iter;
>
> + if (bio_flagged(bio, BIO_DMA_TOKEN)) {
Please split the dmabuf logic into a self-contained
helper here.
> + int offset = offset_in_page(bio->bi_iter.bi_bvec_done);
> +
> + nsegs = ALIGN(bio->bi_iter.bi_size + offset, PAGE_SIZE);
> + nsegs >>= PAGE_SHIFT;
Why are we hardcoding PAGE_SIZE based "segments" here?
> +
> + if (offset & lim->dma_alignment || bytes & len_align_mask)
> + return -EINVAL;
> +
> + if (bio->bi_iter.bi_size > max_bytes) {
> + bytes = max_bytes;
> + nsegs = (bytes + offset) >> PAGE_SHIFT;
> + goto split;
> + } else if (nsegs > lim->max_segments) {
No else after a goto either.
On Sun, Nov 23, 2025 at 10:51:23PM +0000, Pavel Begunkov wrote:
> We'll need bio_flagged() earlier in bio.h in the next patch, move it
> together with all related helpers, and mark the bio_flagged()'s bio
> argument as const.
>
> Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Looks good:
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Maybe ask Jens to queue it up ASAP to get it out of the way?
On Sun, Nov 23, 2025 at 10:51:22PM +0000, Pavel Begunkov wrote:
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 5b127043a151..1b22594ca35b 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -29,6 +29,7 @@ enum iter_type {
> ITER_FOLIOQ,
> ITER_XARRAY,
> ITER_DISCARD,
> + ITER_DMA_TOKEN,
Please use DMABUF/dmabuf naming everywhere, this is about dmabufs and
not dma in general.
Otherwise this looks good.
On Sun, Nov 23, 2025 at 10:51:21PM +0000, Pavel Begunkov wrote:
> +static inline struct dma_token *
> +dma_token_create(struct file *file, struct dma_token_params *params)
> +{
> + struct dma_token *res;
> +
> + if (!file->f_op->dma_map)
> + return ERR_PTR(-EOPNOTSUPP);
> + res = file->f_op->dma_map(file, params);
Calling the file operation ->dma_map feels really misleading.
create_token as in the function name is already much better, but
it really is not just dma, but dmabuf related, and that should really
be encoded in the name.
Also why not pass the dmabuf and direction directly instead of wrapping
it in the odd params struct making the whole thing hard to follow?
For retrieving a pointer to the struct dma_resv for a given GEM object. We
also introduce it in a new trait, BaseObjectPrivate, which we automatically
implement for all gem objects and don't expose to users outside of the
crate.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
---
rust/kernel/drm/gem/mod.rs | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
index 5c215e83c1b09..ec3c1b1775196 100644
--- a/rust/kernel/drm/gem/mod.rs
+++ b/rust/kernel/drm/gem/mod.rs
@@ -199,6 +199,18 @@ fn create_mmap_offset(&self) -> Result<u64> {
impl<T: IntoGEMObject> BaseObject for T {}
+/// Crate-private base operations shared by all GEM object classes.
+#[expect(unused)]
+pub(crate) trait BaseObjectPrivate: IntoGEMObject {
+ /// Return a pointer to this object's dma_resv.
+ fn raw_dma_resv(&self) -> *mut bindings::dma_resv {
+ // SAFETY: `as_gem_obj()` always returns a valid pointer to the base DRM gem object
+ unsafe { (*self.as_raw()).resv }
+ }
+}
+
+impl<T: IntoGEMObject> BaseObjectPrivate for T {}
+
/// A base GEM object.
///
/// # Invariants
--
2.52.0
On 11/27/25 11:50, Philipp Stanner wrote:
> On Thu, 2025-11-13 at 15:51 +0100, Christian König wrote:
>> Using the inline lock is now the recommended way for dma_fence implementations.
>>
>> So use this approach for the framework internal fences as well.
>
> nit:
> s/framework/framework's
>
>>
>> Also saves about 4 bytes for the external spinlock.
>
> On all platforms or just AMD64?
I think most if not all platforms. Or is anybody really using 64 bits for a spinlock?
Christian.
>
>>
>> Signed-off-by: Christian König <christian.koenig(a)amd.com>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
>> ---
>>  drivers/dma-buf/dma-fence.c | 20 ++++----------------
>>  1 file changed, 4 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
>> index 9a5aa9e44e13..e6d26c2e0391 100644
>> --- a/drivers/dma-buf/dma-fence.c
>> +++ b/drivers/dma-buf/dma-fence.c
>> @@ -24,7 +24,6 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
>> Â EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
>> Â EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
>> Â
>> -static DEFINE_SPINLOCK(dma_fence_stub_lock);
>> Â static struct dma_fence dma_fence_stub;
>> Â
>> Â /*
>> @@ -123,12 +122,8 @@ static const struct dma_fence_ops dma_fence_stub_ops = {
>> Â
>> Â static int __init dma_fence_init_stub(void)
>> Â {
>> - dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops,
>> - Â Â Â Â Â Â &dma_fence_stub_lock, 0, 0);
>> -
>> - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>> - &dma_fence_stub.flags);
>
> That change is unrelated, isn't it? Enlarges the diff and breaks git-
> blame.
>
>> -
>> + dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, NULL, 0, 0);
>> + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &dma_fence_stub.flags);
>> Â dma_fence_signal(&dma_fence_stub);
>> Â return 0;
>> Â }
>> @@ -160,16 +155,9 @@ struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp)
>> Â if (fence == NULL)
>> Â return NULL;
>> Â
>> - dma_fence_init(fence,
>> - Â Â Â Â Â Â &dma_fence_stub_ops,
>> - Â Â Â Â Â Â &dma_fence_stub_lock,
>> - Â Â Â Â Â Â 0, 0);
>> -
>> - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>> - &fence->flags);
>
> Same.
>
>> -
>> + dma_fence_init(fence, &dma_fence_stub_ops, NULL, 0, 0);
>> + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags);
>> Â dma_fence_signal_timestamp(fence, timestamp);
>> -
>
> I wouldn't remove that empty line (nit).
>
>> Â return fence;
>> Â }
>> Â EXPORT_SYMBOL(dma_fence_allocate_private_stub);
>
On 11/28/25 11:10, Philipp Stanner wrote:
> On Fri, 2025-11-28 at 11:06 +0100, Christian König wrote:
>> On 11/27/25 12:10, Philipp Stanner wrote:
>>> On Thu, 2025-11-13 at 15:51 +0100, Christian König wrote:
>>>> This should allow amdkfd_fences to outlive the amdgpu module.
>>>>
>>>> v2: implement Felix suggestion to lock the fence while signaling it.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig(a)amd.com>
>>>> ---
>>>>
>>>>
>
> […]
>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> index a085faac9fe1..8fac70b839ed 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> @@ -1173,7 +1173,7 @@ static void kfd_process_wq_release(struct work_struct *work)
>>>> Â synchronize_rcu();
>>>> Â ef = rcu_access_pointer(p->ef);
>>>> Â if (ef)
>>>> - dma_fence_signal(ef);
>>>> + amdkfd_fence_signal(ef);
>>>> Â
>>>> Â kfd_process_remove_sysfs(p);
>>>> Â kfd_debugfs_remove_process(p);
>>>> @@ -1990,7 +1990,6 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
>>>> Â static int signal_eviction_fence(struct kfd_process *p)
>>>> Â {
>>>> Â struct dma_fence *ef;
>>>> - int ret;
>>>> Â
>>>> Â rcu_read_lock();
>>>> Â ef = dma_fence_get_rcu_safe(&p->ef);
>>>> @@ -1998,10 +1997,10 @@ static int signal_eviction_fence(struct kfd_process *p)
>>>> Â if (!ef)
>>>> Â return -EINVAL;
>>>> Â
>>>> - ret = dma_fence_signal(ef);
>>>> + amdkfd_fence_signal(ef);
>>>> Â dma_fence_put(ef);
>>>> Â
>>>> - return ret;
>>>> + return 0;
>>>
>>> Oh wait, that's the code I'm also touching in my return code series!
>>>
>>> https://lore.kernel.org/dri-devel/cef83fed-5994-4c77-962c-9c7aac9f7306@amd.…
>>>
>>>
>>> Does this series then solve the problem Felix pointed out in
>>> evict_process_worker()?
>>
>> No it doesn't, I wasn't aware that the higher level code actually needs the status. After all Felix is the maintainer of this part.
>>
>> This patch here needs to be rebased on top of yours and changed accordingly to still return the fence status correctly.
>>
>> But thanks for pointing that out.
>
>
> Alright, so my (repaired, v2) status-code-removal series shall enter drm-misc-next first, and then your series here. ACK?
Works for me, I just need both to re-base the amdgpu patches on top.
Christian.
>
>
> P.
On 11/27/25 12:10, Philipp Stanner wrote:
> On Thu, 2025-11-13 at 15:51 +0100, Christian König wrote:
>> This should allow amdkfd_fences to outlive the amdgpu module.
>>
>> v2: implement Felix suggestion to lock the fence while signaling it.
>>
>> Signed-off-by: Christian König <christian.koenig(a)amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  6 +++
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  | 39 ++++++++-----------
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  7 ++--
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  4 +-
>>  4 files changed, 27 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 8bdfcde2029b..6254cef04213 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -196,6 +196,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
>> Â #endif
>> Â #if IS_ENABLED(CONFIG_HSA_AMD)
>> Â bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
>> +void amdkfd_fence_signal(struct dma_fence *f);
>> Â struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
>> Â void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
>> Â int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
>> @@ -210,6 +211,11 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
>> Â return false;
>> Â }
>> Â
>> +static inline
>> +void amdkfd_fence_signal(struct dma_fence *f)
>> +{
>
> I would add a short comment here: "Empty function because …"
>
>> +}
>> +
>> Â static inline
>> Â struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
>> Â {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> index 09c919f72b6c..f76c3c52a2a1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
>> @@ -127,29 +127,9 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
>> Â if (!svm_range_schedule_evict_svm_bo(fence))
>> Â return true;
>> Â }
>> - return false;
>> -}
>> -
>> -/**
>> - * amdkfd_fence_release - callback that fence can be freed
>> - *
>> - * @f: dma_fence
>> - *
>> - * This function is called when the reference count becomes zero.
>> - * Drops the mm_struct reference and RCU schedules freeing up the fence.
>> - */
>> -static void amdkfd_fence_release(struct dma_fence *f)
>> -{
>> - struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> -
>> - /* Unconditionally signal the fence. The process is getting
>> - * terminated.
>> - */
>> - if (WARN_ON(!fence))
>> - return; /* Not an amdgpu_amdkfd_fence */
>> -
>> Â mmdrop(fence->mm);
>> - kfree_rcu(f, rcu);
>> + fence->mm = NULL;
>
> That the storage actually takes place is guaranteed by the lock taken
> when calling the fence ops?
>
>> + return false;
>> Â }
>> Â
>> Â /**
>> @@ -174,9 +154,22 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
>> Â return false;
>> Â }
>> Â
>> +void amdkfd_fence_signal(struct dma_fence *f)
>> +{
>> + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
>> + long flags;
>> +
>> + dma_fence_lock_irqsafe(f, flags)
>> + if (fence->mm) {
>> + mmdrop(fence->mm);
>> + fence->mm = NULL;
>> + }
>> + dma_fence_signal_locked(f);
>> + dma_fence_unlock_irqrestore(f, flags)
>> +}
>> +
>> Â static const struct dma_fence_ops amdkfd_fence_ops = {
>> Â .get_driver_name = amdkfd_fence_get_driver_name,
>> Â .get_timeline_name = amdkfd_fence_get_timeline_name,
>> Â .enable_signaling = amdkfd_fence_enable_signaling,
>> - .release = amdkfd_fence_release,
>> Â };
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index a085faac9fe1..8fac70b839ed 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -1173,7 +1173,7 @@ static void kfd_process_wq_release(struct work_struct *work)
>> Â synchronize_rcu();
>> Â ef = rcu_access_pointer(p->ef);
>> Â if (ef)
>> - dma_fence_signal(ef);
>> + amdkfd_fence_signal(ef);
>> Â
>> Â kfd_process_remove_sysfs(p);
>> Â kfd_debugfs_remove_process(p);
>> @@ -1990,7 +1990,6 @@ kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
>> Â static int signal_eviction_fence(struct kfd_process *p)
>> Â {
>> Â struct dma_fence *ef;
>> - int ret;
>> Â
>> Â rcu_read_lock();
>> Â ef = dma_fence_get_rcu_safe(&p->ef);
>> @@ -1998,10 +1997,10 @@ static int signal_eviction_fence(struct kfd_process *p)
>> Â if (!ef)
>> Â return -EINVAL;
>> Â
>> - ret = dma_fence_signal(ef);
>> + amdkfd_fence_signal(ef);
>> Â dma_fence_put(ef);
>> Â
>> - return ret;
>> + return 0;
>
> Oh wait, that's the code I'm also touching in my return code series!
>
> https://lore.kernel.org/dri-devel/cef83fed-5994-4c77-962c-9c7aac9f7306@amd.…
>
>
> Does this series then solve the problem Felix pointed out in
> evict_process_worker()?
No it doesn't, I wasn't aware that the higher level code actually needs the status. After all Felix is the maintainer of this part.
This patch here needs to be rebased on top of yours and changed accordingly to still return the fence status correctly.
But thanks for pointing that out.
Regards,
Christian.
>
>
> P.
>
>
>> Â }
>> Â
>> Â static void evict_process_worker(struct work_struct *work)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index c30dfb8ec236..566950702b7d 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -428,7 +428,7 @@ static void svm_range_bo_release(struct kref *kref)
>> Â
>> Â if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
>> Â /* We're not in the eviction worker. Signal the fence. */
>> - dma_fence_signal(&svm_bo->eviction_fence->base);
>> + amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>> Â dma_fence_put(&svm_bo->eviction_fence->base);
>> Â amdgpu_bo_unref(&svm_bo->bo);
>> Â kfree(svm_bo);
>> @@ -3628,7 +3628,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
>> Â mmap_read_unlock(mm);
>> Â mmput(mm);
>> Â
>> - dma_fence_signal(&svm_bo->eviction_fence->base);
>> + amdkfd_fence_signal(&svm_bo->eviction_fence->base);
>> Â
>> Â /* This is the last reference to svm_bo, after svm_range_vram_node_free
>> Â * has been called in svm_migrate_vram_to_ram
>