Current dma-buf vmap semantics require that the mapped buffer remains
in place until the corresponding vunmap has completed.
For GEM-SHMEM, this used to be guaranteed by a pin operation performed while
creating the S/G table on import. GEM-SHMEM can now import dma-buf objects
without creating the S/G table, so the pin is missing. This leads to
page-fault errors, such as the one shown below.
[ 102.101726] BUG: unable to handle page fault for address: ffffc90127000000
[...]
[ 102.157102] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl]
[...]
[ 102.243250] Call Trace:
[ 102.245695] <TASK>
[ 102.2477V95] ? validate_chain+0x24e/0x5e0
[ 102.251805] ? __lock_acquire+0x568/0xae0
[ 102.255807] udl_render_hline+0x165/0x341 [udl]
[ 102.260338] ? __pfx_udl_render_hline+0x10/0x10 [udl]
[ 102.265379] ? local_clock_noinstr+0xb/0x100
[ 102.269642] ? __lock_release.isra.0+0x16c/0x2e0
[ 102.274246] ? mark_held_locks+0x40/0x70
[ 102.278177] udl_primary_plane_helper_atomic_update+0x43e/0x680 [udl]
[ 102.284606] ? __pfx_udl_primary_plane_helper_atomic_update+0x10/0x10 [udl]
[ 102.291551] ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170
[ 102.297208] ? lockdep_hardirqs_on+0x88/0x130
[ 102.301554] ? _raw_spin_unlock_irq+0x24/0x50
[ 102.305901] ? wait_for_completion_timeout+0x2bb/0x3a0
[ 102.311028] ? drm_atomic_helper_calc_timestamping_constants+0x141/0x200
[ 102.317714] ? drm_atomic_helper_commit_planes+0x3b6/0x1030
[ 102.323279] drm_atomic_helper_commit_planes+0x3b6/0x1030
[ 102.328664] drm_atomic_helper_commit_tail+0x41/0xb0
[ 102.333622] commit_tail+0x204/0x330
[...]
[ 102.529946] ---[ end trace 0000000000000000 ]---
[ 102.651980] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl]
In this stack trace, udl (based on GEM-SHMEM) imported and vmap'ed a
dma-buf from amdgpu. Amdgpu relocated the buffer, thereby invalidating the
mapping.
Provide a custom dma-buf vmap method in amdgpu that pins the object before
mapping its buffer pages into kernel address space. Do the opposite in
vunmap.
Note that dma-buf vmap differs from GEM vmap in how it handles relocation.
While dma-buf vmap keeps the buffer in place, GEM vmap requires the caller
to keep the buffer in place. Hence, this fix is in amdgpu's dma-buf code
instead of its GEM code.
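To illustrate the contract from the importer's side, here is a minimal
sketch (not part of the patch; the function name is made up, and locking
details such as dma_buf_vmap() vs. dma_buf_vmap_unlocked() are omitted) of
what a GEM-SHMEM based driver such as udl effectively does with the
imported dma-buf:

#include <linux/dma-buf.h>
#include <linux/iosys-map.h>

/*
 * Sketch only: the importer maps the buffer and relies on it staying in
 * place until vunmap. It cannot pin the exporter's BO itself, so the
 * exporter's vmap callback has to do the pinning.
 */
static int importer_fill_framebuffer(struct dma_buf *dmabuf)
{
	struct iosys_map map;
	int ret;

	ret = dma_buf_vmap(dmabuf, &map);	/* exporter must keep the pages resident */
	if (ret)
		return ret;

	/* ... access the buffer through map.vaddr or map.vaddr_iomem ... */

	dma_buf_vunmap(dmabuf, &map);		/* exporter may move the buffer again */

	return 0;
}

The exporter-side counterpart of that contract is what the new vmap/vunmap
callbacks below implement.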
A discussion of various approaches to solving the problem is available
at [1].
v3:
- try (GTT | VRAM); drop CPU domain (Christian)
v2:
- only use mappable domains (Christian)
- try pinning to domains in preferred order
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: 660cd44659a0 ("drm/shmem-helper: Import dmabuf without mapping its sg_table")
Reported-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Closes: https://lore.kernel.org/dri-devel/ba1bdfb8-dbf7-4372-bdcb-df7e0511c702@suse…
Cc: Shixiong Ou <oushixiong(a)kylinos.cn>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Simona Vetter <simona(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Link: https://lore.kernel.org/dri-devel/9792c6c3-a2b8-4b2b-b5ba-fba19b153e21@suse… # [1]
---
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 34 +++++++++++++++++++--
1 file changed, 32 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 5743ebb2f1b7..ce27cb5bb05e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -285,6 +285,36 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ int ret;
+
+ /*
+ * Pin to keep buffer in place while it's vmap'ed. The actual
+ * domain is not that important as long as it's mappable. Using
+ * GTT and VRAM should be compatible with most use cases.
+ */
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
+ if (ret)
+ return ret;
+ ret = drm_gem_dmabuf_vmap(dma_buf, map);
+ if (ret)
+ amdgpu_bo_unpin(bo);
+
+ return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ drm_gem_dmabuf_vunmap(dma_buf, map);
+ amdgpu_bo_unpin(bo);
+}
+
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
.pin = amdgpu_dma_buf_pin,
@@ -294,8 +324,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
};
/**
--
2.50.1
Hi,
This series is the follow-up of the discussion that John and I had some
time ago here:
https://lore.kernel.org/all/CANDhNCquJn6bH3KxKf65BWiTYLVqSd9892-xtFDHHqqyrr…
The initial problem we were discussing was that I'm currently working on
a platform which has a memory layout with ECC enabled. However, enabling
ECC has a number of drawbacks on that platform: lower performance,
increased memory usage, etc. So for things like framebuffers, the
trade-off isn't great, and thus there's a memory region with ECC disabled
to allocate from for such use cases.
After a suggestion from John, I chose to first start using heap
allocation flags to allow userspace to ask for a particular ECC
setup. This is then backed by a new heap type that allocates from reserved
memory chunks flagged as such, and the existing DT properties to specify
the ECC properties.
After further discussion, it was considered that flags were not the
right solution, and that relying on the names of the heaps would be enough
to let userspace know the kind of buffer it deals with.
Thus, even though the uAPI part of it has been dropped in this second
version, we still need a driver to create heaps out of carved-out memory
regions. In addition to the original use case, a similar driver can be
found in BSPs from most vendors, so I believe it would be a useful
addition to the kernel.
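For reference, here is a minimal userspace sketch of selecting such a heap
purely by its name through the existing DMA heap UAPI; the heap name passed
in is an assumption and would follow from the reserved-memory node:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dma-heap.h>

/*
 * Sketch only: allocate a dma-buf of 'len' bytes from the heap exposed as
 * /dev/dma_heap/<heap_name>. Returns the dma-buf fd, or -1 on error.
 */
static int alloc_from_named_heap(const char *heap_name, unsigned long long len)
{
	struct dma_heap_allocation_data data = {
		.len = len,
		.fd_flags = O_RDWR | O_CLOEXEC,
	};
	char path[128];
	int heap_fd, ret;

	snprintf(path, sizeof(path), "/dev/dma_heap/%s", heap_name);
	heap_fd = open(path, O_RDONLY | O_CLOEXEC);
	if (heap_fd < 0)
		return -1;

	ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
	close(heap_fd);

	return ret < 0 ? -1 : (int)data.fd;
}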
Let me know what you think,
Maxime
Signed-off-by: Maxime Ripard <mripard(a)kernel.org>
---
Changes in v5:
- Rebased on 6.16-rc2
- Switch from property to dedicated binding
- Link to v4: https://lore.kernel.org/r/20250520-dma-buf-ecc-heap-v4-1-bd2e1f1bb42c@kerne…
Changes in v4:
- Rebased on 6.15-rc7
- Map buffers only when map is actually called, not at allocation time
- Deal with restricted-dma-pool and shared-dma-pool
- Reword Kconfig options
- Properly report dma_map_sgtable failures
- Link to v3: https://lore.kernel.org/r/20250407-dma-buf-ecc-heap-v3-0-97cdd36a5f29@kerne…
Changes in v3:
- Reworked global variable patch
- Link to v2: https://lore.kernel.org/r/20250401-dma-buf-ecc-heap-v2-0-043fd006a1af@kerne…
Changes in v2:
- Add vmap/vunmap operations
- Drop ECC flags uapi
- Rebase on top of 6.14
- Link to v1: https://lore.kernel.org/r/20240515-dma-buf-ecc-heap-v1-0-54cbbd049511@kerne…
---
Maxime Ripard (2):
dt-bindings: reserved-memory: Introduce carved-out memory region binding
dma-buf: heaps: Introduce a new heap for reserved memory
.../bindings/reserved-memory/carved-out.yaml | 49 +++
drivers/dma-buf/heaps/Kconfig | 8 +
drivers/dma-buf/heaps/Makefile | 1 +
drivers/dma-buf/heaps/carveout_heap.c | 362 +++++++++++++++++++++
4 files changed, 420 insertions(+)
---
base-commit: d076bed8cb108ba2236d4d49c92303fda4036893
change-id: 20240515-dma-buf-ecc-heap-28a311d2c94e
Best regards,
--
Maxime Ripard <mripard(a)kernel.org>
Current dma-buf vmap semantics require that the mapped buffer remains
in place until the corresponding vunmap has completed.
For GEM-SHMEM, this used to be guaranteed by a pin operation performed while
creating the S/G table on import. GEM-SHMEM can now import dma-buf objects
without creating the S/G table, so the pin is missing. This leads to
page-fault errors, such as the one shown below.
[ 102.101726] BUG: unable to handle page fault for address: ffffc90127000000
[...]
[ 102.157102] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl]
[...]
[ 102.243250] Call Trace:
[ 102.245695] <TASK>
[ 102.2477V95] ? validate_chain+0x24e/0x5e0
[ 102.251805] ? __lock_acquire+0x568/0xae0
[ 102.255807] udl_render_hline+0x165/0x341 [udl]
[ 102.260338] ? __pfx_udl_render_hline+0x10/0x10 [udl]
[ 102.265379] ? local_clock_noinstr+0xb/0x100
[ 102.269642] ? __lock_release.isra.0+0x16c/0x2e0
[ 102.274246] ? mark_held_locks+0x40/0x70
[ 102.278177] udl_primary_plane_helper_atomic_update+0x43e/0x680 [udl]
[ 102.284606] ? __pfx_udl_primary_plane_helper_atomic_update+0x10/0x10 [udl]
[ 102.291551] ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170
[ 102.297208] ? lockdep_hardirqs_on+0x88/0x130
[ 102.301554] ? _raw_spin_unlock_irq+0x24/0x50
[ 102.305901] ? wait_for_completion_timeout+0x2bb/0x3a0
[ 102.311028] ? drm_atomic_helper_calc_timestamping_constants+0x141/0x200
[ 102.317714] ? drm_atomic_helper_commit_planes+0x3b6/0x1030
[ 102.323279] drm_atomic_helper_commit_planes+0x3b6/0x1030
[ 102.328664] drm_atomic_helper_commit_tail+0x41/0xb0
[ 102.333622] commit_tail+0x204/0x330
[...]
[ 102.529946] ---[ end trace 0000000000000000 ]---
[ 102.651980] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl]
In this stack trace, udl (based on GEM-SHMEM) imported and vmap'ed a
dma-buf from amdgpu. Amdgpu relocated the buffer, thereby invalidating the
mapping.
Provide a custom dma-buf vmap method in amdgpu that pins the object before
mapping its buffer pages into kernel address space. Do the opposite in
vunmap.
Note that dma-buf vmap differs from GEM vmap in how it handles relocation.
While dma-buf vmap keeps the buffer in place, GEM vmap requires the caller
to keep the buffer in place. Hence, this fix is in amdgpu's dma-buf code
instead of its GEM code.
A discussion of various approaches to solving the problem is available
at [1].
v2:
- only use mappable domains (Christian)
- try pinning to domains in preferred order
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: 660cd44659a0 ("drm/shmem-helper: Import dmabuf without mapping its sg_table")
Reported-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Closes: https://lore.kernel.org/dri-devel/ba1bdfb8-dbf7-4372-bdcb-df7e0511c702@suse…
Cc: Shixiong Ou <oushixiong(a)kylinos.cn>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Simona Vetter <simona(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Link: https://lore.kernel.org/dri-devel/9792c6c3-a2b8-4b2b-b5ba-fba19b153e21@suse… # [1]
---
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 41 ++++++++++++++++++++-
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 5743ebb2f1b7..471b41bd3e29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -285,6 +285,43 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ int ret;
+
+ /*
+ * Pin to keep buffer in place while it's vmap'ed. The actual
+ * domain is not that important as long as it's mappable. Using
+ * GTT should be compatible with most use cases. VRAM and CPU
+ * are the fallbacks if the buffer has already been pinned there.
+ */
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (ret) {
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_CPU);
+ if (ret)
+ return ret;
+ }
+ }
+ ret = drm_gem_dmabuf_vmap(dma_buf, map);
+ if (ret)
+ amdgpu_bo_unpin(bo);
+
+ return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ drm_gem_dmabuf_vunmap(dma_buf, map);
+ amdgpu_bo_unpin(bo);
+}
+
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
.pin = amdgpu_dma_buf_pin,
@@ -294,8 +331,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
};
/**
--
2.50.1
On Mon, 18 Aug 2025 16:54:23 +0530 Meghana Malladi wrote:
> @@ -1332,6 +1350,13 @@ static int prueth_xsk_wakeup(struct net_device *ndev, u32 qid, u32 flags)
> }
> }
>
> + if (flags & XDP_WAKEUP_RX) {
> + if (!napi_if_scheduled_mark_missed(&emac->napi_rx)) {
> + if (likely(napi_schedule_prep(&emac->napi_rx)))
> + __napi_schedule(&emac->napi_rx);
> + }
> + }
> +
> return 0;
I suspect this series was generated against an old tree, or there's
another conflicting series in flight, because git ends up applying
this chunk to prueth_xsk_pool_disable() :S
Before you proceed with AF_XDP, could you make this driver build under
COMPILE_TEST on x86? This is very easy to miss; luckily we got an off-list
report, but it's pure luck. And it's obviously much more effort for the
maintainers to investigate than if it had been caught by the CI.
--
pw-bot: cr
On 19.08.25 11:04, Janusz Krzysztofik wrote:
> On Monday, 18 August 2025 16:42:56 CEST Christian König wrote:
>> On 18.08.25 16:30, Janusz Krzysztofik wrote:
>>> Hi Christian,
>>>
>>> On Thursday, 14 August 2025 14:24:29 CEST Christian König wrote:
>>>>
>>>> On 14.08.25 10:16, Janusz Krzysztofik wrote:
>>>>> When first user starts waiting on a not yet signaled fence of a chain
>>>>> link, a dma_fence_chain callback is added to a user fence of that link.
>>>>> When the user fence of that chain link is then signaled, the chain is
>>>>> traversed in search for a first not signaled link and the callback is
>>>>> rearmed on a user fence of that link.
>>>>>
>>>>> Since chain fences may be exposed to user space, e.g. over drm_syncobj
>>>>> IOCTLs, users may start waiting on any link of the chain, then many links
>>>>> of a chain may have signaling enabled and their callbacks added to their
>>>>> user fences. Once an arbitrary user fence is signaled, all
>>>>> dma_fence_chain callbacks added to it so far must be rearmed to another
>>>>> user fence of the chain. In extreme scenarios, when all N links of a
>>>>> chain are awaited and then signaled in reverse order, the dma_fence_chain
>>>>> callback may be called up to N * (N + 1) / 2 times (an arithmetic series).
>>>>>
>>>>> To avoid that potential excessive accumulation of dma_fence_chain
>>>>> callbacks, rearm a trimmed-down, signal only callback version to the base
>>>>> fence of a previous link, if not yet signaled, otherwise just signal the
>>>>> base fence of the current link instead of traversing the chain in search
>>>>> for a first not signaled link and moving all callbacks collected so far to
>>>>> a user fence of that link.
>>>>
>>>> Well clear NAK to that! You can easily overflow the kernel stack with that!
>>>
>>> I'll be happy to propose a better solution, but for that I need to understand
>>> better your message. Could you please point out an exact piece of the
>>> proposed code and/or describe a scenario where you can see the risk of stack
>>> overflow?
>>
>> The sentence "rearm .. to the base fence of a previous link" sounds like you are trying to install a callback on the signaling to the previous chain element.
>>
>> That is exactly what I pointed out previously where you need to be super careful because when this chain signals the callbacks will execute recursively which means that you can trivially overflow the kernel stack if you have more than a handful of chain elements.
>>
>> In other words A waits for B, B waits for C, C waits for D etc.... when D finally signals it will call C which in turn calls B which in turn calls A.
>
> OK, maybe my commit description was not precise enough; however, I didn't
> describe implementation details (the how) intentionally.
> When D signals, it doesn't call C directly; it only submits an irq work
> that calls C. Then C doesn't just call B; it submits another irq work
> that calls B, and so on.
> Doesn't that code pattern effectively break the recursion loop into separate
> work items, each with its own separate stack?
No, it's architecture dependent whether the irq_work executes on a separate stack or not.
You would need a work_struct to really separate the two and I would reject that because it adds additional latency to the signaling path.
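To make the trade-off concrete, here is a minimal sketch (not from the
series; names are made up) of the two deferral mechanisms under discussion:
whether the irq_work callback gets its own stack is architecture dependent,
while the work_struct always runs on a kworker's stack at the cost of
scheduling latency.

#include <linux/irq_work.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct deferred_signal {
	struct irq_work iwork;		/* low latency; separate stack is arch dependent */
	struct work_struct wq_work;	/* own kworker stack; adds scheduling latency */
};

static void signal_from_irq_work(struct irq_work *work)
{
	/* runs in hard-IRQ context, possibly on the interrupted stack */
}

static void signal_from_workqueue(struct work_struct *work)
{
	/* runs in process context on a kworker's own stack */
}

static void deferred_signal_init(struct deferred_signal *ds)
{
	init_irq_work(&ds->iwork, signal_from_irq_work);
	INIT_WORK(&ds->wq_work, signal_from_workqueue);
}

static void deferred_signal_queue(struct deferred_signal *ds, bool need_own_stack)
{
	if (need_own_stack)
		schedule_work(&ds->wq_work);	/* breaks any recursion, costs a wakeup */
	else
		irq_work_queue(&ds->iwork);	/* fast, but may nest on the same stack */
}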
>>
>> Even if the chain is a recursive data structure you absolutely can't use recursion for the handling of it.
>>
>> Maybe I misunderstood your textual description but reading a sentence like this rings all alarm bells here. Otherwise I can't see what the patch is supposed to be optimizing.
>
> OK, maybe I should start my commit description of this patch with a copy of
> the first sentence from the cover letter and also from the patch 1/4 description that
> informs about the problem as reported by CI. Maybe I should also provide a
> comparison of measured signaling times from trybot executions [1][2][3].
> Here are example numbers from CI machine fi-bsw-n3050:
Yeah and I've pointed out before that this is irrelevant.
The problem is *not* the dma_fence_chain implementation, that one is doing exactly what is expected to do.
The problem is that the test case is nonsense. I've pointed that out numerous times now!
Regards,
Christian.
>
> With signaling time reports only added to selftests (patch 1 of 4):
> <6> [777.914451] dma-buf: Running dma_fence_chain/wait_forward
> <6> [778.123516] wait_forward: 4096 signals in 21373487 ns
> <6> [778.335709] dma-buf: Running dma_fence_chain/wait_backward
> <6> [795.791546] wait_backward: 4096 signals in 17249051192 ns
> <6> [795.859699] dma-buf: Running dma_fence_chain/wait_random
> <6> [796.161375] wait_random: 4096 signals in 97386256 ns
>
> With dma_fence_enable_signaling() replaced in selftests with dma_fence_wait()
> (patches 1-3 of 4):
> <6> [782.505692] dma-buf: Running dma_fence_chain/wait_forward
> <6> [784.609213] wait_forward: 4096 signals in 36513103 ns
> <3> [784.837226] Reported -4 for kthread_stop_put(0)!
> <6> [785.147643] dma-buf: Running dma_fence_chain/wait_backward
> <6> [806.367763] wait_backward: 4096 signals in 18428009499 ns
> <6> [807.175325] dma-buf: Running dma_fence_chain/wait_random
> <6> [809.453942] wait_random: 4096 signals in 119761950 ns
>
> With the fix (patches 1-4 of 4):
> <6> [731.519020] dma-buf: Running dma_fence_chain/wait_forward
> <6> [733.623375] wait_forward: 4096 signals in 31890220 ns
> <6> [734.258972] dma-buf: Running dma_fence_chain/wait_backward
> <6> [736.267325] wait_backward: 4096 signals in 39007955 ns
> <6> [736.700221] dma-buf: Running dma_fence_chain/wait_random
> <6> [739.346706] wait_random: 4096 signals in 48384865 ns
>
> Signaling time in wait_backward selftest has been reduced from 17s to 39ms.
>
> [1] https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_152785v1/index.html?
> [2] https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_152828v2/index.html?
> [3] https://intel-gfx-ci.01.org/tree/drm-tip/Trybot_152830v2/index.html?
>
>>
>>>>
>>>> Additional to this messing with the fence ops outside of the dma_fence code is an absolute no-go.
>>>
>>> Could you please explain what piece of code you are referring to when you say
>>> "messing with the fence ops outside the dma_fence code"? If not this patch
>>> then which particular one of this series did you mean? I'm assuming you
>>> didn't mean drm_syncobj code that I mentioned in my commit descriptions.
>>
>> See below.
>>
>>>
>>> Thanks,
>>> Janusz
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12904
>>>>> Suggested-by: Chris Wilson <chris.p.wilson(a)linux.intel.com>
>>>>> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik(a)linux.intel.com>
>>>>> ---
>>>>> drivers/dma-buf/dma-fence-chain.c | 101 +++++++++++++++++++++++++-----
>>>>> 1 file changed, 84 insertions(+), 17 deletions(-)
>>>>>
>>>>> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
>>>>> index a8a90acf4f34d..90eff264ee05c 100644
>>>>> --- a/drivers/dma-buf/dma-fence-chain.c
>>>>> +++ b/drivers/dma-buf/dma-fence-chain.c
>>>>> @@ -119,46 +119,113 @@ static const char *dma_fence_chain_get_timeline_name(struct dma_fence *fence)
>>>>> return "unbound";
>>>>> }
>>>>>
>>>>> -static void dma_fence_chain_irq_work(struct irq_work *work)
>>>>> +static void signal_irq_work(struct irq_work *work)
>>>>> {
>>>>> struct dma_fence_chain *chain;
>>>>>
>>>>> chain = container_of(work, typeof(*chain), work);
>>>>>
>>>>> - /* Try to rearm the callback */
>>>>> - if (!dma_fence_chain_enable_signaling(&chain->base))
>>>>> - /* Ok, we are done. No more unsignaled fences left */
>>>>> - dma_fence_signal(&chain->base);
>>>>> + dma_fence_signal(&chain->base);
>>>>> dma_fence_put(&chain->base);
>>>>> }
>>>>>
>>>>> -static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
>>>>> +static void signal_cb(struct dma_fence *f, struct dma_fence_cb *cb)
>>>>> +{
>>>>> + struct dma_fence_chain *chain;
>>>>> +
>>>>> + chain = container_of(cb, typeof(*chain), cb);
>>>>> + init_irq_work(&chain->work, signal_irq_work);
>>>>> + irq_work_queue(&chain->work);
>>>>> +}
>>>>> +
>>>>> +static void rearm_irq_work(struct irq_work *work)
>>>>> +{
>>>>> + struct dma_fence_chain *chain;
>>>>> + struct dma_fence *prev;
>>>>> +
>>>>> + chain = container_of(work, typeof(*chain), work);
>>>>> +
>>>>> + rcu_read_lock();
>>>>> + prev = rcu_dereference(chain->prev);
>>>>> + if (prev && dma_fence_add_callback(prev, &chain->cb, signal_cb))
>>>>> + prev = NULL;
>>>>> + rcu_read_unlock();
>>>>> + if (prev)
>>>>> + return;
>>>>> +
>>>>> + /* Ok, we are done. No more unsignaled fences left */
>>>>> + signal_irq_work(work);
>>>>> +}
>>>>> +
>>>>> +static inline bool fence_is_signaled__nested(struct dma_fence *fence)
>>>>> +{
>>>>> + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
>>>>> + return true;
>>>>> +
>>
>>>>> + if (fence->ops->signaled && fence->ops->signaled(fence)) {
>>
>> Calling this outside of dma-fence.[ch] is a clear no-go.
>
> But this patch applies only to drivers/dma-buf/dma-fence-chain.c, not
> outside of it.
>
> Thanks,
> Janusz
>
>>
>> Regards,
>> Christian.
>>
>>>>> + unsigned long flags;
>>>>> +
>>>>> + spin_lock_irqsave_nested(fence->lock, flags, SINGLE_DEPTH_NESTING);
>>>>> + dma_fence_signal_locked(fence);
>>>>> + spin_unlock_irqrestore(fence->lock, flags);
>>>>> +
>>>>> + return true;
>>>>> + }
>>>>> +
>>>>> + return false;
>>>>> +}
>>>>> +
>>>>> +static bool prev_is_signaled(struct dma_fence_chain *chain)
>>>>> +{
>>>>> + struct dma_fence *prev;
>>>>> + bool result;
>>>>> +
>>>>> + rcu_read_lock();
>>>>> + prev = rcu_dereference(chain->prev);
>>>>> + result = !prev || fence_is_signaled__nested(prev);
>>>>> + rcu_read_unlock();
>>>>> +
>>>>> + return result;
>>>>> +}
>>>>> +
>>>>> +static void rearm_or_signal_cb(struct dma_fence *f, struct dma_fence_cb *cb)
>>>>> {
>>>>> struct dma_fence_chain *chain;
>>>>>
>>>>> chain = container_of(cb, typeof(*chain), cb);
>>>>> - init_irq_work(&chain->work, dma_fence_chain_irq_work);
>>>>> + if (prev_is_signaled(chain)) {
>>>>> + /* Ok, we are done. No more unsignaled fences left */
>>>>> + init_irq_work(&chain->work, signal_irq_work);
>>>>> + } else {
>>>>> + /* Try to rearm the callback */
>>>>> + init_irq_work(&chain->work, rearm_irq_work);
>>>>> + }
>>>>> +
>>>>> irq_work_queue(&chain->work);
>>>>> - dma_fence_put(f);
>>>>> }
>>>>>
>>>>> static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>>>>> {
>>>>> struct dma_fence_chain *head = to_dma_fence_chain(fence);
>>>>> + int err = -ENOENT;
>>>>>
>>>>> - dma_fence_get(&head->base);
>>>>> - dma_fence_chain_for_each(fence, &head->base) {
>>>>> - struct dma_fence *f = dma_fence_chain_contained(fence);
>>>>> + if (WARN_ON(!head))
>>>>> + return false;
>>>>>
>>>>> - dma_fence_get(f);
>>>>> - if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
>>>>> + dma_fence_get(fence);
>>>>> + if (head->fence)
>>>>> + err = dma_fence_add_callback(head->fence, &head->cb, rearm_or_signal_cb);
>>>>> + if (err) {
>>>>> + if (prev_is_signaled(head)) {
>>>>> dma_fence_put(fence);
>>>>> - return true;
>>>>> + } else {
>>>>> + init_irq_work(&head->work, rearm_irq_work);
>>>>> + irq_work_queue(&head->work);
>>>>> + err = 0;
>>>>> }
>>>>> - dma_fence_put(f);
>>>>> }
>>>>> - dma_fence_put(&head->base);
>>>>> - return false;
>>>>> +
>>>>> + return !err;
>>>>> }
>>>>>
>>>>> static bool dma_fence_chain_signaled(struct dma_fence *fence)
>>>>
>>>>
>>>
>>>
>>>
>>>
>>
>>
>
>
>
>
Current dma-buf vmap semantics require that the mapped buffer remains
in place until the corresponding vunmap has completed.
For GEM-SHMEM, this used to be guaranteed by a pin operation performed while
creating the S/G table on import. GEM-SHMEM can now import dma-buf objects
without creating the S/G table, so the pin is missing. This leads to
page-fault errors, such as the one shown below.
[ 102.101726] BUG: unable to handle page fault for address: ffffc90127000000
[...]
[ 102.157102] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl]
[...]
[ 102.243250] Call Trace:
[ 102.245695] <TASK>
[ 102.2477V95] ? validate_chain+0x24e/0x5e0
[ 102.251805] ? __lock_acquire+0x568/0xae0
[ 102.255807] udl_render_hline+0x165/0x341 [udl]
[ 102.260338] ? __pfx_udl_render_hline+0x10/0x10 [udl]
[ 102.265379] ? local_clock_noinstr+0xb/0x100
[ 102.269642] ? __lock_release.isra.0+0x16c/0x2e0
[ 102.274246] ? mark_held_locks+0x40/0x70
[ 102.278177] udl_primary_plane_helper_atomic_update+0x43e/0x680 [udl]
[ 102.284606] ? __pfx_udl_primary_plane_helper_atomic_update+0x10/0x10 [udl]
[ 102.291551] ? lockdep_hardirqs_on_prepare.part.0+0x92/0x170
[ 102.297208] ? lockdep_hardirqs_on+0x88/0x130
[ 102.301554] ? _raw_spin_unlock_irq+0x24/0x50
[ 102.305901] ? wait_for_completion_timeout+0x2bb/0x3a0
[ 102.311028] ? drm_atomic_helper_calc_timestamping_constants+0x141/0x200
[ 102.317714] ? drm_atomic_helper_commit_planes+0x3b6/0x1030
[ 102.323279] drm_atomic_helper_commit_planes+0x3b6/0x1030
[ 102.328664] drm_atomic_helper_commit_tail+0x41/0xb0
[ 102.333622] commit_tail+0x204/0x330
[...]
[ 102.529946] ---[ end trace 0000000000000000 ]---
[ 102.651980] RIP: 0010:udl_compress_hline16+0x219/0x940 [udl]
In this stack trace, udl (based on GEM-SHMEM) imported and vmap'ed a
dma-buf from amdgpu. Amdgpu relocated the buffer, thereby invalidating the
mapping.
Provide a custom dma-buf vmap method in amdgpu that pins the object before
mapping its buffer pages into kernel address space. Do the opposite in
vunmap.
Note that dma-buf vmap differs from GEM vmap in how it handles relocation.
While dma-buf vmap keeps the buffer in place, GEM vmap requires the caller
to keep the buffer in place. Hence, this fix is in amdgpu's dma-buf code
instead of its GEM code.
A discussion of various approaches to solving the problem is available
at [1].
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Fixes: 660cd44659a0 ("drm/shmem-helper: Import dmabuf without mapping its sg_table")
Reported-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Closes: https://lore.kernel.org/dri-devel/ba1bdfb8-dbf7-4372-bdcb-df7e0511c702@suse…
Cc: Shixiong Ou <oushixiong(a)kylinos.cn>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Simona Vetter <simona(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Link: https://lore.kernel.org/dri-devel/9792c6c3-a2b8-4b2b-b5ba-fba19b153e21@suse… # [1]
---
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 36 +++++++++++++++++++--
1 file changed, 34 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 5743ebb2f1b7..5b33776eeece 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -285,6 +285,38 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ int ret;
+
+ /*
+ * Pin to keep buffer in place while it's vmap'ed. The actual
+ * location is not important as long as it's mappable.
+ *
+ * This code is required for exporting to GEM-SHMEM without S/G table.
+ * Once GEM-SHMEM supports dynamic imports, it should be dropped.
+ */
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_MASK);
+ if (ret)
+ return ret;
+ ret = drm_gem_dmabuf_vmap(dma_buf, map);
+ if (ret)
+ amdgpu_bo_unpin(bo);
+
+ return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ drm_gem_dmabuf_vunmap(dma_buf, map);
+ amdgpu_bo_unpin(bo);
+}
+
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
.pin = amdgpu_dma_buf_pin,
@@ -294,8 +326,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
- .vmap = drm_gem_dmabuf_vmap,
- .vunmap = drm_gem_dmabuf_vunmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
};
/**
--
2.50.1
On 18.08.25 16:30, Janusz Krzysztofik wrote:
> Hi Christian,
>
> On Thursday, 14 August 2025 14:24:29 CEST Christian König wrote:
>>
>> On 14.08.25 10:16, Janusz Krzysztofik wrote:
>>> When first user starts waiting on a not yet signaled fence of a chain
>>> link, a dma_fence_chain callback is added to a user fence of that link.
>>> When the user fence of that chain link is then signaled, the chain is
>>> traversed in search for a first not signaled link and the callback is
>>> rearmed on a user fence of that link.
>>>
>>> Since chain fences may be exposed to user space, e.g. over drm_syncobj
>>> IOCTLs, users may start waiting on any link of the chain, then many links
>>> of a chain may have signaling enabled and their callbacks added to their
>>> user fences. Once an arbitrary user fence is signaled, all
>>> dma_fence_chain callbacks added to it so far must be rearmed to another
>>> user fence of the chain. In extreme scenarios, when all N links of a
>>> chain are awaited and then signaled in reverse order, the dma_fence_chain
>>> callback may be called up to N * (N + 1) / 2 times (an arithmetic series).
>>>
>>> To avoid that potential excessive accumulation of dma_fence_chain
>>> callbacks, rearm a trimmed-down, signal only callback version to the base
>>> fence of a previous link, if not yet signaled, otherwise just signal the
>>> base fence of the current link instead of traversing the chain in search
>>> for a first not signaled link and moving all callbacks collected so far to
>>> a user fence of that link.
>>
>> Well clear NAK to that! You can easily overflow the kernel stack with that!
>
> I'll be happy to propose a better solution, but for that I need to understand
> better your message. Could you please point out an exact piece of the
> proposed code and/or describe a scenario where you can see the risk of stack
> overflow?
The sentence "rearm .. to the base fence of a previous link" sounds like you are trying to install a callback on the signaling to the previous chain element.
That is exactly what I pointed out previously where you need to be super careful because when this chain signals the callbacks will execute recursively which means that you can trivially overflow the kernel stack if you have more than a handful of chain elements.
In other words A waits for B, B waits for C, C waits for D etc.... when D finally signals it will call C which in turn calls B which in turn calls A.
Even if the chain is a recursive data structure you absolutely can't use recursion for the handling of it.
Maybe I misunderstood your textual description but reading a sentence like this rings all alarm bells here. Otherwise I can't see what the patch is supposed to be optimizing.
>>
>> Additional to this messing with the fence ops outside of the dma_fence code is an absolute no-go.
>
> Could you please explain what piece of code you are referring to when you say
> "messing with the fence ops outside the dma_fence code"? If not this patch
> then which particular one of this series did you mean? I'm assuming you
> didn't mean drm_syncobj code that I mentioned in my commit descriptions.
See below.
>
> Thanks,
> Janusz
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12904
>>> Suggested-by: Chris Wilson <chris.p.wilson(a)linux.intel.com>
>>> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik(a)linux.intel.com>
>>> ---
>>> drivers/dma-buf/dma-fence-chain.c | 101 +++++++++++++++++++++++++-----
>>> 1 file changed, 84 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
>>> index a8a90acf4f34d..90eff264ee05c 100644
>>> --- a/drivers/dma-buf/dma-fence-chain.c
>>> +++ b/drivers/dma-buf/dma-fence-chain.c
>>> @@ -119,46 +119,113 @@ static const char *dma_fence_chain_get_timeline_name(struct dma_fence *fence)
>>> return "unbound";
>>> }
>>>
>>> -static void dma_fence_chain_irq_work(struct irq_work *work)
>>> +static void signal_irq_work(struct irq_work *work)
>>> {
>>> struct dma_fence_chain *chain;
>>>
>>> chain = container_of(work, typeof(*chain), work);
>>>
>>> - /* Try to rearm the callback */
>>> - if (!dma_fence_chain_enable_signaling(&chain->base))
>>> - /* Ok, we are done. No more unsignaled fences left */
>>> - dma_fence_signal(&chain->base);
>>> + dma_fence_signal(&chain->base);
>>> dma_fence_put(&chain->base);
>>> }
>>>
>>> -static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
>>> +static void signal_cb(struct dma_fence *f, struct dma_fence_cb *cb)
>>> +{
>>> + struct dma_fence_chain *chain;
>>> +
>>> + chain = container_of(cb, typeof(*chain), cb);
>>> + init_irq_work(&chain->work, signal_irq_work);
>>> + irq_work_queue(&chain->work);
>>> +}
>>> +
>>> +static void rearm_irq_work(struct irq_work *work)
>>> +{
>>> + struct dma_fence_chain *chain;
>>> + struct dma_fence *prev;
>>> +
>>> + chain = container_of(work, typeof(*chain), work);
>>> +
>>> + rcu_read_lock();
>>> + prev = rcu_dereference(chain->prev);
>>> + if (prev && dma_fence_add_callback(prev, &chain->cb, signal_cb))
>>> + prev = NULL;
>>> + rcu_read_unlock();
>>> + if (prev)
>>> + return;
>>> +
>>> + /* Ok, we are done. No more unsignaled fences left */
>>> + signal_irq_work(work);
>>> +}
>>> +
>>> +static inline bool fence_is_signaled__nested(struct dma_fence *fence)
>>> +{
>>> + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
>>> + return true;
>>> +
>>> + if (fence->ops->signaled && fence->ops->signaled(fence)) {
Calling this outside of dma-fence.[ch] is a clear no-go.
Regards,
Christian.
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave_nested(fence->lock, flags, SINGLE_DEPTH_NESTING);
>>> + dma_fence_signal_locked(fence);
>>> + spin_unlock_irqrestore(fence->lock, flags);
>>> +
>>> + return true;
>>> + }
>>> +
>>> + return false;
>>> +}
>>> +
>>> +static bool prev_is_signaled(struct dma_fence_chain *chain)
>>> +{
>>> + struct dma_fence *prev;
>>> + bool result;
>>> +
>>> + rcu_read_lock();
>>> + prev = rcu_dereference(chain->prev);
>>> + result = !prev || fence_is_signaled__nested(prev);
>>> + rcu_read_unlock();
>>> +
>>> + return result;
>>> +}
>>> +
>>> +static void rearm_or_signal_cb(struct dma_fence *f, struct dma_fence_cb *cb)
>>> {
>>> struct dma_fence_chain *chain;
>>>
>>> chain = container_of(cb, typeof(*chain), cb);
>>> - init_irq_work(&chain->work, dma_fence_chain_irq_work);
>>> + if (prev_is_signaled(chain)) {
>>> + /* Ok, we are done. No more unsignaled fences left */
>>> + init_irq_work(&chain->work, signal_irq_work);
>>> + } else {
>>> + /* Try to rearm the callback */
>>> + init_irq_work(&chain->work, rearm_irq_work);
>>> + }
>>> +
>>> irq_work_queue(&chain->work);
>>> - dma_fence_put(f);
>>> }
>>>
>>> static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>>> {
>>> struct dma_fence_chain *head = to_dma_fence_chain(fence);
>>> + int err = -ENOENT;
>>>
>>> - dma_fence_get(&head->base);
>>> - dma_fence_chain_for_each(fence, &head->base) {
>>> - struct dma_fence *f = dma_fence_chain_contained(fence);
>>> + if (WARN_ON(!head))
>>> + return false;
>>>
>>> - dma_fence_get(f);
>>> - if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
>>> + dma_fence_get(fence);
>>> + if (head->fence)
>>> + err = dma_fence_add_callback(head->fence, &head->cb, rearm_or_signal_cb);
>>> + if (err) {
>>> + if (prev_is_signaled(head)) {
>>> dma_fence_put(fence);
>>> - return true;
>>> + } else {
>>> + init_irq_work(&head->work, rearm_irq_work);
>>> + irq_work_queue(&head->work);
>>> + err = 0;
>>> }
>>> - dma_fence_put(f);
>>> }
>>> - dma_fence_put(&head->base);
>>> - return false;
>>> +
>>> + return !err;
>>> }
>>>
>>> static bool dma_fence_chain_signaled(struct dma_fence *fence)
>>
>>
>
>
>
>