Fix the dup_sg_table function to initialize the dma_address of the new
sg list entries instead of the dma_address of the source entries.
Since ION duplicates the sg_list, this issue does not appear to result in
an actual bug.
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
Acked-by: Laura Abbott <labbott(a)redhat.com>
---
Changes in v2:
- Add to commit message that it doesn't cause an actual bug
- Remove 'Fixes:' since it doesn't cause a bug
- Add Acked-by from Laura Abbott
drivers/staging/android/ion/ion.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 57e0d8035b2e..517d4f40d1b7 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -187,7 +187,7 @@ static struct sg_table *dup_sg_table(struct sg_table *table)
new_sg = new_table->sgl;
for_each_sg(table->sgl, sg, table->nents, i) {
memcpy(new_sg, sg, sizeof(*sg));
- sg->dma_address = 0;
+ new_sg->dma_address = 0;
new_sg = sg_next(new_sg);
}
--
1.8.5.2
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
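For reference, a sketch of the fixed function in full (reconstructed around the
hunk above; the allocation and error-handling lines are assumptions, not lines
copied from the tree):

	static struct sg_table *dup_sg_table(struct sg_table *table)
	{
		struct sg_table *new_table;
		struct scatterlist *sg, *new_sg;
		int ret, i;

		new_table = kzalloc(sizeof(*new_table), GFP_KERNEL);
		if (!new_table)
			return ERR_PTR(-ENOMEM);

		ret = sg_alloc_table(new_table, table->nents, GFP_KERNEL);
		if (ret) {
			kfree(new_table);
			return ERR_PTR(-ENOMEM);
		}

		new_sg = new_table->sgl;
		for_each_sg(table->sgl, sg, table->nents, i) {
			memcpy(new_sg, sg, sizeof(*sg));
			/* clear the stale dma_address in the copy, not in the source */
			new_sg->dma_address = 0;
			new_sg = sg_next(new_sg);
		}

		return new_table;
	}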
Fix the dup_sg_table function to initialize the dma_address of the new
sg list entries instead of the dma_address of the source entries.
Fixes: 17fd283f3870 ("staging: android: ion: Duplicate sg_table")
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
drivers/staging/android/ion/ion.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index f480885e346b..3ace3a0d9210 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -197,7 +197,7 @@ static struct sg_table *dup_sg_table(struct sg_table *table)
new_sg = new_table->sgl;
for_each_sg(table->sgl, sg, table->nents, i) {
memcpy(new_sg, sg, sizeof(*sg));
- sg->dma_address = 0;
+ new_sg->dma_address = 0;
new_sg = sg_next(new_sg);
}
--
1.8.5.2
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
The ION begin_cpu_access and end_cpu_access functions use the
dma_sync_sg_for_cpu and dma_sync_sg_for_device APIs to perform cache
maintenance.
Currently it is possible to apply cache maintenance, via the
begin_cpu_access and end_cpu_access APIs, to ION buffers which are not
dma mapped.
The dma sync sg APIs should not be called on sg lists which have not been
dma mapped, as this can result in cache maintenance being applied to the
wrong address. If an sg list has not been dma mapped then its dma_address
field has not been populated; some dma ops, such as swiotlb_dma_ops, use
the dma_address field to calculate the address to which to apply cache
maintenance.
Fix the ION begin_cpu_access and end_cpu_access functions to only apply
cache maintenance to buffers which have been dma mapped.
Fixes: 2a55e7b5e544 ("staging: android: ion: Call dma_map_sg for syncing and mapping")
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
drivers/staging/android/ion/ion.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index f480885e346b..e5df5272823d 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -214,6 +214,7 @@ struct ion_dma_buf_attachment {
struct device *dev;
struct sg_table *table;
struct list_head list;
+ bool dma_mapped;
};
static int ion_dma_buf_attach(struct dma_buf *dmabuf, struct device *dev,
@@ -235,6 +236,7 @@ static int ion_dma_buf_attach(struct dma_buf *dmabuf, struct device *dev,
a->table = table;
a->dev = dev;
+ a->dma_mapped = false;
INIT_LIST_HEAD(&a->list);
attachment->priv = a;
@@ -272,6 +274,7 @@ static struct sg_table *ion_map_dma_buf(struct dma_buf_attachment *attachment,
direction))
return ERR_PTR(-ENOMEM);
+ a->dma_mapped = true;
return table;
}
@@ -279,7 +282,10 @@ static void ion_unmap_dma_buf(struct dma_buf_attachment *attachment,
struct sg_table *table,
enum dma_data_direction direction)
{
+ struct ion_dma_buf_attachment *a = attachment->priv;
+
dma_unmap_sg(attachment->dev, table->sgl, table->nents, direction);
+ a->dma_mapped = false;
}
static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
@@ -345,8 +351,9 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
- dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
- direction);
+ if (a->dma_mapped)
+ dma_sync_sg_for_cpu(a->dev, a->table->sgl,
+ a->table->nents, direction);
}
mutex_unlock(&buffer->lock);
@@ -367,8 +374,9 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
- dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
- direction);
+ if (a->dma_mapped)
+ dma_sync_sg_for_device(a->dev, a->table->sgl,
+ a->table->nents, direction);
}
mutex_unlock(&buffer->lock);
--
1.8.5.2
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
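To make the invariant concrete, here is a hedged importer-side sketch of the
ordering this patch assumes (the dma-buf calls are the standard API; the
comments map each step onto the new dma_mapped flag, and the surrounding
driver code is hypothetical):

	struct dma_buf_attachment *att;
	struct sg_table *sgt;

	att = dma_buf_attach(dmabuf, dev);	/* a->dma_mapped == false */
	sgt = dma_buf_map_attachment(att, DMA_BIDIRECTIONAL);
						/* a->dma_mapped == true */

	/* cache maintenance is now safe for this attachment */
	dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
	/* ... CPU reads/writes the buffer ... */
	dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);

	dma_buf_unmap_attachment(att, sgt, DMA_BIDIRECTIONAL);
						/* a->dma_mapped == false again */
	dma_buf_detach(dmabuf, att);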
Since commit 204f672255c2 ("staging: android: ion: Use CMA APIs directly"),
the CMA API is used directly and the allocated memory is therefore no
longer automatically zeroed.
Explicitly zero CMA allocated memory to ensure that no data is exposed to
userspace.
Fixes: 204f672255c2 ("staging: android: ion: Use CMA APIs directly")
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
Changes in v2:
- Clean up the commit message.
- Add 'Fixes:'
Changes in v3:
- Add support for highmem pages
drivers/staging/android/ion/ion_cma_heap.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index 86196ffd2faf..fa3e4b7e0c9f 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -21,6 +21,7 @@
#include <linux/err.h>
#include <linux/cma.h>
#include <linux/scatterlist.h>
+#include <linux/highmem.h>
#include "ion.h"
@@ -51,6 +52,22 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
if (!pages)
return -ENOMEM;
+ if (PageHighMem(pages)) {
+ unsigned long nr_clear_pages = nr_pages;
+ struct page *page = pages;
+
+ while (nr_clear_pages > 0) {
+ void *vaddr = kmap_atomic(page);
+
+ memset(vaddr, 0, PAGE_SIZE);
+ kunmap_atomic(vaddr);
+ page++;
+ nr_clear_pages--;
+ }
+ } else {
+ memset(page_address(pages), 0, size);
+ }
+
table = kmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
goto err;
--
1.8.5.2
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
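A condensed view of the allocation path with the zeroing in place
(reconstructed; cma_heap, nr_pages, align and size come from the surrounding
function and are assumptions here):

	pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	if (PageHighMem(pages)) {
		/*
		 * Highmem pages have no permanent kernel mapping, so
		 * page_address() cannot be used; map and clear each page
		 * through a temporary kmap_atomic() mapping instead.
		 */
		unsigned long nr_clear_pages = nr_pages;
		struct page *page = pages;

		while (nr_clear_pages > 0) {
			void *vaddr = kmap_atomic(page);

			memset(vaddr, 0, PAGE_SIZE);
			kunmap_atomic(vaddr);
			page++;
			nr_clear_pages--;
		}
	} else {
		/* lowmem sits in the linear map; a single memset covers it */
		memset(page_address(pages), 0, size);
	}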
Since commit 204f672255c2 ("staging: android: ion: Use CMA APIs directly"),
the CMA API is used directly and the allocated memory is therefore no
longer automatically zeroed.
Explicitly zero CMA allocated memory to ensure that no data is exposed to
userspace.
Fixes: 204f672255c2 ("staging: android: ion: Use CMA APIs directly")
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
Changes in v2:
- Clean up the commit message.
- Add 'Fixes:'
drivers/staging/android/ion/ion_cma_heap.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index 86196ffd2faf..91a98785607a 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -51,6 +51,8 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
if (!pages)
return -ENOMEM;
+ memset(page_address(pages), 0, size);
+
table = kmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
goto err;
--
1.8.5.2
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
Since the CMA API is now used directly, the allocated memory is no longer
automatically zeroed.
Explicitly zero CMA allocated memory to ensure that no data is exposed
to userspace.
Change-Id: I08e143707a0d31610821a7f16826c262bf3c1999
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
drivers/staging/android/ion/ion_cma_heap.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index 86196ff..91a9878 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -51,6 +51,8 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
if (!pages)
return -ENOMEM;
+ memset(page_address(pages), 0, size);
+
table = kmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
goto err;
--
1.8.5.2
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
Change reservation_object_get_fences_rcu to make the exclusive fence
pointer optional.
If not specified, the exclusive fence is put into the fence array as
well.
This is helpful for a couple of cases where we need all fences in a
single array.
Signed-off-by: Christian König <christian.koenig(a)amd.com>
---
drivers/dma-buf/reservation.c | 31 ++++++++++++++++++++++---------
1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index b759a569b7b8..461afa9febd4 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -374,8 +374,9 @@ EXPORT_SYMBOL(reservation_object_copy_fences);
* @pshared: the array of shared fence ptrs returned (array is krealloc'd to
* the required size, and must be freed by caller)
*
- * RETURNS
- * Zero or -errno
+ * Retrieve all fences from the reservation object. If the pointer for the
+ * exclusive fence is not specified the fence is put into the array of the
+ * shared fences as well. Returns either zero or -ENOMEM.
*/
int reservation_object_get_fences_rcu(struct reservation_object *obj,
struct dma_fence **pfence_excl,
@@ -389,8 +390,8 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
do {
struct reservation_object_list *fobj;
- unsigned seq;
- unsigned int i;
+ unsigned int i, seq;
+ size_t sz = 0;
shared_count = i = 0;
@@ -402,9 +403,14 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
goto unlock;
fobj = rcu_dereference(obj->fence);
- if (fobj) {
+ if (fobj)
+ sz += sizeof(*shared) * fobj->shared_max;
+
+ if (!pfence_excl && fence_excl)
+ sz += sizeof(*shared);
+
+ if (sz) {
struct dma_fence **nshared;
- size_t sz = sizeof(*shared) * fobj->shared_max;
nshared = krealloc(shared, sz,
GFP_NOWAIT | __GFP_NOWARN);
@@ -420,13 +426,19 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
break;
}
shared = nshared;
- shared_count = fobj->shared_count;
-
+ shared_count = fobj ? fobj->shared_count : 0;
for (i = 0; i < shared_count; ++i) {
shared[i] = rcu_dereference(fobj->shared[i]);
if (!dma_fence_get_rcu(shared[i]))
break;
}
+
+ if (!pfence_excl && fence_excl) {
+ shared[i] = fence_excl;
+ fence_excl = NULL;
+ ++i;
+ ++shared_count;
+ }
}
if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
@@ -448,7 +460,8 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
*pshared_count = shared_count;
*pshared = shared;
- *pfence_excl = fence_excl;
+ if (pfence_excl)
+ *pfence_excl = fence_excl;
return ret;
}
--
2.14.1
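A hedged caller-side sketch of the new convention (the caller code is
hypothetical; only the function signature comes from the patch):

	struct dma_fence **fences;
	unsigned int count, i;
	int r;

	/*
	 * Passing NULL for pfence_excl folds the exclusive fence, if any,
	 * into the returned shared array.
	 */
	r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
	if (r)
		return r;

	for (i = 0; i < count; ++i) {
		/* ... inspect or wait on fences[i] ... */
		dma_fence_put(fences[i]);
	}
	kfree(fences);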
On 01.12.2017 at 01:23, Lyude Paul wrote:
> I haven't gone to see where it started, but as of late a good number of
> pretty nasty deadlock issues have appeared with the kernel. Easy
> reproduction recipe on a laptop with i915/amdgpu prime with lockdep enabled:
>
> DRI_PRIME=1 glxinfo
Acked-by: Christian König <christian.koenig(a)amd.com>
Thanks for taking care of this,
Christian.
>
> Additionally, some more race conditions exist that I've managed to
> trigger with piglit and lockdep enabled after applying these patches:
>
> =============================
> WARNING: suspicious RCU usage
> 4.14.3Lyude-Test+ #2 Not tainted
> -----------------------------
> ./include/linux/reservation.h:216 suspicious rcu_dereference_protected() usage!
>
> other info that might help us debug this:
>
> rcu_scheduler_active = 2, debug_locks = 1
> 1 lock held by ext_image_dma_b/27451:
> #0: (reservation_ww_class_mutex){+.+.}, at: [<ffffffffa034f2ff>] ttm_bo_unref+0x9f/0x3c0 [ttm]
>
> stack backtrace:
> CPU: 0 PID: 27451 Comm: ext_image_dma_b Not tainted 4.14.3Lyude-Test+ #2
> Hardware name: HP HP ZBook 15 G4/8275, BIOS P70 Ver. 01.02 06/09/2017
> Call Trace:
> dump_stack+0x8e/0xce
> lockdep_rcu_suspicious+0xc5/0x100
> reservation_object_copy_fences+0x292/0x2b0
> ? ttm_bo_unref+0x9f/0x3c0 [ttm]
> ttm_bo_unref+0xbd/0x3c0 [ttm]
> amdgpu_bo_unref+0x2a/0x50 [amdgpu]
> amdgpu_gem_object_free+0x4b/0x50 [amdgpu]
> drm_gem_object_free+0x1f/0x40 [drm]
> drm_gem_object_put_unlocked+0x40/0xb0 [drm]
> drm_gem_object_handle_put_unlocked+0x6c/0xb0 [drm]
> drm_gem_object_release_handle+0x51/0x90 [drm]
> drm_gem_handle_delete+0x5e/0x90 [drm]
> ? drm_gem_handle_create+0x40/0x40 [drm]
> drm_gem_close_ioctl+0x20/0x30 [drm]
> drm_ioctl_kernel+0x5d/0xb0 [drm]
> drm_ioctl+0x2f7/0x3b0 [drm]
> ? drm_gem_handle_create+0x40/0x40 [drm]
> ? trace_hardirqs_on_caller+0xf4/0x190
> ? trace_hardirqs_on+0xd/0x10
> amdgpu_drm_ioctl+0x4f/0x90 [amdgpu]
> do_vfs_ioctl+0x93/0x670
> ? __fget+0x108/0x1f0
> SyS_ioctl+0x79/0x90
> entry_SYSCALL_64_fastpath+0x23/0xc2
>
> I've also added the relevant fixes for the issue mentioned above.
>
> Christian König (3):
> drm/ttm: fix ttm_bo_cleanup_refs_or_queue once more
> dma-buf: make reservation_object_copy_fences rcu save
> drm/amdgpu: reserve root PD while releasing it
>
> Michel Dänzer (1):
> drm/ttm: Always and only destroy bo->ttm_resv in ttm_bo_release_list
>
> drivers/dma-buf/reservation.c | 56 +++++++++++++++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++--
> drivers/gpu/drm/ttm/ttm_bo.c | 43 +++++++++++++-------------
> 3 files changed, 74 insertions(+), 38 deletions(-)
>
> --
> 2.14.3
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx(a)lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
If we are testing whether a reservation object's fences have been
signaled with timeout=0 (non-blocking), we need to pass 0 for the
timeout to dma_fence_wait_timeout().
Plus a bonus spelling correction.
Signed-off-by: Rob Clark <robdclark(a)gmail.com>
---
drivers/dma-buf/reservation.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index dec3a815455d..71f51140a9ad 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -420,7 +420,7 @@ EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
*
* RETURNS
* Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or
- * greater than zer on success.
+ * greater than zero on success.
*/
long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
bool wait_all, bool intr,
@@ -483,7 +483,14 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
goto retry;
}
- ret = dma_fence_wait_timeout(fence, intr, ret);
+ /*
+ * Note that dma_fence_wait_timeout() will return 1 if
+ * the fence is already signaled, so in the wait_all
+ * case when we go through the retry loop again, ret
+ * will be greater than 0 and we don't want this to
+ * cause _wait_timeout() to block
+ */
+ ret = dma_fence_wait_timeout(fence, intr, timeout ? ret : 0);
dma_fence_put(fence);
if (ret > 0 && wait_all && (i + 1 < shared_count))
goto retry;
--
2.13.6
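A caller-side sketch of the non-blocking use this protects (the caller is
hypothetical; the return convention is taken from the kernel-doc above):

	bool busy;
	long ret;

	/* timeout == 0 means poll: the call must never block */
	ret = reservation_object_wait_timeout_rcu(obj, true, false, 0);
	if (ret < 0)
		return ret;	/* error */
	busy = (ret == 0);	/* 0: still pending, > 0: all signaled */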
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
When building drm+i915 I get around 150 lines of sparse noise from
dma_fence __rcu warnings. This series eliminates all of that.
The first two patches were already posted by Chris, but there wasn't
any real reaction, so I figured I'd repost with a wider Cc list.
As for the other two patches, I'm no expert on dma_fence and I didn't
spend a lot of time looking at it so I can't be sure I annotated all
the accesses correctly. But I figured someone will scream at me if
I got it wrong ;)
Cc: Dave Airlie <airlied(a)redhat.com>
Cc: Jason Ekstrand <jason(a)jlekstrand.net>
Cc: linaro-mm-sig(a)lists.linaro.org
Cc: linux-media(a)vger.kernel.org
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Chris Wilson (2):
drm/syncobj: Mark up the fence as an RCU protected pointer
dma-buf/fence: Sparse wants __rcu on the object itself
Ville Syrjälä (2):
drm/syncobj: Use proper methods for accessing rcu protected pointers
dma-buf: Use rcu_assign_pointer() to set rcu protected pointers
drivers/dma-buf/reservation.c | 2 +-
drivers/gpu/drm/drm_syncobj.c | 11 +++++++----
include/drm/drm_syncobj.h | 2 +-
include/linux/dma-fence.h | 2 +-
4 files changed, 10 insertions(+), 7 deletions(-)
--
2.13.6
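The flavour of change involved, as a generic illustration (the field and lock
names below are made up for the example and are not the actual hunks):

	/* sparse wants the __rcu annotation on the pointer itself... */
	struct dma_fence __rcu *fence;

	/* ...RCU-side reads then use rcu_dereference() under rcu_read_lock()... */
	rcu_read_lock();
	f = rcu_dereference(obj->fence);
	rcu_read_unlock();

	/* ...lock-protected reads use rcu_dereference_protected()... */
	f = rcu_dereference_protected(obj->fence, lockdep_is_held(&obj->lock));

	/* ...and writers use rcu_assign_pointer() instead of plain assignment */
	rcu_assign_pointer(obj->fence, new_fence);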
Quoting Christian König (2017-09-04 14:27:33)
> From: Christian König <christian.koenig(a)amd.com>
>
> The logic is buggy and unnecessarily complex. When dma_fence_get_rcu() fails to
> acquire a reference it doesn't necessarily mean that there is no fence at all.
>
> It usually means that the fence was replaced by a new one, and in this situation
> we certainly want to have the new one as the result and *NOT* NULL.
Which is not guaranteed by the code you wrote either.
The point of the comment is that the mb is only inside the successful
kref_atomic_inc_unless_zero, and that only after that mb do you know
whether or not you have the current fence.
You can argue that you want to replace the
if (!dma_fence_get_rcu())
return NULL
with
if (!dma_fence_get_rcu())
continue;
but it would be incorrect to say that by simply ignoring the
post-condition check that you do have the right fence.
-Chris
From: Christian König <christian.koenig(a)amd.com>
When dma_fence_get_rcu() fails to acquire a reference it doesn't necessarily
mean that there is no fence at all.
It usually means that the fence was replaced by a new one, and in this
situation we certainly want to have the new one as the result and *NOT* NULL.
v2: Keep extra check after dma_fence_get_rcu().
Signed-off-by: Christian König <christian.koenig(a)amd.com>
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: linux-media(a)vger.kernel.org
Cc: dri-devel(a)lists.freedesktop.org
Cc: linaro-mm-sig(a)lists.linaro.org
---
include/linux/dma-fence.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 0a186c4..f4f23cb 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -248,9 +248,12 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep)
struct dma_fence *fence;
fence = rcu_dereference(*fencep);
- if (!fence || !dma_fence_get_rcu(fence))
+ if (!fence)
return NULL;
+ if (!dma_fence_get_rcu(fence))
+ continue;
+
/* The atomic_inc_not_zero() inside dma_fence_get_rcu()
* provides a full memory barrier upon success (such as now).
* This is paired with the write barrier from assigning
--
2.7.4
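For context, a hedged reconstruction of the whole retry loop after this change
(condensed from the header; the unlikely() annotations of the real code are
dropped):

	static inline struct dma_fence *
	dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep)
	{
		do {
			struct dma_fence *fence;

			fence = rcu_dereference(*fencep);
			if (!fence)
				return NULL;

			/* the fence was replaced under us; retry with the new one */
			if (!dma_fence_get_rcu(fence))
				continue;

			/*
			 * The atomic_inc_not_zero() inside dma_fence_get_rcu() is a
			 * full barrier on success; only now can we check that the
			 * pointer we took a reference on is still the current fence.
			 */
			if (fence == rcu_access_pointer(*fencep))
				return rcu_pointer_handoff(fence);

			dma_fence_put(fence);
		} while (1);
	}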
From: Colin Ian King <colin.king(a)canonical.com>
sg_table is initialized but never read before it is updated again later
on, making the initialization redundant. Remove the initialization.
Detected by clang scan-build:
"warning: Value stored to 'sg_table' during its initialization is
never read"
Signed-off-by: Colin Ian King <colin.king(a)canonical.com>
---
drivers/dma-buf/dma-buf.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 4a038dcf5361..bc1cb284111c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(dma_buf_detach);
struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
enum dma_data_direction direction)
{
- struct sg_table *sg_table = ERR_PTR(-EINVAL);
+ struct sg_table *sg_table;
might_sleep();
--
2.14.1
On Tue, Jul 25, 2017 at 11:11:35AM +0200, Christian König wrote:
> On 24.07.2017 at 13:57, Daniel Vetter wrote:
> > On Mon, Jul 24, 2017 at 11:51 AM, Christian König
> > <deathsimple(a)vodafone.de> wrote:
> > > On 24.07.2017 at 10:33, Daniel Vetter wrote:
> > > > On Fri, Jul 21, 2017 at 06:20:01PM +0200, Christian König wrote:
> > > > > From: Christian König <christian.koenig(a)amd.com>
> > > > >
> > > > > With hardware resets in mind it is possible that all shared fences are
> > > > > signaled, but the exclusive isn't. Fix waiting for everything in this
> > > > > situation.
> > > > How did you end up with both shared and exclusive fences on the same
> > > > reservation object? At least I thought the point of exclusive was that
> > > > it's exclusive (and has an implicit barrier on all previous shared
> > > > fences). Same for shared fences, they need to wait for the exclusive one
> > > > (and replace it).
> > > >
> > > > Is this fallout from the amdgpu trickery where by default you do all
> > > > shared fences? I thought we've aligned semantics a while back ...
> > >
> > > No, that is perfectly normal even for other drivers. Take a look at the
> > > reservation code.
> > >
> > > The exclusive fence replaces all shared fences, but adding a shared fence
> > > doesn't replace the exclusive fence. That actually makes sense, because when
> > > you want to add more shared fences those need to wait for the last exclusive
> > > fence as well.
> > Hm right.
> >
> > > Now normally I would agree that when you have shared fences it is sufficient
> > > to wait for all of them because those operations can't start before the
> > > exclusive one finishes. But with GPU reset and/or the ability to abort
> > > already submitted operations it is perfectly possible that you end up with
> > > an exclusive fence which isn't signaled and a shared fence which is signaled
> > > in the same reservation object.
> > How does that work? The batch(es) with the shared fence are all
> > supposed to wait for the exclusive fence before they start, which
> > means even if you gpu reset and restart/cancel certain things, they
> > shouldn't be able to complete out of order.
>
> Assume the following:
> 1. The exclusive fence is some move operation by the kernel which executes
> on a DMA engine.
> 2. The shared fence is a 3D operation submitted by userspace which executes
> on the 3D engine.
>
> Now we found the 3D engine to be hung and needs a reset, all currently
> submitted jobs are aborted, marked with an error code and their fences put
> into the signaled state.
>
> Since we only reset the 3D engine, the move operation (fortunately) isn't
> affected by this.
>
> I think this applies to all drivers and isn't something amdgpu specific.
Not i915 because:
- At first we only had system wide gpu reset that killed everything, which
means all requests will be completed, not just on a single engine.
- Now we have per-engine reset, but combined with replaying them (the
offending one gets a no-op batch to avoid re-hanging), to make sure the
dependency tree doesn't fall apart.
Now I see that doing this isn't all that simple, and either way we still
have the case where one driver resets but not the other (in multi-gpu),
but I'm not exactly sure how to best handle this.
What exactly is the downside of not dropping this assumption, i.e. why do
you want this patch? What blows up?
-Daniel
>
> Regards,
> Christian.
>
> >
> > If you outright cancel a fence then you're supposed to first call
> > dma_fence_set_error(-EIO) and then complete it. Note that atm that
> > part might be slightly overengineered and I'm not sure about how we
> > expose stuff to userspace, e.g. dma_fence_set_error(-EAGAIN) is (or
> > soon, has been) used by i915 for its internal book-keeping, which
> > might not be the best to leak to other consumers. But completing
> > fences (at least exported ones, where userspace or other drivers can
> > get at them) shouldn't be possible.
> > -Daniel
>
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
On Mon, Jul 24, 2017 at 11:51 AM, Christian König
<deathsimple(a)vodafone.de> wrote:
> On 24.07.2017 at 10:33, Daniel Vetter wrote:
>>
>> On Fri, Jul 21, 2017 at 06:20:01PM +0200, Christian König wrote:
>>>
>>> From: Christian König <christian.koenig(a)amd.com>
>>>
>>> With hardware resets in mind it is possible that all shared fences are
> >> signaled, but the exclusive isn't. Fix waiting for everything in this
>>> situation.
>>
>> How did you end up with both shared and exclusive fences on the same
>> reservation object? At least I thought the point of exclusive was that
>> it's exclusive (and has an implicit barrier on all previous shared
>> fences). Same for shared fences, they need to wait for the exclusive one
>> (and replace it).
>>
>> Is this fallout from the amdgpu trickery where by default you do all
>> shared fences? I thought we've aligned semantics a while back ...
>
>
> No, that is perfectly normal even for other drivers. Take a look at the
> reservation code.
>
> The exclusive fence replaces all shared fences, but adding a shared fence
> doesn't replace the exclusive fence. That actually makes sense, because when
> you want to add more shared fences those need to wait for the last exclusive
> fence as well.
Hm right.
> Now normally I would agree that when you have shared fences it is sufficient
> to wait for all of them because those operations can't start before the
> exclusive one finishes. But with GPU reset and/or the ability to abort
> already submitted operations it is perfectly possible that you end up with
> an exclusive fence which isn't signaled and a shared fence which is signaled
> in the same reservation object.
How does that work? The batch(es) with the shared fence are all
supposed to wait for the exclusive fence before they start, which
means even if you gpu reset and restart/cancel certain things, they
shouldn't be able to complete out of order.
If you outright cancel a fence then you're supposed to first call
dma_fence_set_error(-EIO) and then complete it. Note that atm that
part might be slightly overengineered and I'm not sure about how we
expose stuff to userspace, e.g. dma_fence_set_error(-EAGAIN) is (or
> soon, has been) used by i915 for its internal book-keeping, which
might not be the best to leak to other consumers. But completing
fences (at least exported ones, where userspace or other drivers can
get at them) shouldn't be possible.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
On 2017-07-22 00:20, Christian König wrote:
> From: Christian König <christian.koenig(a)amd.com>
>
> With hardware resets in mind it is possible that all shared fences are
> signaled, but the exclusive isn't. Fix waiting for everything in this situation.
>
> Signed-off-by: Christian König <christian.koenig(a)amd.com>
> ---
> drivers/dma-buf/reservation.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
> index e2eff86..ce3f9c1 100644
> --- a/drivers/dma-buf/reservation.c
> +++ b/drivers/dma-buf/reservation.c
> @@ -461,7 +461,7 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
> }
> }
>
> - if (!shared_count) {
> + if (!fence) {
The previous code seems to be a bug: the exclusive fence isn't waited on at
all if shared_count != 0.
With your fix there is still a case where the exclusive fence could be
skipped: when fobj->shared[shared_count-1] isn't signalled.
Regards,
David Zhou
> struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
>
> if (fence_excl &&
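To spell out the control flow under discussion, a condensed sketch of
reservation_object_wait_timeout_rcu() with the fix applied (reconstructed;
the retry and reference-taking paths are omitted):

	struct dma_fence *fence = NULL;

	if (wait_all) {
		for (i = 0; !fence && i < shared_count; ++i) {
			struct dma_fence *lfence =
				rcu_dereference(fobj->shared[i]);

			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				     &lfence->flags))
				continue;	/* already signaled, skip it */

			fence = lfence;		/* first unsignaled shared fence */
		}
	}

	/*
	 * Testing !fence instead of !shared_count means the exclusive fence
	 * is still considered when every shared fence has already signaled.
	 */
	if (!fence) {
		struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);

		if (fence_excl &&
		    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags))
			fence = fence_excl;
	}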
On Fri, Jul 21, 2017 at 06:20:01PM +0200, Christian König wrote:
> From: Christian König <christian.koenig(a)amd.com>
>
> With hardware resets in mind it is possible that all shared fences are
> signaled, but the exclusive isn't. Fix waiting for everything in this situation.
How did you end up with both shared and exclusive fences on the same
reservation object? At least I thought the point of exclusive was that
it's exclusive (and has an implicit barrier on all previous shared
fences). Same for shared fences, they need to wait for the exclusive one
(and replace it).
Is this fallout from the amdgpu trickery where by default you do all
shared fences? I thought we've aligned semantics a while back ...
-Daniel
>
> Signed-off-by: Christian König <christian.koenig(a)amd.com>
> ---
> drivers/dma-buf/reservation.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
> index e2eff86..ce3f9c1 100644
> --- a/drivers/dma-buf/reservation.c
> +++ b/drivers/dma-buf/reservation.c
> @@ -461,7 +461,7 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj,
> }
> }
>
> - if (!shared_count) {
> + if (!fence) {
> struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
>
> if (fence_excl &&
> --
> 2.7.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel(a)lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch