Hi everybody,
since I got positive feedback from Daniel I have continued working on this approach.
A few issues are still open:
1. Daniel suggested that I make the invalidate_mappings callback a parameter of dma_buf_attach().
This approach unfortunately won't work, because the importer is not necessarily ready to handle invalidation events at the time the attachment is created.
E.g. in the amdgpu example we first need to set up the imported GEM/TTM objects and install them in the attachment.
My solution is to introduce a separate function which grabs the locks and sets the callback; this function could then also be used to pin the buffer later on, if that turns out to be necessary after all. A rough sketch of the importer side is shown below.
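To illustrate, here is a rough sketch of the importer side using the attach_info structure from the patch below. The my_importer structure and my_importer_invalidate() are made-up placeholders, not part of the patch:

#include <linux/dma-buf.h>
#include <linux/err.h>

static void my_importer_invalidate(struct dma_buf_attachment *attach)
{
        struct my_importer *imp = attach->priv;

        /* Called with the reservation object locked: just mark the
         * cached mapping as stale and re-map it on next use.
         */
        imp->mapping_valid = false;
}

int my_importer_attach(struct my_importer *imp, struct dma_buf *dmabuf)
{
        struct dma_buf_attach_info info = {
                .dmabuf = dmabuf,
                .dev = imp->dev,
                .priv = imp,
                .invalidate = my_importer_invalidate,
        };

        imp->attach = dma_buf_attach(&info);
        return PTR_ERR_OR_ZERO(imp->attach);
}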
2. With my example setup this currently results in a ping/pong situation because the exporter prefers a VRAM placement while the importer prefers a GTT placement.
This results in quite a performance drop, but it can be fixed by a simple Mesa patch which allows shared BOs to be placed in both VRAM and GTT (see the sketch below).
The question is what we should do in the meantime: accept the performance drop, or only allow unpinned sharing with a new Mesa?
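For reference, the Mesa/libdrm side could look roughly like the sketch below. I'm writing the libdrm amdgpu API from memory here, so treat the field and flag names as assumptions rather than the actual patch:

#include <amdgpu.h>
#include <amdgpu_drm.h>

static int alloc_shareable_bo(amdgpu_device_handle dev, uint64_t size,
                              amdgpu_bo_handle *bo)
{
        struct amdgpu_bo_alloc_request req = {
                .alloc_size = size,
                .phys_alignment = 4096,
                /* Allow both placements instead of VRAM only, so the
                 * exporter and the importer stop ping/ponging the buffer.
                 */
                .preferred_heap = AMDGPU_GEM_DOMAIN_VRAM |
                                  AMDGPU_GEM_DOMAIN_GTT,
        };

        return amdgpu_bo_alloc(dev, &req, bo);
}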
Please review and comment,
Christian.
Each importer can now provide an invalidate_mappings callback.
This allows the exporter to provide the mappings without the need to pin
the backing store.
v2: don't try to invalidate mappings when the callback is NULL,
lock the reservation obj while using the attachments,
add helper to set the callback
v3: move flag for invalidation support into the DMA-buf,
use new attach_info structure to set the callback
Signed-off-by: Christian König <christian.koenig(a)amd.com>
---
drivers/dma-buf/dma-buf.c | 43 +++++++++++++++++++++++++++++++++++++++++++
include/linux/dma-buf.h | 28 ++++++++++++++++++++++++++++
2 files changed, 71 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index d2e8ca0d9427..ffaa2f9a9c2c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -566,6 +566,7 @@ struct dma_buf_attachment *dma_buf_attach(const struct dma_buf_attach_info *info
attach->dev = info->dev;
attach->dmabuf = dmabuf;
attach->priv = info->priv;
+ attach->invalidate = info->invalidate;
mutex_lock(&dmabuf->lock);
@@ -574,7 +575,9 @@ struct dma_buf_attachment *dma_buf_attach(const struct dma_buf_attach_info *info
if (ret)
goto err_attach;
}
+ reservation_object_lock(dmabuf->resv, NULL);
list_add(&attach->node, &dmabuf->attachments);
+ reservation_object_unlock(dmabuf->resv);
mutex_unlock(&dmabuf->lock);
return attach;
@@ -600,7 +603,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
return;
mutex_lock(&dmabuf->lock);
+ reservation_object_lock(dmabuf->resv, NULL);
list_del(&attach->node);
+ reservation_object_unlock(dmabuf->resv);
if (dmabuf->ops->detach)
dmabuf->ops->detach(dmabuf, attach);
@@ -634,10 +639,23 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
+ /*
+ * Mapping a DMA-buf can trigger its invalidation. Prevent sending this
+ * event to the caller by temporarily removing this attachment from the
+ * list.
+ */
+ if (attach->invalidate) {
+ reservation_object_assert_held(attach->dmabuf->resv);
+ list_del(&attach->node);
+ }
+
sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
+ if (attach->invalidate)
+ list_add(&attach->node, &attach->dmabuf->attachments);
+
return sg_table;
}
EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
@@ -658,6 +676,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
{
might_sleep();
+ if (attach->invalidate)
+ reservation_object_assert_held(attach->dmabuf->resv);
+
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
@@ -666,6 +687,26 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
}
EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
+/**
+ * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
+ *
+ * @dmabuf: [in] buffer whose mappings should be invalidated
+ *
+ * Informs all attachments that they need to destroy and recreate all their
+ * mappings.
+ */
+void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
+{
+ struct dma_buf_attachment *attach;
+
+ reservation_object_assert_held(dmabuf->resv);
+
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (attach->invalidate)
+ attach->invalidate(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
+
/**
* DOC: cpu access
*
@@ -1123,10 +1164,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
seq_puts(s, "\tAttached Devices:\n");
attach_count = 0;
+ reservation_object_lock(buf_obj->resv, NULL);
list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
attach_count++;
}
+ reservation_object_unlock(buf_obj->resv);
seq_printf(s, "Total %d devices attached\n\n",
attach_count);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 2c27568d44af..15dd8598bff1 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -270,6 +270,8 @@ struct dma_buf_ops {
* @poll: for userspace poll support
* @cb_excl: for userspace poll support
* @cb_shared: for userspace poll support
+ * @invalidation_supported: True when the exporter supports unpinned operation
+ * using the reservation lock.
*
* This represents a shared buffer, created by calling dma_buf_export(). The
* userspace representation is a normal file descriptor, which can be created by
@@ -293,6 +295,7 @@ struct dma_buf {
struct list_head list_node;
void *priv;
struct reservation_object *resv;
+ bool invalidation_supported;
/* poll support */
wait_queue_head_t poll;
@@ -326,6 +329,28 @@ struct dma_buf_attachment {
struct device *dev;
struct list_head node;
void *priv;
+
+ /**
+ * @invalidate:
+ *
+ * Optional callback provided by the importer of the dma-buf.
+ *
+ * If provided the exporter can avoid pinning the backing store while
+ * mappings exist.
+ *
+ * The function is called with the lock of the reservation object
+ * associated with the dma_buf held and the mapping function must be
+ * called with this lock held as well. This makes sure that no mapping
+ * is created concurrently with an ongoing invalidation.
+ *
+ * After the callback all existing mappings are still valid until all
+ * fences in the dma_buf's reservation object are signaled, but should be
+ * destroyed by the importer as soon as possible.
+ *
+ * New mappings can be created immediately, but can't be used before the
+ * exclusive fence in the dma_buf's reservation object is signaled.
+ */
+ void (*invalidate)(struct dma_buf_attachment *attach);
};
/**
@@ -367,6 +392,7 @@ struct dma_buf_export_info {
* @dmabuf: the exported dma_buf
* @dev: the device which wants to import the attachment
* @priv: private data of importer to this attachment
+ * @invalidate: callback to use for invalidating mappings
*
* This structure holds the information required to attach to a buffer. Used
* with dma_buf_attach() only.
@@ -375,6 +401,7 @@ struct dma_buf_attach_info {
struct dma_buf *dmabuf;
struct device *dev;
void *priv;
+ void (*invalidate)(struct dma_buf_attachment *attach);
};
/**
@@ -406,6 +433,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
enum dma_data_direction);
void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
enum dma_data_direction);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction dir);
int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
--
2.14.1
This set of patches adds an optional invalidate_mappings callback to each DMA-buf attachment which can be filled in by the importer.
This callback allows the exporter to provide the DMA-buf content without pinning it. The reservation object's lock acts as the synchronization point for buffer moves and for creating mappings.
This set includes an implementation for amdgpu which should be rather easily portable to other DRM drivers.
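On the exporter side this boils down to something like the following sketch before moving the backing store. The my_exporter_* names are placeholders; only dma_buf_invalidate_mappings() and the reservation lock usage come from the patches:

static void my_exporter_evict(struct my_exporter_bo *bo)
{
        struct dma_buf *dmabuf = bo->dmabuf;

        reservation_object_lock(dmabuf->resv, NULL);

        /* Tell all importers that their mappings are about to become
         * stale.
         */
        dma_buf_invalidate_mappings(dmabuf);

        /* Existing mappings stay valid until the fences in the
         * reservation object signal, so the actual move must be ordered
         * through those fences.
         */
        my_exporter_move_backing_store(bo);

        reservation_object_unlock(dmabuf->resv);
}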
Please comment,
Christian.
Each importer can now provide an invalidate_mappings callback.
This allows the exporter to provide the mappings without the need to pin
the backing store.
Signed-off-by: Christian König <christian.koenig(a)amd.com>
---
drivers/dma-buf/dma-buf.c | 25 +++++++++++++++++++++++++
include/linux/dma-buf.h | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index d78d5fc173dc..ed8d5844ae74 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -629,6 +629,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
might_sleep();
+ if (attach->invalidate_mappings)
+ reservation_object_assert_held(attach->dmabuf->resv);
+
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
@@ -656,6 +659,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
{
might_sleep();
+ if (attach->invalidate_mappings)
+ reservation_object_assert_held(attach->dmabuf->resv);
+
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
@@ -664,6 +670,25 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
}
EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
+/**
+ * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
+ *
+ * @dmabuf: [in] buffer whose mappings should be invalidated
+ *
+ * Informs all attachments that they need to destroy and recreate all their
+ * mappings.
+ */
+void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
+{
+ struct dma_buf_attachment *attach;
+
+ reservation_object_assert_held(dmabuf->resv);
+
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ attach->invalidate_mappings(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
+
/**
* DOC: cpu access
*
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 085db2fee2d7..c1e2f7d93509 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -91,6 +91,18 @@ struct dma_buf_ops {
*/
void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
+ /**
+ * @supports_mapping_invalidation:
+ *
+ * True for exporters which support unpinned DMA-buf operation using
+ * the reservation lock.
+ *
+ * When attachment->invalidate_mappings is set the @map_dma_buf and
+ * @unmap_dma_buf callbacks can be called with the reservation lock
+ * held.
+ */
+ bool supports_mapping_invalidation;
+
/**
* @map_dma_buf:
*
@@ -326,6 +338,29 @@ struct dma_buf_attachment {
struct device *dev;
struct list_head node;
void *priv;
+
+ /**
+ * @invalidate_mappings:
+ *
+ * Optional callback provided by the importer of the attachment which
+ * must be set before mappings are created.
+ *
+ * If provided the exporter can avoid pinning the backing store while
+ * mappings exist.
+ *
+ * The function is called with the lock of the reservation object
+ * associated with the dma_buf held and the mapping function must be
+ * called with this lock held as well. This makes sure that no mapping
+ * is created concurrently with an ongoing invalidation.
+ *
+ * After the callback all existing mappings are still valid until all
+ * fences in the dma_buf's reservation object are signaled, but should be
+ * destroyed by the importer as soon as possible.
+ *
+ * New mappings can be created immediately, but can't be used before the
+ * exclusive fence in the dma_buf's reservation object is signaled.
+ */
+ void (*invalidate_mappings)(struct dma_buf_attachment *attach);
};
/**
@@ -391,6 +426,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
enum dma_data_direction);
void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
enum dma_data_direction);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction dir);
int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
--
2.14.1
Fix the dup_sg_table function to initialize the dma_address of the new
sg list entries instead of the source dma_address entries.
Since ION duplicates the sg_list this issue does not appear to result in
an actual bug.
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
Acked-by: Laura Abbott <labbott(a)redhat.com>
---
Changes in v2:
- Add to commit message that it doesn't cause an actual bug
- Remove 'Fixes:' since it doesn't cause a bug
- Add Acked-by from Laura Abbott
drivers/staging/android/ion/ion.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 57e0d8035b2e..517d4f40d1b7 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -187,7 +187,7 @@ static struct sg_table *dup_sg_table(struct sg_table *table)
new_sg = new_table->sgl;
for_each_sg(table->sgl, sg, table->nents, i) {
memcpy(new_sg, sg, sizeof(*sg));
- sg->dma_address = 0;
+ new_sg->dma_address = 0;
new_sg = sg_next(new_sg);
}
--
1.8.5.2
Fix the dup_sg_table function to initialize the dma_address of the new
sg list entries instead of the source dma_address entries.
Fixes: 17fd283f3870 ("staging: android: ion: Duplicate sg_table")
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
drivers/staging/android/ion/ion.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index f480885e346b..3ace3a0d9210 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -197,7 +197,7 @@ static struct sg_table *dup_sg_table(struct sg_table *table)
new_sg = new_table->sgl;
for_each_sg(table->sgl, sg, table->nents, i) {
memcpy(new_sg, sg, sizeof(*sg));
- sg->dma_address = 0;
+ new_sg->dma_address = 0;
new_sg = sg_next(new_sg);
}
--
1.8.5.2
The ION begin_cpu_access and end_cpu_access functions use the
dma_sync_sg_for_cpu and dma_sync_sg_for_device APIs to perform cache
maintenance.
Currently it is possible to apply cache maintenance, via the
begin_cpu_access and end_cpu_access APIs, to ION buffers which are not
dma mapped.
The dma sync sg APIs should not be called on sg lists which have not been
dma mapped, as this can result in cache maintenance being applied to the
wrong address. If an sg list has not been dma mapped then its dma_address
field has not been populated; some DMA ops, such as swiotlb_dma_ops, use
the dma_address field to calculate the address to which to apply
cache maintenance.
Fix the ION begin_cpu_access and end_cpu_access functions to only apply
cache maintenance to buffers which have been dma mapped.
Fixes: 2a55e7b5e544 ("staging: android: ion: Call dma_map_sg for syncing and mapping")
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
drivers/staging/android/ion/ion.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index f480885e346b..e5df5272823d 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -214,6 +214,7 @@ struct ion_dma_buf_attachment {
struct device *dev;
struct sg_table *table;
struct list_head list;
+ bool dma_mapped;
};
static int ion_dma_buf_attach(struct dma_buf *dmabuf, struct device *dev,
@@ -235,6 +236,7 @@ static int ion_dma_buf_attach(struct dma_buf *dmabuf, struct device *dev,
a->table = table;
a->dev = dev;
+ a->dma_mapped = false;
INIT_LIST_HEAD(&a->list);
attachment->priv = a;
@@ -272,6 +274,7 @@ static struct sg_table *ion_map_dma_buf(struct dma_buf_attachment *attachment,
direction))
return ERR_PTR(-ENOMEM);
+ a->dma_mapped = true;
return table;
}
@@ -279,7 +282,10 @@ static void ion_unmap_dma_buf(struct dma_buf_attachment *attachment,
struct sg_table *table,
enum dma_data_direction direction)
{
+ struct ion_dma_buf_attachment *a = attachment->priv;
+
dma_unmap_sg(attachment->dev, table->sgl, table->nents, direction);
+ a->dma_mapped = false;
}
static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
@@ -345,8 +351,9 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
- dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
- direction);
+ if (a->dma_mapped)
+ dma_sync_sg_for_cpu(a->dev, a->table->sgl,
+ a->table->nents, direction);
}
mutex_unlock(&buffer->lock);
@@ -367,8 +374,9 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
- dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
- direction);
+ if (a->dma_mapped)
+ dma_sync_sg_for_device(a->dev, a->table->sgl,
+ a->table->nents, direction);
}
mutex_unlock(&buffer->lock);
--
1.8.5.2
Since commit 204f672255c2 ("staging: android: ion: Use CMA APIs directly")
the CMA API is now used directly and therefore the allocated memory is no
longer automatically zeroed.
Explicitly zero CMA allocated memory to ensure that no data is exposed to
userspace.
Fixes: 204f672255c2 ("staging: android: ion: Use CMA APIs directly")
Signed-off-by: Liam Mark <lmark(a)codeaurora.org>
---
Changes in v2:
- Clean up the commit message.
- Add 'Fixes:'
Changes in v3:
- Add support for highmem pages
drivers/staging/android/ion/ion_cma_heap.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index 86196ffd2faf..fa3e4b7e0c9f 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -21,6 +21,7 @@
#include <linux/err.h>
#include <linux/cma.h>
#include <linux/scatterlist.h>
+#include <linux/highmem.h>
#include "ion.h"
@@ -51,6 +52,22 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
if (!pages)
return -ENOMEM;
+ if (PageHighMem(pages)) {
+ unsigned long nr_clear_pages = nr_pages;
+ struct page *page = pages;
+
+ while (nr_clear_pages > 0) {
+ void *vaddr = kmap_atomic(page);
+
+ memset(vaddr, 0, PAGE_SIZE);
+ kunmap_atomic(vaddr);
+ page++;
+ nr_clear_pages--;
+ }
+ } else {
+ memset(page_address(pages), 0, size);
+ }
+
table = kmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
goto err;
--
1.8.5.2