Almost everyone uses dma_fence_default_wait.
Signed-off-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: Gustavo Padovan <gustavo(a)padovan.org>
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
---
drivers/dma-buf/dma-fence-array.c | 1 -
drivers/dma-buf/dma-fence.c | 5 ++++-
drivers/dma-buf/sw_sync.c | 1 -
include/linux/dma-fence.h | 13 ++++++++-----
4 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index dd1edfb27b61..a8c254497251 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -104,7 +104,6 @@ const struct dma_fence_ops dma_fence_array_ops = {
.get_timeline_name = dma_fence_array_get_timeline_name,
.enable_signaling = dma_fence_array_enable_signaling,
.signaled = dma_fence_array_signaled,
- .wait = dma_fence_default_wait,
.release = dma_fence_array_release,
};
EXPORT_SYMBOL(dma_fence_array_ops);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 59049375bd19..30fcbe415ff4 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -158,7 +158,10 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
return -EINVAL;
trace_dma_fence_wait_start(fence);
- ret = fence->ops->wait(fence, intr, timeout);
+ if (fence->ops->wait)
+ ret = fence->ops->wait(fence, intr, timeout);
+ else
+ ret = dma_fence_default_wait(fence, intr, timeout);
trace_dma_fence_wait_end(fence);
return ret;
}
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 3d78ca89a605..53c1d6d36a64 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -188,7 +188,6 @@ static const struct dma_fence_ops timeline_fence_ops = {
.get_timeline_name = timeline_fence_get_timeline_name,
.enable_signaling = timeline_fence_enable_signaling,
.signaled = timeline_fence_signaled,
- .wait = dma_fence_default_wait,
.release = timeline_fence_release,
.fence_value_str = timeline_fence_value_str,
.timeline_value_str = timeline_fence_timeline_value_str,
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index c730f569621a..d05496ff0d10 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -191,11 +191,14 @@ struct dma_fence_ops {
/**
* @wait:
*
- * Custom wait implementation, or dma_fence_default_wait.
+ * Custom wait implementation, defaults to dma_fence_default_wait() if
+ * not set.
*
- * Must not be NULL, set to dma_fence_default_wait for default implementation.
- * the dma_fence_default_wait implementation should work for any fence, as long
- * as enable_signaling works correctly.
+ * The dma_fence_default_wait implementation should work for any fence, as long
+ * as @enable_signaling works correctly. This hook allows drivers to
+ * have an optimized version for the case where a process context is
+ * already available, e.g. if @enable_signaling for the general case
+ * needs to set up a worker thread.
*
* Must return -ERESTARTSYS if the wait is intr = true and the wait was
* interrupted, and remaining jiffies if fence has signaled, or 0 if wait
@@ -203,7 +206,7 @@ struct dma_fence_ops {
* which should be treated as if the fence is signaled. For example a hardware
* lockup could be reported like that.
*
- * This callback is mandatory.
+ * This callback is optional.
*/
signed long (*wait)(struct dma_fence *fence,
bool intr, signed long timeout);
--
2.17.0
- Switch to inline member docs for dma_fence_ops.
- Mild polish all around.
- hyperlink all the things!
v2: - Remove the various [in] annotations, they seem really uncommon
in kerneldoc and look funny.
Signed-off-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
---
include/linux/dma-fence.h | 235 +++++++++++++++++++++++++-------------
1 file changed, 154 insertions(+), 81 deletions(-)
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 4c008170fe65..9d6f39bf2111 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -94,11 +94,11 @@ typedef void (*dma_fence_func_t)(struct dma_fence *fence,
struct dma_fence_cb *cb);
/**
- * struct dma_fence_cb - callback for dma_fence_add_callback
- * @node: used by dma_fence_add_callback to append this struct to fence::cb_list
+ * struct dma_fence_cb - callback for dma_fence_add_callback()
+ * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
* @func: dma_fence_func_t to call
*
- * This struct will be initialized by dma_fence_add_callback, additional
+ * This struct will be initialized by dma_fence_add_callback(), additional
* data can be passed along by embedding dma_fence_cb in another struct.
*/
struct dma_fence_cb {
@@ -108,75 +108,142 @@ struct dma_fence_cb {
/**
* struct dma_fence_ops - operations implemented for fence
- * @get_driver_name: returns the driver name.
- * @get_timeline_name: return the name of the context this fence belongs to.
- * @enable_signaling: enable software signaling of fence.
- * @signaled: [optional] peek whether the fence is signaled, can be null.
- * @wait: custom wait implementation, or dma_fence_default_wait.
- * @release: [optional] called on destruction of fence, can be null
- * @fill_driver_data: [optional] callback to fill in free-form debug info
- * Returns amount of bytes filled, or -errno.
- * @fence_value_str: [optional] fills in the value of the fence as a string
- * @timeline_value_str: [optional] fills in the current value of the timeline
- * as a string
*
- * Notes on enable_signaling:
- * For fence implementations that have the capability for hw->hw
- * signaling, they can implement this op to enable the necessary
- * irqs, or insert commands into cmdstream, etc. This is called
- * in the first wait() or add_callback() path to let the fence
- * implementation know that there is another driver waiting on
- * the signal (ie. hw->sw case).
- *
- * This function can be called from atomic context, but not
- * from irq context, so normal spinlocks can be used.
- *
- * A return value of false indicates the fence already passed,
- * or some failure occurred that made it impossible to enable
- * signaling. True indicates successful enabling.
- *
- * fence->error may be set in enable_signaling, but only when false is
- * returned.
- *
- * Calling dma_fence_signal before enable_signaling is called allows
- * for a tiny race window in which enable_signaling is called during,
- * before, or after dma_fence_signal. To fight this, it is recommended
- * that before enable_signaling returns true an extra reference is
- * taken on the fence, to be released when the fence is signaled.
- * This will mean dma_fence_signal will still be called twice, but
- * the second time will be a noop since it was already signaled.
- *
- * Notes on signaled:
- * May set fence->error if returning true.
- *
- * Notes on wait:
- * Must not be NULL, set to dma_fence_default_wait for default implementation.
- * the dma_fence_default_wait implementation should work for any fence, as long
- * as enable_signaling works correctly.
- *
- * Must return -ERESTARTSYS if the wait is intr = true and the wait was
- * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
- * timed out. Can also return other error values on custom implementations,
- * which should be treated as if the fence is signaled. For example a hardware
- * lockup could be reported like that.
- *
- * Notes on release:
- * Can be NULL, this function allows additional commands to run on
- * destruction of the fence. Can be called from irq context.
- * If pointer is set to NULL, kfree will get called instead.
*/
-
struct dma_fence_ops {
+ /**
+ * @get_driver_name:
+ *
+ * Returns the driver name. This is a callback to allow drivers to
+ * compute the name at runtime, without having it to store permanently
+ * for each fence, or build a cache of some sort.
+ *
+ * This callback is mandatory.
+ */
const char * (*get_driver_name)(struct dma_fence *fence);
+
+ /**
+ * @get_timeline_name:
+ *
+ * Return the name of the context this fence belongs to. This is a
+ * callback to allow drivers to compute the name at runtime, without
+ * having it to store permanently for each fence, or build a cache of
+ * some sort.
+ *
+ * This callback is mandatory.
+ */
const char * (*get_timeline_name)(struct dma_fence *fence);
+
+ /**
+ * @enable_signaling:
+ *
+ * Enable software signaling of fence.
+ *
+ * For fence implementations that have the capability for hw->hw
+ * signaling, they can implement this op to enable the necessary
+ * interrupts, or insert commands into cmdstream, etc, to avoid these
+ * costly operations for the common case where only hw->hw
+ * synchronization is required. This is called in the first
+ * dma_fence_wait() or dma_fence_add_callback() path to let the fence
+ * implementation know that there is another driver waiting on the
+ * signal (ie. hw->sw case).
+ *
+ * This function can be called from atomic context, but not
+ * from irq context, so normal spinlocks can be used.
+ *
+ * A return value of false indicates the fence already passed,
+ * or some failure occurred that made it impossible to enable
+ * signaling. True indicates successful enabling.
+ *
+ * &dma_fence.error may be set in enable_signaling, but only when false
+ * is returned.
+ *
+ * Since many implementations can call dma_fence_signal() even when before
+ * @enable_signaling has been called there's a race window, where the
+ * dma_fence_signal() might result in the final fence reference being
+ * released and its memory freed. To avoid this, implementations of this
+ * callback should grab their own reference using dma_fence_get(), to be
+ * released when the fence is signalled (through e.g. the interrupt
+ * handler).
+ *
+ * This callback is mandatory.
+ */
bool (*enable_signaling)(struct dma_fence *fence);
+
+ /**
+ * @signaled:
+ *
+ * Peek whether the fence is signaled, as a fastpath optimization for
+ * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
+ * callback does not need to make any guarantees beyond that a fence
+ * once indicates as signalled must always return true from this
+ * callback. This callback may return false even if the fence has
+ * completed already, in this case information hasn't propogated throug
+ * the system yet. See also dma_fence_is_signaled().
+ *
+ * May set &dma_fence.error if returning true.
+ *
+ * This callback is optional.
+ */
bool (*signaled)(struct dma_fence *fence);
+
+ /**
+ * @wait:
+ *
+ * Custom wait implementation, or dma_fence_default_wait.
+ *
+ * Must not be NULL, set to dma_fence_default_wait for default implementation.
+ * the dma_fence_default_wait implementation should work for any fence, as long
+ * as enable_signaling works correctly.
+ *
+ * Must return -ERESTARTSYS if the wait is intr = true and the wait was
+ * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
+ * timed out. Can also return other error values on custom implementations,
+ * which should be treated as if the fence is signaled. For example a hardware
+ * lockup could be reported like that.
+ *
+ * This callback is mandatory.
+ */
signed long (*wait)(struct dma_fence *fence,
bool intr, signed long timeout);
+
+ /**
+ * @release:
+ *
+ * Called on destruction of fence to release additional resources.
+ * Can be called from irq context. This callback is optional. If it is
+ * NULL, then dma_fence_free() is instead called as the default
+ * implementation.
+ */
void (*release)(struct dma_fence *fence);
+ /**
+ * @fill_driver_data:
+ *
+ * Callback to fill in free-form debug info Returns amount of bytes
+ * filled, or negative error on failure.
+ *
+ * This callback is optional.
+ */
int (*fill_driver_data)(struct dma_fence *fence, void *data, int size);
+
+ /**
+ * @fence_value_str:
+ *
+ * Callback to fill in free-form debug info specific to this fence, like
+ * the sequence number.
+ *
+ * This callback is optional.
+ */
void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
+
+ /**
+ * @timeline_value_str:
+ *
+ * Fills in the current value of the timeline as a string, like the
+ * sequence number. This should match what @fill_driver_data prints for
+ * the most recently signalled fence (assuming no delayed signalling).
+ */
void (*timeline_value_str)(struct dma_fence *fence,
char *str, int size);
};
@@ -189,7 +256,7 @@ void dma_fence_free(struct dma_fence *fence);
/**
* dma_fence_put - decreases refcount of the fence
- * @fence: [in] fence to reduce refcount of
+ * @fence: fence to reduce refcount of
*/
static inline void dma_fence_put(struct dma_fence *fence)
{
@@ -199,7 +266,7 @@ static inline void dma_fence_put(struct dma_fence *fence)
/**
* dma_fence_get - increases refcount of the fence
- * @fence: [in] fence to increase refcount of
+ * @fence: fence to increase refcount of
*
* Returns the same fence, with refcount increased by 1.
*/
@@ -213,7 +280,7 @@ static inline struct dma_fence *dma_fence_get(struct dma_fence *fence)
/**
* dma_fence_get_rcu - get a fence from a reservation_object_list with
* rcu read lock
- * @fence: [in] fence to increase refcount of
+ * @fence: fence to increase refcount of
*
* Function returns NULL if no refcount could be obtained, or the fence.
*/
@@ -227,7 +294,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence)
/**
* dma_fence_get_rcu_safe - acquire a reference to an RCU tracked fence
- * @fencep: [in] pointer to fence to increase refcount of
+ * @fencep: pointer to fence to increase refcount of
*
* Function returns NULL if no refcount could be obtained, or the fence.
* This function handles acquiring a reference to a fence that may be
@@ -289,14 +356,16 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence);
/**
* dma_fence_is_signaled_locked - Return an indication if the fence
* is signaled yet.
- * @fence: [in] the fence to check
+ * @fence: the fence to check
*
* Returns true if the fence was already signaled, false if not. Since this
* function doesn't enable signaling, it is not guaranteed to ever return
- * true if dma_fence_add_callback, dma_fence_wait or
- * dma_fence_enable_sw_signaling haven't been called before.
+ * true if dma_fence_add_callback(), dma_fence_wait() or
+ * dma_fence_enable_sw_signaling() haven't been called before.
*
- * This function requires fence->lock to be held.
+ * This function requires &dma_fence.lock to be held.
+ *
+ * See also dma_fence_is_signaled().
*/
static inline bool
dma_fence_is_signaled_locked(struct dma_fence *fence)
@@ -314,17 +383,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
/**
* dma_fence_is_signaled - Return an indication if the fence is signaled yet.
- * @fence: [in] the fence to check
+ * @fence: the fence to check
*
* Returns true if the fence was already signaled, false if not. Since this
* function doesn't enable signaling, it is not guaranteed to ever return
- * true if dma_fence_add_callback, dma_fence_wait or
- * dma_fence_enable_sw_signaling haven't been called before.
+ * true if dma_fence_add_callback(), dma_fence_wait() or
+ * dma_fence_enable_sw_signaling() haven't been called before.
*
* It's recommended for seqno fences to call dma_fence_signal when the
* operation is complete, it makes it possible to prevent issues from
* wraparound between time of issue and time of use by checking the return
* value of this function before calling hardware-specific wait instructions.
+ *
+ * See also dma_fence_is_signaled_locked().
*/
static inline bool
dma_fence_is_signaled(struct dma_fence *fence)
@@ -342,8 +413,8 @@ dma_fence_is_signaled(struct dma_fence *fence)
/**
* __dma_fence_is_later - return if f1 is chronologically later than f2
- * @f1: [in] the first fence's seqno
- * @f2: [in] the second fence's seqno from the same context
+ * @f1: the first fence's seqno
+ * @f2: the second fence's seqno from the same context
*
* Returns true if f1 is chronologically later than f2. Both fences must be
* from the same context, since a seqno is not common across contexts.
@@ -355,8 +426,8 @@ static inline bool __dma_fence_is_later(u32 f1, u32 f2)
/**
* dma_fence_is_later - return if f1 is chronologically later than f2
- * @f1: [in] the first fence from the same context
- * @f2: [in] the second fence from the same context
+ * @f1: the first fence from the same context
+ * @f2: the second fence from the same context
*
* Returns true if f1 is chronologically later than f2. Both fences must be
* from the same context, since a seqno is not re-used across contexts.
@@ -372,8 +443,8 @@ static inline bool dma_fence_is_later(struct dma_fence *f1,
/**
* dma_fence_later - return the chronologically later fence
- * @f1: [in] the first fence from the same context
- * @f2: [in] the second fence from the same context
+ * @f1: the first fence from the same context
+ * @f2: the second fence from the same context
*
* Returns NULL if both fences are signaled, otherwise the fence that would be
* signaled last. Both fences must be from the same context, since a seqno is
@@ -398,7 +469,7 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1,
/**
* dma_fence_get_status_locked - returns the status upon completion
- * @fence: [in] the dma_fence to query
+ * @fence: the dma_fence to query
*
* Drivers can supply an optional error status condition before they signal
* the fence (to indicate whether the fence was completed due to an error
@@ -422,8 +493,8 @@ int dma_fence_get_status(struct dma_fence *fence);
/**
* dma_fence_set_error - flag an error condition on the fence
- * @fence: [in] the dma_fence
- * @error: [in] the error to store
+ * @fence: the dma_fence
+ * @error: the error to store
*
* Drivers can supply an optional error status condition before they signal
* the fence, to indicate that the fence was completed due to an error
@@ -449,8 +520,8 @@ signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
/**
* dma_fence_wait - sleep until the fence gets signaled
- * @fence: [in] the fence to wait on
- * @intr: [in] if true, do an interruptible wait
+ * @fence: the fence to wait on
+ * @intr: if true, do an interruptible wait
*
* This function will return -ERESTARTSYS if interrupted by a signal,
* or 0 if the fence was signaled. Other error values may be
@@ -459,6 +530,8 @@ signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
* Performs a synchronous wait on this fence. It is assumed the caller
* directly or indirectly holds a reference to the fence, otherwise the
* fence might be freed before return, resulting in undefined behavior.
+ *
+ * See also dma_fence_wait_timeout() and dma_fence_wait_any_timeout().
*/
static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
{
--
2.17.0
(changed subject and decoupling from udmabuf thread)
On Wed, Apr 11, 2018 at 08:59:32AM +0300, Oleksandr Andrushchenko wrote:
> On 04/10/2018 08:26 PM, Dongwon Kim wrote:
> >On Tue, Apr 10, 2018 at 09:37:53AM +0300, Oleksandr Andrushchenko wrote:
> >>On 04/06/2018 09:57 PM, Dongwon Kim wrote:
> >>>On Fri, Apr 06, 2018 at 03:36:03PM +0300, Oleksandr Andrushchenko wrote:
> >>>>On 04/06/2018 02:57 PM, Gerd Hoffmann wrote:
> >>>>> Hi,
> >>>>>
> >>>>>>>I fail to see any common ground for xen-zcopy and udmabuf ...
> >>>>>>Does the above mean you can assume that xen-zcopy and udmabuf
> >>>>>>can co-exist as two different solutions?
> >>>>>Well, udmabuf route isn't fully clear yet, but yes.
> >>>>>
> >>>>>See also gvt (intel vgpu), where the hypervisor interface is abstracted
> >>>>>away into a separate kernel modules even though most of the actual vgpu
> >>>>>emulation code is common.
> >>>>Thank you for your input, I'm just trying to figure out
> >>>>which of the three z-copy solutions intersect and how much
> >>>>>>And what about hyper-dmabuf?
> >>>xen z-copy solution is pretty similar fundamentally to hyper_dmabuf
> >>>in terms of these core sharing feature:
> >>>
> >>>1. the sharing process - import prime/dmabuf from the producer -> extract
> >>>underlying pages and get those shared -> return references for shared pages
> >Another thing is danvet was kind of against to the idea of importing existing
> >dmabuf/prime buffer and forward it to the other domain due to synchronization
> >issues. He proposed to make hyper_dmabuf only work as an exporter so that it
> >can have a full control over the buffer. I think we need to talk about this
> >further as well.
> Yes, I saw this. But this limits the use-cases so much.
I agree. Our current approach is a lot more flexible. You can find very
similar feedback in my reply to those review messages. However, I also
understand Daniel's concern as well. I believe we need more dicussion
regarding this matter.
> For instance, running Android as a Guest (which uses ION to allocate
> buffers) means that finally HW composer will import dma-buf into
> the DRM driver. Then, in case of xen-front for example, it needs to be
> shared with the backend (Host side). Of course, we can change user-space
> to make xen-front allocate the buffers (make it exporter), but what we try
> to avoid is to change user-space which in normal world would have remain
> unchanged otherwise.
> So, I do think we have to support this use-case and just have to understand
> the complexity.
>
> >
> >danvet, can you comment on this topic?
> >
> >>>2. the page sharing mechanism - it uses Xen-grant-table.
> >>>
> >>>And to give you a quick summary of differences as far as I understand
> >>>between two implementations (please correct me if I am wrong, Oleksandr.)
> >>>
> >>>1. xen-zcopy is DRM specific - can import only DRM prime buffer
> >>>while hyper_dmabuf can export any dmabuf regardless of originator
> >>Well, this is true. And at the same time this is just a matter
> >>of extending the API: xen-zcopy is a helper driver designed for
> >>xen-front/back use-case, so this is why it only has DRM PRIME API
> >>>2. xen-zcopy doesn't seem to have dma-buf synchronization between two VMs
> >>>while (as danvet called it as remote dmabuf api sharing) hyper_dmabuf sends
> >>>out synchronization message to the exporting VM for synchronization.
> >>This is true. Again, this is because of the use-cases it covers.
> >>But having synchronization for a generic solution seems to be a good idea.
> >Yeah, understood xen-zcopy works ok with your use case. But I am just curious
> >if it is ok not to have any inter-domain synchronization in this sharing model.
> The synchronization is done with displif protocol [1]
> >The buffer being shared is technically dma-buf and originator needs to be able
> >to keep track of it.
> As I am working in DRM terms the tracking is done by the DRM core
> for me for free. (This might be one of the reasons Daniel sees DRM
> based implementation fit very good from code-reuse POV).
yeah but once you have a DRM object (whether it's dmabuf or not) on a remote
domain, it is totally new object and out of sync (correct me if I am wrong)
with original DRM prime, isn't it? How could these two different but based on
same pages be synchronized?
> >
> >>>3. 1-level references - when using grant-table for sharing pages, there will
> >>>be same # of refs (each 8 byte)
> >>To be precise, grant ref is 4 bytes
> >You are right. Thanks for correction.;)
> >
> >>>as # of shared pages, which is passed to
> >>>the userspace to be shared with importing VM in case of xen-zcopy.
> >>The reason for that is that xen-zcopy is a helper driver, e.g.
> >>the grant references come from the display backend [1], which implements
> >>Xen display protocol [2]. So, effectively the backend extracts references
> >>from frontend's requests and passes those to xen-zcopy as an array
> >>of refs.
> >>> Compared
> >>>to this, hyper_dmabuf does multiple level addressing to generate only one
> >>>reference id that represents all shared pages.
> >>In the protocol [2] only one reference to the gref directory is passed
> >>between VMs
> >>(and the gref directory is a single-linked list of shared pages containing
> >>all
> >>of the grefs of the buffer).
> >ok, good to know. I will look into its implementation in more details but is
> >this gref directory (chained grefs) something that can be used for any general
> >memory sharing use case or is it jsut for xen-display (in current code base)?
> Not to mislead you: one grant ref is passed via displif protocol,
> but the page it's referencing contains the rest of the grant refs.
>
I checked displif.h. I like the concept of chaining 2nd level grefs.
As you should have already realized, our multi-level addressing is almost
identical to gref directory except that we defined another level on top to
address multiple 2nd level grefs instead of creating a linked list. And I
see there would be an advantage in terms of memory saving in your method.
Now I think why it should be remaining as one of displif features. I think
we could expand this to any type of large buffer sharing use-case in Xen
(possibly as an extension to grant-table driver?)
> As to if this can be used for any memory: yes. It is the same for
> sndif and displif Xen protocols, but defined twice as strictly speaking
> sndif and displif are two separate protocols.
>
> While reviewing your RFC v2 one of the comments I had [2] was that if we
> can start from defining such a generic protocol for hyper-dmabuf.
> It can be a header file, which not only has the description part
> (which then become a part of Documentation/...rst file), but also defines
> all the required constants for requests, responses, defines message formats,
> state diagrams etc. all at one place. Of course this protocol must not be
> Xen specific, but be OS/hypervisor agnostic.
> Having that will trigger a new round of discussion, so we have it all
> designed
> and discussed before we start implementing.
>
> Besides the protocol we have to design UAPI part as well and make sure
> the hyper-dmabuf is not only accessible from user-space, but there will be
> number
> of kernel-space users as well.
> >
> >>>4. inter VM messaging (hype_dmabuf only) - hyper_dmabuf has inter-vm msg
> >>>communication defined for dmabuf synchronization and private data (meta
> >>>info that Matt Roper mentioned) exchange.
> >>This is true, xen-zcopy has no means for inter VM sync and meta-data,
> >>simply because it doesn't have any code for inter VM exchange in it,
> >>e.g. the inter VM protocol is handled by the backend [1].
> >>>5. driver-to-driver notification (hyper_dmabuf only) - importing VM gets
> >>>notified when newdmabuf is exported from other VM - uevent can be optionally
> >>>generated when this happens.
> >>>
> >>>6. structure - hyper_dmabuf is targetting to provide a generic solution for
> >>>inter-domain dmabuf sharing for most hypervisors, which is why it has two
> >>>layers as mattrope mentioned, front-end that contains standard API and backend
> >>>that is specific to hypervisor.
> >>Again, xen-zcopy is decoupled from inter VM communication
> >>>>>No idea, didn't look at it in detail.
> >>>>>
> >>>>>Looks pretty complex from a distant view. Maybe because it tries to
> >>>>>build a communication framework using dma-bufs instead of a simple
> >>>>>dma-buf passing mechanism.
> >>>we started with simple dma-buf sharing but realized there are many
> >>>things we need to consider in real use-case, so we added communication
> >>>, notification and dma-buf synchronization then re-structured it to
> >>>front-end and back-end (this made things more compicated..) since Xen
> >>>was not our only target. Also, we thought passing the reference for the
> >>>buffer (hyper_dmabuf_id) is not secure so added uvent mechanism later.
> >>>
> >>>>Yes, I am looking at it now, trying to figure out the full story
> >>>>and its implementation. BTW, Intel guys were about to share some
> >>>>test application for hyper-dmabuf, maybe I have missed one.
> >>>>It could probably better explain the use-cases and the complexity
> >>>>they have in hyper-dmabuf.
> >>>One example is actually in github. If you want take a look at it, please
> >>>visit:
> >>>
> >>>https://github.com/downor/linux_hyper_dmabuf_test/tree/xen/simple_export
> >>Thank you, I'll have a look
> >>>>>Like xen-zcopy it seems to depend on the idea that the hypervisor
> >>>>>manages all memory it is easy for guests to share pages with the help of
> >>>>>the hypervisor.
> >>>>So, for xen-zcopy we were not trying to make it generic,
> >>>>it just solves display (dumb) zero-copying use-cases for Xen.
> >>>>We implemented it as a DRM helper driver because we can't see any
> >>>>other use-cases as of now.
> >>>>For example, we also have Xen para-virtualized sound driver, but
> >>>>its buffer memory usage is not comparable to what display wants
> >>>>and it works somewhat differently (e.g. there is no "frame done"
> >>>>event, so one can't tell when the sound buffer can be "flipped").
> >>>>At the same time, we do not use virtio-gpu, so this could probably
> >>>>be one more candidate for shared dma-bufs some day.
> >>>>> Which simply isn't the case on kvm.
> >>>>>
> >>>>>hyper-dmabuf and xen-zcopy could maybe share code, or hyper-dmabuf build
> >>>>>on top of xen-zcopy.
> >>>>Hm, I can imagine that: xen-zcopy could be a library code for hyper-dmabuf
> >>>>in terms of implementing all that page sharing fun in multiple directions,
> >>>>e.g. Host->Guest, Guest->Host, Guest<->Guest.
> >>>>But I'll let Matt and Dongwon to comment on that.
> >>>I think we can definitely collaborate. Especially, maybe we are using some
> >>>outdated sharing mechanism/grant-table mechanism in our Xen backend (thanks
> >>>for bringing that up Oleksandr). However, the question is once we collaborate
> >>>somehow, can xen-zcopy's usecase use the standard API that hyper_dmabuf
> >>>provides? I don't think we need different IOCTLs that do the same in the final
> >>>solution.
> >>>
> >>If you think of xen-zcopy as a library (which implements Xen
> >>grant references mangling) and DRM PRIME wrapper on top of that
> >>library, we can probably define proper API for that library,
> >>so both xen-zcopy and hyper-dmabuf can use it. What is more, I am
> >>about to start upstreaming Xen para-virtualized sound device driver soon,
> >>which also uses similar code and gref passing mechanism [3].
> >>(Actually, I was about to upstream drm/xen-front, drm/xen-zcopy and
> >>snd/xen-front and then propose a Xen helper library for sharing big buffers,
> >>so common code of the above drivers can use the same code w/o code
> >>duplication)
> >I think it is possible to use your functions for memory sharing part in
> >hyper_dmabuf's backend (this 'backend' means the layer that does page sharing
> >and inter-vm communication with xen-specific way.), so why don't we work on
> >"Xen helper library for sharing big buffers" first while we continue our
> >discussion on the common API layer that can cover any dmabuf sharing cases.
> >
> Well, I would love we reuse the code that I have, but I also
> understand that it was limited by my use-cases. So, I do not
> insist we have to ;)
> If we start designing and discussing hyper-dmabuf protocol we of course
> can work on this helper library in parallel.
> >>Thank you,
> >>Oleksandr
> >>
> >>P.S. All, is it a good idea to move this out of udmabuf thread into a
> >>dedicated one?
> >Either way is fine with me.
> So, if you can start designing the protocol we may have a dedicated mail
> thread for that. I will try to help with the protocol as much as I can
Sure thanks. We can talk about it. just FYI, I have prepared a application
note that contains definition of hyper_dmabuf messages included in RFC v2
patch. That would be a great starting point. It will be great if you can
review it.
>
> >>>>>cheers,
> >>>>> Gerd
> >>>>>
> >>>>Thank you,
> >>>>Oleksandr
> >>>>
> >>>>P.S. Sorry for making your original mail thread to discuss things much
> >>>>broader than your RFC...
> >>>>
> >>[1] https://github.com/xen-troops/displ_be
> >>[2] https://elixir.bootlin.com/linux/v4.16-rc7/source/include/xen/interface/io/…
> >>[3] https://elixir.bootlin.com/linux/v4.16-rc7/source/include/xen/interface/io/…
> >>
> [1] https://elixir.bootlin.com/linux/v4.16-rc7/source/include/xen/interface/io/…
> [2]
> https://lists.xenproject.org/archives/html/xen-devel/2018-04/msg00685.html
This patch series contains the implementation of a new device driver,
hyper_DMABUF driver, which provides a way to expand the boundary of
Linux DMA-BUF sharing to across different VM instances in Multi-OS platform
enabled by a Hypervisor (e.g. XEN)
This version 2 series is basically refactored version of old series starting
with "[RFC PATCH 01/60] hyper_dmabuf: initial working version of hyper_dmabuf
drv"
Implementation details of this driver are described in the reference guide
added by the second patch, "[RFC PATCH v2 2/5] hyper_dmabuf: architecture
specification and reference guide".
Attaching 'Overview' section here as a quick summary.
------------------------------------------------------------------------------
Section 1. Overview
------------------------------------------------------------------------------
Hyper_DMABUF driver is a Linux device driver running on multiple Virtual
achines (VMs), which expands DMA-BUF sharing capability to the VM environment
where multiple different OS instances need to share same physical data without
data-copy across VMs.
To share a DMA_BUF across VMs, an instance of the Hyper_DMABUF drv on the
exporting VM (so called, “exporter”) imports a local DMA_BUF from the original
producer of the buffer, then re-exports it with an unique ID, hyper_dmabuf_id
for the buffer to the importing VM (so called, “importer”).
Another instance of the Hyper_DMABUF driver on importer registers
a hyper_dmabuf_id together with reference information for the shared physical
pages associated with the DMA_BUF to its database when the export happens.
The actual mapping of the DMA_BUF on the importer’s side is done by
the Hyper_DMABUF driver when user space issues the IOCTL command to access
the shared DMA_BUF. The Hyper_DMABUF driver works as both an importing and
exporting driver as is, that is, no special configuration is required.
Consequently, only a single module per VM is needed to enable cross-VM DMA_BUF
exchange.
------------------------------------------------------------------------------
There is a git repository at github.com where this series of patches are all
integrated in Linux kernel tree based on the commit:
commit ae64f9bd1d3621b5e60d7363bc20afb46aede215
Author: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Date: Sun Dec 3 11:01:47 2018 -0500
Linux 4.15-rc2
https://github.com/downor/linux_hyper_dmabuf.git hyper_dmabuf_integration_v4
Dongwon Kim, Mateusz Polrola (9):
hyper_dmabuf: initial upload of hyper_dmabuf drv core framework
hyper_dmabuf: architecture specification and reference guide
MAINTAINERS: adding Hyper_DMABUF driver section in MAINTAINERS
hyper_dmabuf: user private data attached to hyper_DMABUF
hyper_dmabuf: hyper_DMABUF synchronization across VM
hyper_dmabuf: query ioctl for retreiving various hyper_DMABUF info
hyper_dmabuf: event-polling mechanism for detecting a new hyper_DMABUF
hyper_dmabuf: threaded interrupt in Xen-backend
hyper_dmabuf: default backend for XEN hypervisor
Documentation/hyper-dmabuf-sharing.txt | 734 ++++++++++++++++
MAINTAINERS | 11 +
drivers/dma-buf/Kconfig | 2 +
drivers/dma-buf/Makefile | 1 +
drivers/dma-buf/hyper_dmabuf/Kconfig | 50 ++
drivers/dma-buf/hyper_dmabuf/Makefile | 44 +
.../backends/xen/hyper_dmabuf_xen_comm.c | 944 +++++++++++++++++++++
.../backends/xen/hyper_dmabuf_xen_comm.h | 78 ++
.../backends/xen/hyper_dmabuf_xen_comm_list.c | 158 ++++
.../backends/xen/hyper_dmabuf_xen_comm_list.h | 67 ++
.../backends/xen/hyper_dmabuf_xen_drv.c | 46 +
.../backends/xen/hyper_dmabuf_xen_drv.h | 53 ++
.../backends/xen/hyper_dmabuf_xen_shm.c | 525 ++++++++++++
.../backends/xen/hyper_dmabuf_xen_shm.h | 46 +
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_drv.c | 410 +++++++++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_drv.h | 122 +++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_event.c | 122 +++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_event.h | 38 +
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_id.c | 135 +++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_id.h | 53 ++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ioctl.c | 794 +++++++++++++++++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ioctl.h | 52 ++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_list.c | 295 +++++++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_list.h | 73 ++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_msg.c | 416 +++++++++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_msg.h | 89 ++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ops.c | 415 +++++++++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ops.h | 34 +
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_query.c | 174 ++++
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_query.h | 36 +
.../hyper_dmabuf/hyper_dmabuf_remote_sync.c | 324 +++++++
.../hyper_dmabuf/hyper_dmabuf_remote_sync.h | 32 +
.../dma-buf/hyper_dmabuf/hyper_dmabuf_sgl_proc.c | 257 ++++++
.../dma-buf/hyper_dmabuf/hyper_dmabuf_sgl_proc.h | 43 +
drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_struct.h | 143 ++++
include/uapi/linux/hyper_dmabuf.h | 134 +++
36 files changed, 6950 insertions(+)
create mode 100644 Documentation/hyper-dmabuf-sharing.txt
create mode 100644 drivers/dma-buf/hyper_dmabuf/Kconfig
create mode 100644 drivers/dma-buf/hyper_dmabuf/Makefile
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_comm.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_comm.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_comm_list.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_comm_list.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_drv.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_drv.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_shm.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/backends/xen/hyper_dmabuf_xen_shm.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_drv.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_drv.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_event.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_event.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_id.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_id.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ioctl.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ioctl.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_list.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_list.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_msg.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_msg.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ops.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_ops.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_query.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_query.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_remote_sync.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_remote_sync.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_sgl_proc.c
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_sgl_proc.h
create mode 100644 drivers/dma-buf/hyper_dmabuf/hyper_dmabuf_struct.h
create mode 100644 include/uapi/linux/hyper_dmabuf.h
--
2.16.1
Most of the other cross-driver gfx infrastructure (dma_buf, dma_fence)
also gets cross posted to all the relevant gfx/memory lists. Doing the
same for ION means people won't miss relevant patches.
Cc: Laura Abbott <labbott(a)redhat.com>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: devel(a)driverdev.osuosl.org
Cc: dri-devel(a)lists.freedesktop.org
Cc: linaro-mm-sig(a)lists.linaro.org (moderated for non-subscribers)
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
---
MAINTAINERS | 2 ++
1 file changed, 2 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 555db72d4eb7..d43cdfca3eb5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -902,6 +902,8 @@ ANDROID ION DRIVER
M: Laura Abbott <labbott(a)redhat.com>
M: Sumit Semwal <sumit.semwal(a)linaro.org>
L: devel(a)driverdev.osuosl.org
+L: dri-devel(a)lists.freedesktop.org
+L: linaro-mm-sig(a)lists.linaro.org (moderated for non-subscribers)
S: Supported
F: drivers/staging/android/ion
F: drivers/staging/android/uapi/ion.h
--
2.16.2
On Thu, Mar 29, 2018 at 09:58:54PM -0400, Jerome Glisse wrote:
> dma_map_resource() is the right API (thought its current implementation
> is fill with x86 assumptions). So i would argue that arch can decide to
> implement it or simply return dma error address which trigger fallback
> path into the caller (at least for GPU drivers). SG variant can be added
> on top.
It isn't in general. It doesn't integrate with scatterlists (see my
comment to page one), and it doesn't integrate with all the subsystems
that also need a kernel virtual address.