diff mbox series

[v12,3/3] drm/i915: peel dma-fence-chains wait fences

Message ID 20200708131751.334457-4-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: timeline semaphore support | expand

Commit Message

Lionel Landwerlin July 8, 2020, 1:17 p.m. UTC
To allow faster engine-to-engine synchronization, peel the layer of
dma-fence-chain to expose potential i915 fences so that the
i915-request code can emit HW semaphore wait/signal operations in the
ring, which is faster than waking up the host to submit unblocked
workloads after interrupt notification.

v2: Also deal with chains where the last node is not a dma-fence-chain

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

Comments

Daniel Vetter July 29, 2020, 12:36 p.m. UTC | #1
On Wed, Jul 08, 2020 at 04:17:51PM +0300, Lionel Landwerlin wrote:
> To allow faster engine to engine synchronization, peel the layer of
> dma-fence-chain to expose potential i915 fences so that the
> i915-request code can emit HW semaphore wait/signal operations in the
> ring which is faster than waking up the host to submit unblocked
> workloads after interrupt notification.
> 
> v2: Also deal with chains where the last node is not a dma-fence-chain
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
>  1 file changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index d8814e637e71..3ffd95d1dc2c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -2403,6 +2403,7 @@ await_fence_array(struct i915_execbuffer *eb)
>  
>  	for (n = 0; n < eb->n_fences; n++) {
>  		struct drm_syncobj *syncobj;
> +		struct dma_fence_chain *chain;
>  		struct dma_fence *fence;
>  		unsigned int flags;
>  
> @@ -2423,7 +2424,43 @@ await_fence_array(struct i915_execbuffer *eb)
>  				continue;
>  		}
>  
> -		err = i915_request_await_dma_fence(eb->request, fence);
> +		chain = to_dma_fence_chain(fence);
> +		if (chain) {
> +			struct dma_fence *iter;
> +
> +			/*
> +			 * If we're dealing with a dma-fence-chain, peel the
> +			 * chain by adding all of the unsignaled fences
> +			 * (dma_fence_chain_for_each does that for us) the
> +			 * chain points to.
> +			 *
> +			 * This enables us to identify waits on i915 fences
> +			 * and allows for faster engine-to-engine
> +			 * synchronization using HW semaphores.
> +			 */
> +			dma_fence_chain_for_each(iter, fence) {
> +				struct dma_fence_chain *iter_chain =
> +					to_dma_fence_chain(iter);
> +
> +				/*
> +				 * It is possible that the last item in the
> +				 * chain is not a dma_fence_chain.
> +				 */
> +				if (iter_chain) {
> +					err = i915_request_await_dma_fence(eb->request,
> +									   iter_chain->fence);
> +				} else {
> +					err = i915_request_await_dma_fence(eb->request, iter);

I'm kinda wondering whether there should be a limit to how deep we go
before we just give up and wait on the chain, since all we're doing here
(in the worst case at least) is rebuilding the chain.

But hey we can figure this out later on when it actually hurts ...

On the series:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> +				}
> +				if (err < 0) {
> +					dma_fence_put(iter);
> +					break;
> +				}
> +			}
> +		} else {
> +			err = i915_request_await_dma_fence(eb->request, fence);
> +		}
> +
>  		dma_fence_put(fence);
>  		if (err < 0)
>  			return err;
> -- 
> 2.27.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index d8814e637e71..3ffd95d1dc2c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2403,6 +2403,7 @@  await_fence_array(struct i915_execbuffer *eb)
 
 	for (n = 0; n < eb->n_fences; n++) {
 		struct drm_syncobj *syncobj;
+		struct dma_fence_chain *chain;
 		struct dma_fence *fence;
 		unsigned int flags;
 
@@ -2423,7 +2424,43 @@  await_fence_array(struct i915_execbuffer *eb)
 				continue;
 		}
 
-		err = i915_request_await_dma_fence(eb->request, fence);
+		chain = to_dma_fence_chain(fence);
+		if (chain) {
+			struct dma_fence *iter;
+
+			/*
+			 * If we're dealing with a dma-fence-chain, peel the
+			 * chain by adding all of the unsignaled fences
+			 * (dma_fence_chain_for_each does that for us) the
+			 * chain points to.
+			 *
+			 * This enables us to identify waits on i915 fences
+			 * and allows for faster engine-to-engine
+			 * synchronization using HW semaphores.
+			 */
+			dma_fence_chain_for_each(iter, fence) {
+				struct dma_fence_chain *iter_chain =
+					to_dma_fence_chain(iter);
+
+				/*
+				 * It is possible that the last item in the
+				 * chain is not a dma_fence_chain.
+				 */
+				if (iter_chain) {
+					err = i915_request_await_dma_fence(eb->request,
+									   iter_chain->fence);
+				} else {
+					err = i915_request_await_dma_fence(eb->request, iter);
+				}
+				if (err < 0) {
+					dma_fence_put(iter);
+					break;
+				}
+			}
+		} else {
+			err = i915_request_await_dma_fence(eb->request, fence);
+		}
+
 		dma_fence_put(fence);
 		if (err < 0)
 			return err;