
[30/43] drm/i915/bdw: Two-stage execlist submit process

Message ID 1406217891-8912-31-git-send-email-thomas.daniel@intel.com (mailing list archive)
State New, archived

Commit Message

Thomas Daniel July 24, 2014, 4:04 p.m. UTC
From: Michel Thierry <michel.thierry@intel.com>

Context switches (and execlist submissions) should happen only when
no other context is active; otherwise pre-emption occurs.

To ensure this, we place context switch requests in a queue, and those
requests are later consumed when the right context switch interrupt is
received (still TODO).

v2: Use a spinlock, do not remove the requests on unqueue (wait for
context switch completion).

Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>

v3: Several rebases and code changes. Use unique ID.

v4:
- Move the queue/lock init to the late ring initialization.
- Damien's kmalloc review comments: check return, use sizeof(*req),
do not cast.

v5:
- Do not reuse drm_i915_gem_request. Instead, create our own.
- New namespace.

Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v1)
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> (v2-v5)
---
 drivers/gpu/drm/i915/intel_lrc.c        |   63 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h        |    8 ++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |    2 +
 3 files changed, 71 insertions(+), 2 deletions(-)

Comments

Daniel Vetter Aug. 14, 2014, 8:05 p.m. UTC | #1
On Thu, Jul 24, 2014 at 05:04:38PM +0100, Thomas Daniel wrote:
> From: Michel Thierry <michel.thierry@intel.com>
> 
> Context switch (and execlist submission) should happen only when
> other contexts are not active, otherwise pre-emption occurs.
> 
> To assure this, we place context switch requests in a queue and those
> request are later consumed when the right context switch interrupt is
> received (still TODO).
> 
> v2: Use a spinlock, do not remove the requests on unqueue (wait for
> context switch completion).
> 
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> 
> v3: Several rebases and code changes. Use unique ID.
> 
> v4:
> - Move the queue/lock init to the late ring initialization.
> - Damien's kmalloc review comments: check return, use sizeof(*req),
> do not cast.
> 
> v5:
> - Do not reuse drm_i915_gem_request. Instead, create our own.
> - New namespace.
> 
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v1)
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> (v2-v5)
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        |   63 ++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/intel_lrc.h        |    8 ++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 +
>  3 files changed, 71 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b6f416..9e91169 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -217,6 +217,63 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
>  	return 0;
>  }
>  
> +static void execlists_context_unqueue(struct intel_engine_cs *ring)
> +{
> +	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
> +	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
> +
> +	if (list_empty(&ring->execlist_queue))
> +		return;
> +
> +	/* Try to read in pairs */
> +	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, execlist_link) {
> +		if (!req0)
> +			req0 = cursor;
> +		else if (req0->ctx == cursor->ctx) {
> +			/* Same ctx: ignore first request, as second request
> +			 * will update tail past first request's workload */
> +			list_del(&req0->execlist_link);
> +			i915_gem_context_unreference(req0->ctx);
> +			kfree(req0);
> +			req0 = cursor;
> +		} else {
> +			req1 = cursor;
> +			break;
> +		}
> +	}
> +
> +	BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
> +			req1? req1->ctx : NULL, req1? req1->tail : 0));

You don't get to hard-hang my driver just because you've done a
programming mistake. Please remember my commandments ;-)

Also checkpatch ...
-Daniel
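
As a rough illustration of the concern (hypothetical rework, not part of the
posted patch, and assuming execlists_submit_context() keeps its int return
value), the hard BUG_ON() could be softened into something like:

	/* Hypothetical rework: warn and bail instead of BUG_ON(), so a
	 * failed ELSP submission does not hard-hang the whole machine. */
	if (WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
					     req1 ? req1->ctx : NULL,
					     req1 ? req1->tail : 0)))
		return;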

> +}
> +
> +static int execlists_context_queue(struct intel_engine_cs *ring,
> +				   struct intel_context *to,
> +				   u32 tail)
> +{
> +	struct intel_ctx_submit_request *req = NULL;
> +	unsigned long flags;
> +	bool was_empty;
> +
> +	req = kzalloc(sizeof(*req), GFP_KERNEL);
> +	if (req == NULL)
> +		return -ENOMEM;
> +	req->ctx = to;
> +	i915_gem_context_reference(req->ctx);
> +	req->ring = ring;
> +	req->tail = tail;
> +
> +	spin_lock_irqsave(&ring->execlist_lock, flags);
> +
> +	was_empty = list_empty(&ring->execlist_queue);
> +	list_add_tail(&req->execlist_link, &ring->execlist_queue);
> +	if (was_empty)
> +		execlists_context_unqueue(ring);
> +
> +	spin_unlock_irqrestore(&ring->execlist_lock, flags);
> +
> +	return 0;
> +}
> +
>  static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
>  {
>  	struct intel_engine_cs *ring = ringbuf->ring;
> @@ -405,8 +462,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>  	if (intel_ring_stopped(ring))
>  		return;
>  
> -	/* FIXME: too cheeky, we don't even check if the ELSP is ready */
> -	execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0);
> +	execlists_context_queue(ring, ctx, ringbuf->tail);
>  }
>  
>  static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> @@ -850,6 +906,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>  	INIT_LIST_HEAD(&ring->request_list);
>  	init_waitqueue_head(&ring->irq_queue);
>  
> +	INIT_LIST_HEAD(&ring->execlist_queue);
> +	spin_lock_init(&ring->execlist_lock);
> +
>  	ret = intel_lr_context_deferred_create(dctx, ring);
>  	if (ret)
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index b59965b..14492a9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -60,4 +60,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
>  			       u64 exec_start, u32 flags);
>  u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
>  
> +struct intel_ctx_submit_request {
> +	struct intel_context *ctx;
> +	struct intel_engine_cs *ring;
> +	u32 tail;
> +
> +	struct list_head execlist_link;
> +};
> +
>  #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c885d5c..6358823 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -223,6 +223,8 @@ struct  intel_engine_cs {
>  	} semaphore;
>  
>  	/* Execlists */
> +	spinlock_t execlist_lock;
> +	struct list_head execlist_queue;
>  	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
>  	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
>  	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
> -- 
> 1.7.9.5
> 
Daniel Vetter Aug. 14, 2014, 8:10 p.m. UTC | #2
On Thu, Jul 24, 2014 at 05:04:38PM +0100, Thomas Daniel wrote:
> From: Michel Thierry <michel.thierry@intel.com>
> 
> Context switch (and execlist submission) should happen only when
> other contexts are not active, otherwise pre-emption occurs.
> 
> To assure this, we place context switch requests in a queue and those
> request are later consumed when the right context switch interrupt is
> received (still TODO).
> 
> v2: Use a spinlock, do not remove the requests on unqueue (wait for
> context switch completion).
> 
> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> 
> v3: Several rebases and code changes. Use unique ID.
> 
> v4:
> - Move the queue/lock init to the late ring initialization.
> - Damien's kmalloc review comments: check return, use sizeof(*req),
> do not cast.
> 
> v5:
> - Do not reuse drm_i915_gem_request. Instead, create our own.
> - New namespace.
> 
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v1)
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> (v2-v5)
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        |   63 ++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/intel_lrc.h        |    8 ++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 +
>  3 files changed, 71 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b6f416..9e91169 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -217,6 +217,63 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
>  	return 0;
>  }
>  
> +static void execlists_context_unqueue(struct intel_engine_cs *ring)
> +{
> +	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
> +	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
> +
> +	if (list_empty(&ring->execlist_queue))
> +		return;
> +
> +	/* Try to read in pairs */
> +	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, execlist_link) {

Ok, because of checkpatch I've looked at this. Imo open-coding this would be
much easier to read, i.e.

	if (!list_empty)
		grab&remove first item;
	if (!list_empty)
		grab&remove 2nd item;

Care to follow up with a patch for that?

Thanks, Daniel
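
For reference, a minimal sketch of the shape being suggested (hypothetical
code, using list_first_entry() from <linux/list.h>; see the follow-up below
on why the posted patch keeps a loop and leaves entries on the queue):

	/* Hypothetical open-coded variant: pop up to two requests. Note it
	 * does not squash consecutive requests for the same context, and it
	 * removes entries immediately rather than waiting for the context
	 * switch interrupt. */
	if (!list_empty(&ring->execlist_queue)) {
		req0 = list_first_entry(&ring->execlist_queue,
					struct intel_ctx_submit_request,
					execlist_link);
		list_del(&req0->execlist_link);
	}
	if (!list_empty(&ring->execlist_queue)) {
		req1 = list_first_entry(&ring->execlist_queue,
					struct intel_ctx_submit_request,
					execlist_link);
		list_del(&req1->execlist_link);
	}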

> +		if (!req0)
> +			req0 = cursor;
> +		else if (req0->ctx == cursor->ctx) {
> +			/* Same ctx: ignore first request, as second request
> +			 * will update tail past first request's workload */
> +			list_del(&req0->execlist_link);
> +			i915_gem_context_unreference(req0->ctx);
> +			kfree(req0);
> +			req0 = cursor;
> +		} else {
> +			req1 = cursor;
> +			break;
> +		}
> +	}
> +
> +	BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
> +			req1? req1->ctx : NULL, req1? req1->tail : 0));
> +}
> +
> +static int execlists_context_queue(struct intel_engine_cs *ring,
> +				   struct intel_context *to,
> +				   u32 tail)
> +{
> +	struct intel_ctx_submit_request *req = NULL;
> +	unsigned long flags;
> +	bool was_empty;
> +
> +	req = kzalloc(sizeof(*req), GFP_KERNEL);
> +	if (req == NULL)
> +		return -ENOMEM;
> +	req->ctx = to;
> +	i915_gem_context_reference(req->ctx);
> +	req->ring = ring;
> +	req->tail = tail;
> +
> +	spin_lock_irqsave(&ring->execlist_lock, flags);
> +
> +	was_empty = list_empty(&ring->execlist_queue);
> +	list_add_tail(&req->execlist_link, &ring->execlist_queue);
> +	if (was_empty)
> +		execlists_context_unqueue(ring);
> +
> +	spin_unlock_irqrestore(&ring->execlist_lock, flags);
> +
> +	return 0;
> +}
> +
>  static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
>  {
>  	struct intel_engine_cs *ring = ringbuf->ring;
> @@ -405,8 +462,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>  	if (intel_ring_stopped(ring))
>  		return;
>  
> -	/* FIXME: too cheeky, we don't even check if the ELSP is ready */
> -	execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0);
> +	execlists_context_queue(ring, ctx, ringbuf->tail);
>  }
>  
>  static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> @@ -850,6 +906,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
>  	INIT_LIST_HEAD(&ring->request_list);
>  	init_waitqueue_head(&ring->irq_queue);
>  
> +	INIT_LIST_HEAD(&ring->execlist_queue);
> +	spin_lock_init(&ring->execlist_lock);
> +
>  	ret = intel_lr_context_deferred_create(dctx, ring);
>  	if (ret)
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index b59965b..14492a9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -60,4 +60,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
>  			       u64 exec_start, u32 flags);
>  u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
>  
> +struct intel_ctx_submit_request {
> +	struct intel_context *ctx;
> +	struct intel_engine_cs *ring;
> +	u32 tail;
> +
> +	struct list_head execlist_link;
> +};
> +
>  #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c885d5c..6358823 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -223,6 +223,8 @@ struct  intel_engine_cs {
>  	} semaphore;
>  
>  	/* Execlists */
> +	spinlock_t execlist_lock;
> +	struct list_head execlist_queue;
>  	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
>  	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
>  	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
> -- 
> 1.7.9.5
> 
Thomas Daniel Aug. 15, 2014, 8:51 a.m. UTC | #3
> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Thursday, August 14, 2014 9:10 PM
> To: Daniel, Thomas
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH 30/43] drm/i915/bdw: Two-stage execlist
> submit process
> 
> On Thu, Jul 24, 2014 at 05:04:38PM +0100, Thomas Daniel wrote:
> > From: Michel Thierry <michel.thierry@intel.com>
> >
> > Context switch (and execlist submission) should happen only when other
> > contexts are not active, otherwise pre-emption occurs.
> >
> > To assure this, we place context switch requests in a queue and those
> > request are later consumed when the right context switch interrupt is
> > received (still TODO).
> >
> > v2: Use a spinlock, do not remove the requests on unqueue (wait for
> > context switch completion).
> >
> > Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
> >
> > v3: Several rebases and code changes. Use unique ID.
> >
> > v4:
> > - Move the queue/lock init to the late ring initialization.
> > - Damien's kmalloc review comments: check return, use sizeof(*req), do
> > not cast.
> >
> > v5:
> > - Do not reuse drm_i915_gem_request. Instead, create our own.
> > - New namespace.
> >
> > Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v1)
> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> (v2-v5)
> > ---
> >  drivers/gpu/drm/i915/intel_lrc.c        |   63
> ++++++++++++++++++++++++++++++-
> >  drivers/gpu/drm/i915/intel_lrc.h        |    8 ++++
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 +
> >  3 files changed, 71 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> > b/drivers/gpu/drm/i915/intel_lrc.c
> > index 5b6f416..9e91169 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -217,6 +217,63 @@ static int execlists_submit_context(struct
> intel_engine_cs *ring,
> >  	return 0;
> >  }
> >
> > +static void execlists_context_unqueue(struct intel_engine_cs *ring) {
> > +	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
> > +	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
> > +
> > +	if (list_empty(&ring->execlist_queue))
> > +		return;
> > +
> > +	/* Try to read in pairs */
> > +	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
> > +execlist_link) {
> 
> Ok, because checkpatch I've looked at this. Imo open-coding this would be
> much easier to read i.e.
> 
> 	if (!list_empty)
> 		grab&remove first item;
> 	if (!list_empty)
> 		grab&remove 2nd item;
> 
> Care to follow up with a patch for that?
> 
> Thanks, Daniel
This needs to be kept as a loop because if there are two consecutive
requests for the same context, they are squashed.  Also, the non-squashed
requests are not removed here (unfortunately the removal is in the next
patch).
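
For illustration, with hypothetical queue contents the squashing works out
like this:

	/* Hypothetical example. Queue before unqueue (oldest first):
	 *   ctxA (tail=0x100), ctxA (tail=0x180), ctxB (tail=0x40)
	 * The first ctxA entry is dropped because the second one already
	 * covers its workload, so the ELSP submission becomes:
	 *   execlists_submit_context(ring, ctxA, 0x180, ctxB, 0x40);
	 * The surviving ctxA and ctxB entries stay on the queue until the
	 * context switch interrupt is handled (removal comes in a later
	 * patch). */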

> 
> > +		if (!req0)
> > +			req0 = cursor;
> > +		else if (req0->ctx == cursor->ctx) {
This is the squashing mentioned above:

> > +			/* Same ctx: ignore first request, as second request
> > +			 * will update tail past first request's workload */
> > +			list_del(&req0->execlist_link);
> > +			i915_gem_context_unreference(req0->ctx);
> > +			kfree(req0);
> > +			req0 = cursor;
> > +		} else {
> > +			req1 = cursor;
> > +			break;
> > +		}
> > +	}
> > +
> > +	BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
> > +			req1? req1->ctx : NULL, req1? req1->tail : 0)); }
> > +
> > +static int execlists_context_queue(struct intel_engine_cs *ring,
> > +				   struct intel_context *to,
> > +				   u32 tail)
> > +{
> > +	struct intel_ctx_submit_request *req = NULL;
> > +	unsigned long flags;
> > +	bool was_empty;
> > +
> > +	req = kzalloc(sizeof(*req), GFP_KERNEL);
> > +	if (req == NULL)
> > +		return -ENOMEM;
> > +	req->ctx = to;
> > +	i915_gem_context_reference(req->ctx);
> > +	req->ring = ring;
> > +	req->tail = tail;
> > +
> > +	spin_lock_irqsave(&ring->execlist_lock, flags);
> > +
> > +	was_empty = list_empty(&ring->execlist_queue);
> > +	list_add_tail(&req->execlist_link, &ring->execlist_queue);
> > +	if (was_empty)
> > +		execlists_context_unqueue(ring);
> > +
> > +	spin_unlock_irqrestore(&ring->execlist_lock, flags);
> > +
> > +	return 0;
> > +}
> > +
> >  static int logical_ring_invalidate_all_caches(struct intel_ringbuffer
> > *ringbuf)  {
> >  	struct intel_engine_cs *ring = ringbuf->ring; @@ -405,8 +462,7 @@
> > void intel_logical_ring_advance_and_submit(struct intel_ringbuffer
> *ringbuf)
> >  	if (intel_ring_stopped(ring))
> >  		return;
> >
> > -	/* FIXME: too cheeky, we don't even check if the ELSP is ready */
> > -	execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0);
> > +	execlists_context_queue(ring, ctx, ringbuf->tail);
> >  }
> >
> >  static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, @@
> > -850,6 +906,9 @@ static int logical_ring_init(struct drm_device *dev, struct
> intel_engine_cs *rin
> >  	INIT_LIST_HEAD(&ring->request_list);
> >  	init_waitqueue_head(&ring->irq_queue);
> >
> > +	INIT_LIST_HEAD(&ring->execlist_queue);
> > +	spin_lock_init(&ring->execlist_lock);
> > +
> >  	ret = intel_lr_context_deferred_create(dctx, ring);
> >  	if (ret)
> >  		return ret;
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.h
> > b/drivers/gpu/drm/i915/intel_lrc.h
> > index b59965b..14492a9 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.h
> > +++ b/drivers/gpu/drm/i915/intel_lrc.h
> > @@ -60,4 +60,12 @@ int intel_execlists_submission(struct drm_device
> *dev, struct drm_file *file,
> >  			       u64 exec_start, u32 flags);
> >  u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
> >
> > +struct intel_ctx_submit_request {
> > +	struct intel_context *ctx;
> > +	struct intel_engine_cs *ring;
> > +	u32 tail;
> > +
> > +	struct list_head execlist_link;
> > +};
> > +
> >  #endif /* _INTEL_LRC_H_ */
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index c885d5c..6358823 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -223,6 +223,8 @@ struct  intel_engine_cs {
> >  	} semaphore;
> >
> >  	/* Execlists */
> > +	spinlock_t execlist_lock;
> > +	struct list_head execlist_queue;
> >  	u32             irq_keep_mask; /* bitmask for interrupts that should not
> be masked */
> >  	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
> >  	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
> > --
> > 1.7.9.5
> >
> 
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
Daniel Vetter Aug. 15, 2014, 9:38 a.m. UTC | #4
On Fri, Aug 15, 2014 at 08:51:22AM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Thursday, August 14, 2014 9:10 PM
> > To: Daniel, Thomas
> > Cc: intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH 30/43] drm/i915/bdw: Two-stage execlist
> > submit process
> > On Thu, Jul 24, 2014 at 05:04:38PM +0100, Thomas Daniel wrote:
> > > From: Michel Thierry <michel.thierry@intel.com>
> > > +static void execlists_context_unqueue(struct intel_engine_cs *ring) {
> > > +	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
> > > +	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
> > > +
> > > +	if (list_empty(&ring->execlist_queue))
> > > +		return;
> > > +
> > > +	/* Try to read in pairs */
> > > +	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
> > > +execlist_link) {
> > 
> > Ok, because checkpatch I've looked at this. Imo open-coding this would be
> > much easier to read i.e.
> > 
> > 	if (!list_empty)
> > 		grab&remove first item;
> > 	if (!list_empty)
> > 		grab&remove 2nd item;
> > 
> > Care to follow up with a patch for that?
> > 
> > Thanks, Daniel
> This needs to be kept as a loop because if there are two consecutive
> requests for the same context they are squashed.  Also the non-squashed
> requests are not removed here (unfortunately the remove is in the next
> patch).

Ok, this sounds like we need to overhaul it anyway for the request
tracking then.
-Daniel

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5b6f416..9e91169 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -217,6 +217,63 @@  static int execlists_submit_context(struct intel_engine_cs *ring,
 	return 0;
 }
 
+static void execlists_context_unqueue(struct intel_engine_cs *ring)
+{
+	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
+	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
+
+	if (list_empty(&ring->execlist_queue))
+		return;
+
+	/* Try to read in pairs */
+	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, execlist_link) {
+		if (!req0)
+			req0 = cursor;
+		else if (req0->ctx == cursor->ctx) {
+			/* Same ctx: ignore first request, as second request
+			 * will update tail past first request's workload */
+			list_del(&req0->execlist_link);
+			i915_gem_context_unreference(req0->ctx);
+			kfree(req0);
+			req0 = cursor;
+		} else {
+			req1 = cursor;
+			break;
+		}
+	}
+
+	BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
+			req1? req1->ctx : NULL, req1? req1->tail : 0));
+}
+
+static int execlists_context_queue(struct intel_engine_cs *ring,
+				   struct intel_context *to,
+				   u32 tail)
+{
+	struct intel_ctx_submit_request *req = NULL;
+	unsigned long flags;
+	bool was_empty;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (req == NULL)
+		return -ENOMEM;
+	req->ctx = to;
+	i915_gem_context_reference(req->ctx);
+	req->ring = ring;
+	req->tail = tail;
+
+	spin_lock_irqsave(&ring->execlist_lock, flags);
+
+	was_empty = list_empty(&ring->execlist_queue);
+	list_add_tail(&req->execlist_link, &ring->execlist_queue);
+	if (was_empty)
+		execlists_context_unqueue(ring);
+
+	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+	return 0;
+}
+
 static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
@@ -405,8 +462,7 @@  void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 	if (intel_ring_stopped(ring))
 		return;
 
-	/* FIXME: too cheeky, we don't even check if the ELSP is ready */
-	execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0);
+	execlists_context_queue(ring, ctx, ringbuf->tail);
 }
 
 static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
@@ -850,6 +906,9 @@  static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	INIT_LIST_HEAD(&ring->request_list);
 	init_waitqueue_head(&ring->irq_queue);
 
+	INIT_LIST_HEAD(&ring->execlist_queue);
+	spin_lock_init(&ring->execlist_lock);
+
 	ret = intel_lr_context_deferred_create(dctx, ring);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index b59965b..14492a9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -60,4 +60,12 @@  int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 			       u64 exec_start, u32 flags);
 u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
+struct intel_ctx_submit_request {
+	struct intel_context *ctx;
+	struct intel_engine_cs *ring;
+	u32 tail;
+
+	struct list_head execlist_link;
+};
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c885d5c..6358823 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -223,6 +223,8 @@  struct  intel_engine_cs {
 	} semaphore;
 
 	/* Execlists */
+	spinlock_t execlist_lock;
+	struct list_head execlist_queue;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
 	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,