
[RFC,03/44] drm/i915: Add extra add_request calls

Message ID 1403803475-16337-4-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived

Commit Message

John Harrison June 26, 2014, 5:23 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The scheduler needs to track batch buffers by seqno, without extra,
non-batch-buffer work being attached to the same seqno. This means that any
code which adds work to the ring should explicitly call i915_add_request()
when it has finished writing to the ring.

The add_request() function does extra work, such as flushing caches, that is
not necessarily wanted everywhere. To avoid this, a new
i915_add_request_wo_flush() function has been added which skips the cache flush
and just tidies up the request structures and seqno values.
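
As a rough illustration of the rule above (the helper below is hypothetical
and not part of this patch), code that writes commands to the ring now closes
them out with an explicit request, using the no-flush variant when nothing has
dirtied the caches:

	static int emit_housekeeping_work(struct intel_engine_cs *ring)
	{
		int ret;

		/* Reserve ring space; this also preallocates the lazy request. */
		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);

		/* Attach a request/seqno to the commands just written,
		 * skipping the cache flush. */
		return i915_add_request_wo_flush(ring);
	}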

Note, much of this patch was implemented by Naresh Kumar Kachhi for pending
power management improvements. However, it is also directly applicable to the
scheduler work as noted above.
---
 drivers/gpu/drm/i915/i915_dma.c              |    5 +++++
 drivers/gpu/drm/i915/i915_drv.h              |    9 +++++---
 drivers/gpu/drm/i915/i915_gem.c              |   31 ++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_context.c      |    9 ++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |    4 ++--
 drivers/gpu/drm/i915/i915_gem_render_state.c |    2 +-
 drivers/gpu/drm/i915/intel_display.c         |   10 ++++-----
 7 files changed, 52 insertions(+), 18 deletions(-)

Comments

Jesse Barnes June 30, 2014, 9:10 p.m. UTC | #1
On Thu, 26 Jun 2014 18:23:54 +0100
John.C.Harrison@Intel.com wrote:

> From: John Harrison <John.C.Harrison@Intel.com>
> 
> The scheduler needs to track batch buffers by seqno, without extra,
> non-batch-buffer work being attached to the same seqno. This means that any
> code which adds work to the ring should explicitly call i915_add_request()
> when it has finished writing to the ring.
> 
> The add_request() function does extra work, such as flushing caches, that is
> not necessarily wanted everywhere. To avoid this, a new
> i915_add_request_wo_flush() function has been added which skips the cache flush
> and just tidies up the request structures and seqno values.
> 
> Note, much of this patch was implemented by Naresh Kumar Kachhi for pending
> power management improvements. However, it is also directly applicable to the
> scheduler work as noted above.

I think "no_flush" would be more in line with some of the other
functions in the kernel.  "wo" makes me think of "write only".  But
it's not a big deal.

I do wonder about the rules for when add_request is needed though, and
I need to look later in the series for the usage.  When I looked at it
in relation to fences, it didn't seem to be a good fit since it looked
like requests got freed when the active list was cleared, vs when they
were actually consumed by some user.

But this patch seems straightforward enough, so:

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Daniel Vetter July 7, 2014, 6:41 p.m. UTC | #2
On Mon, Jun 30, 2014 at 02:10:16PM -0700, Jesse Barnes wrote:
> I think "no_flush" would be more in line with some of the other
> functions in the kernel.  "wo" makes me think of "write only".  But
> it's not a big deal.
> 
> I do wonder about the rules for when add_request is needed though, and
> I need to look later in the series for the usage.  When I looked at it
> in relation to fences, it didn't seem to be a good fit since it looked
> like requests got freed when the active list was cleared, vs when they
> were actually consumed by some user.

Yeah, wo_flush is highly confusing while no_flush is rather clear. There's
also the question of how this all will interfere with execlists, since
those patches also need to keep track of things, but in a slightly
different way.

I'll go through your RFC for some light reading, but I think we should
settle execlists first before proceeding with the scheduler in earnest.
-Daniel
Chris Wilson July 8, 2014, 7:44 a.m. UTC | #3
On Mon, Jul 07, 2014 at 08:41:47PM +0200, Daniel Vetter wrote:
> On Mon, Jun 30, 2014 at 02:10:16PM -0700, Jesse Barnes wrote:
> > I think "no_flush" would be more in line with some of the other
> > functions in the kernel.  "wo" makes me think of "write only".  But
> > it's not a big deal.
> > 
> > I do wonder about the rules for when add_request is needed though, and
> > I need to look later in the series for the usage.  When I looked at it
> > in relation to fences, it didn't seem to be a good fit since it looked
> > like requests got freed when the active list was cleared, vs when they
> > were actually consumed by some user.
> 
> Yeah, wo_flush is highly confusing while no_flush is rather clear. There's
> also the question of how this all will interfere with execlists, since
> those patches also need to keep track of things, but in a slightly
> different way.
> 
> I'll go through your RFC for some light reading, but I think we should
> settle execlists first before proceeding with the scheduler in earnest.

On top of these extra requests, it is time to worry about read-read
optimisations. I would like for busy_ioctl to tell me that a flip is
pending on a particular pipe (though that probably requires extending
the ioctl to pass back separate busy/write/read rings) and at that point
I start to worry about undue synchronisation. That seems fitting for a
request overhaul.
-Chris
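
For reference, the busy_ioctl extension described above would amount to
reporting per-engine read/write ownership instead of a single flag. A purely
illustrative sketch, with a hypothetical struct name and layout rather than
any existing uAPI:

	struct drm_i915_gem_busy_rings {
		__u32 handle;      /* GEM object being queried */
		__u32 busy;        /* non-zero while any engine references the object */
		__u32 write_ring;  /* engine with an outstanding write, if any */
		__u32 read_rings;  /* bitmask of engines with outstanding reads */
	};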

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 67f2918..494b156 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -456,6 +456,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
 				   struct drm_clip_rect *cliprects,
 				   void *cmdbuf)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	int nbox = cmd->num_cliprects;
 	int i = 0, count, ret;
 
@@ -482,6 +483,7 @@ static int i915_dispatch_cmdbuffer(struct drm_device * dev,
 	}
 
 	i915_emit_breadcrumb(dev);
+	i915_add_request_wo_flush(LP_RING(dev_priv));
 	return 0;
 }
 
@@ -544,6 +546,7 @@ static int i915_dispatch_batchbuffer(struct drm_device * dev,
 	}
 
 	i915_emit_breadcrumb(dev);
+	i915_add_request_wo_flush(LP_RING(dev_priv));
 	return 0;
 }
 
@@ -597,6 +600,7 @@ static int i915_dispatch_flip(struct drm_device * dev)
 		ADVANCE_LP_RING();
 	}
 
+	i915_add_request_wo_flush(LP_RING(dev_priv));
 	master_priv->sarea_priv->pf_current_page = dev_priv->dri1.current_page;
 	return 0;
 }
@@ -774,6 +778,7 @@ static int i915_emit_irq(struct drm_device * dev)
 		OUT_RING(dev_priv->dri1.counter);
 		OUT_RING(MI_USER_INTERRUPT);
 		ADVANCE_LP_RING();
+		i915_add_request_wo_flush(LP_RING(dev_priv));
 	}
 
 	return dev_priv->dri1.counter;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7a96ca0..e3295cb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2199,7 +2199,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to);
+			 struct intel_engine_cs *to, bool add_request);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct intel_engine_cs *ring);
 int i915_gem_dumb_create(struct drm_file *file_priv,
@@ -2272,9 +2272,12 @@ int __must_check i915_gem_suspend(struct drm_device *dev);
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *batch_obj,
-		       u32 *seqno);
+		       u32 *seqno,
+		       bool flush_caches);
 #define i915_add_request(ring, seqno) \
-	__i915_add_request(ring, NULL, NULL, seqno)
+	__i915_add_request(ring, NULL, NULL, seqno, true)
+#define i915_add_request_wo_flush(ring) \
+	__i915_add_request(ring, NULL, NULL, NULL, false)
 int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
 				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5a13d9e..898660c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2320,7 +2320,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
 		       struct drm_i915_gem_object *obj,
-		       u32 *out_seqno)
+		       u32 *out_seqno,
+		       bool flush_caches)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
@@ -2335,9 +2336,11 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * is that the flush _must_ happen before the next request, no matter
 	 * what.
 	 */
-	ret = intel_ring_flush_all_caches(ring);
-	if (ret)
-		return ret;
+	if (flush_caches) {
+		ret = intel_ring_flush_all_caches(ring);
+		if (ret)
+			return ret;
+	}
 
 	request = ring->preallocated_lazy_request;
 	if (WARN_ON(request == NULL))
@@ -2815,6 +2818,8 @@ out:
  *
  * @obj: object which may be in use on another ring.
  * @to: ring we wish to use the object on. May be NULL.
+ * @add_request: do we need to add a request to track operations
+ *    submitted on ring with sync_to function
  *
  * This code is meant to abstract object synchronization with the GPU.
  * Calling with NULL implies synchronizing the object with the CPU
@@ -2824,7 +2829,7 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to)
+		     struct intel_engine_cs *to, bool add_request)
 {
 	struct intel_engine_cs *from = obj->ring;
 	u32 seqno;
@@ -2848,12 +2853,15 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 
 	trace_i915_gem_ring_sync_to(from, to, seqno);
 	ret = to->semaphore.sync_to(to, from, seqno);
-	if (!ret)
+	if (!ret) {
 		/* We use last_read_seqno because sync_to()
 		 * might have just caused seqno wrap under
 		 * the radar.
 		 */
 		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
+		if (add_request)
+			i915_add_request_wo_flush(to);
+	}
 
 	return ret;
 }
@@ -2958,6 +2966,15 @@ int i915_gpu_idle(struct drm_device *dev)
 		if (ret)
 			return ret;
 
+		/* Make sure the context switch (if one actually happened)
+		 * gets wrapped up and finished rather than hanging around
+		 * and confusing things later. */
+		if (ring->outstanding_lazy_seqno) {
+			ret = i915_add_request(ring, NULL);
+			if (ret)
+				return ret;
+		}
+
 		ret = intel_ring_idle(ring);
 		if (ret)
 			return ret;
@@ -3832,7 +3849,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	int ret;
 
 	if (pipelined != obj->ring) {
-		ret = i915_gem_object_sync(obj, pipelined);
+		ret = i915_gem_object_sync(obj, pipelined, true);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3ffe308..d1d2ee0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -488,6 +488,15 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
 		ret = i915_switch_context(ring, ring->default_context);
 		if (ret)
 			return ret;
+
+		/* Make sure the context switch (if one actually happened)
+		 * gets wrapped up and finished rather than hanging around
+		 * and confusing things later. */
+		if (ring->outstanding_lazy_seqno) {
+			ret = i915_add_request_wo_flush(ring);
+			if (ret)
+				return ret;
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3a30133..ee836a6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -858,7 +858,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring);
+		ret = i915_gem_object_sync(obj, ring, false);
 		if (ret)
 			return ret;
 
@@ -998,7 +998,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, NULL);
+	(void)__i915_add_request(ring, file, obj, NULL, true);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 3521f99..50118cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -190,7 +190,7 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
 
 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so->obj), ring);
 
-	ret = __i915_add_request(ring, NULL, so->obj, NULL);
+	ret = __i915_add_request(ring, NULL, so->obj, NULL, true);
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	render_state_free(so);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 54095d4..fa1ffbb 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8980,7 +8980,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
 
 	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(ring);
+	i915_add_request_wo_flush(ring);
 	return 0;
 }
 
@@ -9012,7 +9012,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, MI_NOOP);
 
 	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(ring);
+	i915_add_request_wo_flush(ring);
 	return 0;
 }
 
@@ -9051,7 +9051,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, pf | pipesrc);
 
 	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(ring);
+	i915_add_request_wo_flush(ring);
 	return 0;
 }
 
@@ -9087,7 +9087,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, pf | pipesrc);
 
 	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(ring);
+	i915_add_request_wo_flush(ring);
 	return 0;
 }
 
@@ -9182,7 +9182,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	intel_ring_emit(ring, (MI_NOOP));
 
 	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(ring);
+	i915_add_request_wo_flush(ring);
 	return 0;
 }