diff mbox

[RFC,44/44] drm/i915: Fake batch support for page flips

Message ID 1403803475-16337-45-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

John Harrison June 26, 2014, 5:24 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

Any commands written to the ring without the scheduler's knowledge can get lost
during a pre-emption event. This check-in updates the page flip code to send the
ring commands via the scheduler's 'fake batch' interface, thus keeping the page
flip safe from being clobbered.
---
 drivers/gpu/drm/i915/intel_display.c |   84 ++++++++++++++++------------------
 1 file changed, 40 insertions(+), 44 deletions(-)

Comments

Daniel Vetter July 7, 2014, 7:25 p.m. UTC | #1
On Thu, Jun 26, 2014 at 06:24:35PM +0100, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> Any commands written to the ring without the scheduler's knowledge can get lost
> during a pre-emption event. This checkin updates the page flip code to send the
> ring commands via the scheduler's 'fake batch' interface. Thus the page flip is
> kept safe from being clobbered.

Same comment as with the execlist series: Can't we just use mmio flips
instead? We could just restrict the scheduler to more recent platforms if
mmio flips aren't available on all platforms ...
-Daniel
> ---
>  drivers/gpu/drm/i915/intel_display.c |   84 ++++++++++++++++------------------
>  1 file changed, 40 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index fa1ffbb..8bbc5d3 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9099,8 +9099,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  				 uint32_t flags)
>  {
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	uint32_t plane_bit = 0;
> -	int len, ret;
> +	uint32_t plane_bit = 0, sched_flags;
> +	int ret;
>  
>  	switch (intel_crtc->plane) {
>  	case PLANE_A:
> @@ -9117,18 +9117,6 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  		return -ENODEV;
>  	}
>  
> -	len = 4;
> -	if (ring->id == RCS) {
> -		len += 6;
> -		/*
> -		 * On Gen 8, SRM is now taking an extra dword to accommodate
> -		 * 48bits addresses, and we need a NOOP for the batch size to
> -		 * stay even.
> -		 */
> -		if (IS_GEN8(dev))
> -			len += 2;
> -	}
> -
>  	/*
>  	 * BSpec MI_DISPLAY_FLIP for IVB:
>  	 * "The full packet must be contained within the same cache line."
> @@ -9139,13 +9127,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	 * then do the cacheline alignment, and finally emit the
>  	 * MI_DISPLAY_FLIP.
>  	 */
> -	ret = intel_ring_cacheline_align(ring);
> -	if (ret)
> -		return ret;
> -
> -	ret = intel_ring_begin(ring, len);
> -	if (ret)
> -		return ret;
> +	sched_flags = i915_ebp_sf_cacheline_align;
>  
>  	/* Unmask the flip-done completion message. Note that the bspec says that
>  	 * we should do this for both the BCS and RCS, and that we must not unmask
> @@ -9157,32 +9139,46 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	 * to zero does lead to lockups within MI_DISPLAY_FLIP.
>  	 */
>  	if (ring->id == RCS) {
> -		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> -		intel_ring_emit(ring, DERRMR);
> -		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
> -					DERRMR_PIPEB_PRI_FLIP_DONE |
> -					DERRMR_PIPEC_PRI_FLIP_DONE));
> -		if (IS_GEN8(dev))
> -			intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8(1) |
> -					      MI_SRM_LRM_GLOBAL_GTT);
> -		else
> -			intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) |
> -					      MI_SRM_LRM_GLOBAL_GTT);
> -		intel_ring_emit(ring, DERRMR);
> -		intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
> -		if (IS_GEN8(dev)) {
> -			intel_ring_emit(ring, 0);
> -			intel_ring_emit(ring, MI_NOOP);
> -		}
> -	}
> +		uint32_t cmds[] = {
> +			MI_LOAD_REGISTER_IMM(1),
> +			DERRMR,
> +			~(DERRMR_PIPEA_PRI_FLIP_DONE |
> +				DERRMR_PIPEB_PRI_FLIP_DONE |
> +				DERRMR_PIPEC_PRI_FLIP_DONE),
> +			IS_GEN8(dev) ? (MI_STORE_REGISTER_MEM_GEN8(1) |
> +					MI_SRM_LRM_GLOBAL_GTT) :
> +				       (MI_STORE_REGISTER_MEM(1) |
> +					MI_SRM_LRM_GLOBAL_GTT),
> +			DERRMR,
> +			ring->scratch.gtt_offset + 256,
> +//		if (IS_GEN8(dev)) {
> +			0,
> +			MI_NOOP,
> +//		}
> +			MI_DISPLAY_FLIP_I915 | plane_bit,
> +			fb->pitches[0] | obj->tiling_mode,
> +			intel_crtc->unpin_work->gtt_offset,
> +			MI_NOOP
> +		};
> +		uint32_t len = sizeof(cmds) / sizeof(*cmds);
> +
> +		ret = i915_scheduler_queue_nonbatch(ring, cmds, len, &obj, 1, sched_flags);
> +	} else {
> +		uint32_t cmds[] = {
> +			MI_DISPLAY_FLIP_I915 | plane_bit,
> +			fb->pitches[0] | obj->tiling_mode,
> +			intel_crtc->unpin_work->gtt_offset,
> +			MI_NOOP
> +		};
> +		uint32_t len = sizeof(cmds) / sizeof(*cmds);
>  
> -	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
> -	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
> -	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
> -	intel_ring_emit(ring, (MI_NOOP));
> +		ret = i915_scheduler_queue_nonbatch(ring, cmds, len, &obj, 1, sched_flags);
> +	}
> +	if (ret)
> +		return ret;
>  
>  	intel_mark_page_flip_active(intel_crtc);
> -	i915_add_request_wo_flush(ring);
> +
>  	return 0;
>  }
>  
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fa1ffbb..8bbc5d3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9099,8 +9099,8 @@  static int intel_gen7_queue_flip(struct drm_device *dev,
 				 uint32_t flags)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t plane_bit = 0;
-	int len, ret;
+	uint32_t plane_bit = 0, sched_flags;
+	int ret;
 
 	switch (intel_crtc->plane) {
 	case PLANE_A:
@@ -9117,18 +9117,6 @@  static int intel_gen7_queue_flip(struct drm_device *dev,
 		return -ENODEV;
 	}
 
-	len = 4;
-	if (ring->id == RCS) {
-		len += 6;
-		/*
-		 * On Gen 8, SRM is now taking an extra dword to accommodate
-		 * 48bits addresses, and we need a NOOP for the batch size to
-		 * stay even.
-		 */
-		if (IS_GEN8(dev))
-			len += 2;
-	}
-
 	/*
 	 * BSpec MI_DISPLAY_FLIP for IVB:
 	 * "The full packet must be contained within the same cache line."
@@ -9139,13 +9127,7 @@  static int intel_gen7_queue_flip(struct drm_device *dev,
 	 * then do the cacheline alignment, and finally emit the
 	 * MI_DISPLAY_FLIP.
 	 */
-	ret = intel_ring_cacheline_align(ring);
-	if (ret)
-		return ret;
-
-	ret = intel_ring_begin(ring, len);
-	if (ret)
-		return ret;
+	sched_flags = i915_ebp_sf_cacheline_align;
 
 	/* Unmask the flip-done completion message. Note that the bspec says that
 	 * we should do this for both the BCS and RCS, and that we must not unmask
@@ -9157,32 +9139,46 @@  static int intel_gen7_queue_flip(struct drm_device *dev,
 	 * to zero does lead to lockups within MI_DISPLAY_FLIP.
 	 */
 	if (ring->id == RCS) {
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, DERRMR);
-		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
-					DERRMR_PIPEB_PRI_FLIP_DONE |
-					DERRMR_PIPEC_PRI_FLIP_DONE));
-		if (IS_GEN8(dev))
-			intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8(1) |
-					      MI_SRM_LRM_GLOBAL_GTT);
-		else
-			intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) |
-					      MI_SRM_LRM_GLOBAL_GTT);
-		intel_ring_emit(ring, DERRMR);
-		intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
-		if (IS_GEN8(dev)) {
-			intel_ring_emit(ring, 0);
-			intel_ring_emit(ring, MI_NOOP);
-		}
-	}
+		uint32_t cmds[] = {
+			MI_LOAD_REGISTER_IMM(1),
+			DERRMR,
+			~(DERRMR_PIPEA_PRI_FLIP_DONE |
+				DERRMR_PIPEB_PRI_FLIP_DONE |
+				DERRMR_PIPEC_PRI_FLIP_DONE),
+			IS_GEN8(dev) ? (MI_STORE_REGISTER_MEM_GEN8(1) |
+					MI_SRM_LRM_GLOBAL_GTT) :
+				       (MI_STORE_REGISTER_MEM(1) |
+					MI_SRM_LRM_GLOBAL_GTT),
+			DERRMR,
+			ring->scratch.gtt_offset + 256,
+//		if (IS_GEN8(dev)) {
+			0,
+			MI_NOOP,
+//		}
+			MI_DISPLAY_FLIP_I915 | plane_bit,
+			fb->pitches[0] | obj->tiling_mode,
+			intel_crtc->unpin_work->gtt_offset,
+			MI_NOOP
+		};
+		uint32_t len = sizeof(cmds) / sizeof(*cmds);
+
+		ret = i915_scheduler_queue_nonbatch(ring, cmds, len, &obj, 1, sched_flags);
+	} else {
+		uint32_t cmds[] = {
+			MI_DISPLAY_FLIP_I915 | plane_bit,
+			fb->pitches[0] | obj->tiling_mode,
+			intel_crtc->unpin_work->gtt_offset,
+			MI_NOOP
+		};
+		uint32_t len = sizeof(cmds) / sizeof(*cmds);
 
-	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
-	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
-	intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset);
-	intel_ring_emit(ring, (MI_NOOP));
+		ret = i915_scheduler_queue_nonbatch(ring, cmds, len, &obj, 1, sched_flags);
+	}
+	if (ret)
+		return ret;
 
 	intel_mark_page_flip_active(intel_crtc);
-	i915_add_request_wo_flush(ring);
+
 	return 0;
 }