diff mbox

[4/5] drm/i915: Modifying RC6 Promotion timer for Media workloads.

Message ID 1424963818-11931-5-git-send-email-deepak.s@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

deepak.s@linux.intel.com Feb. 26, 2015, 3:16 p.m. UTC
From: Deepak S <deepak.s@linux.intel.com>

In the normal case, the RC6 promotion timer is 1750us (VLV) / 500us (CHV).
This results in more time spent in the C1 state. To get more residency in
C6 for media workloads, the timer is changed to 250us.
This is not done for 3D workloads, as too many C6-C0 transition
delays can hurt performance.

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++++++++++++
 drivers/gpu/drm/i915/intel_drv.h           |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c           | 15 +++++++++++++
 drivers/gpu/drm/i915/intel_pm.c            | 35 ++++++++++++++++++++++++++++++
 5 files changed, 70 insertions(+)

Comments

Chris Wilson Feb. 26, 2015, 4:08 p.m. UTC | #1
On Thu, Feb 26, 2015 at 08:46:57PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@linux.intel.com>
> 
> In normal cases, RC6 promotion timer is 1700us/500us. This will
> result in more time spent in C1 state. For more residency in C6
> in case of media workloads, this is changed to 250us.
> Not doing this for 3D workloads as too many C6-C0 transition
> delays can result in performance impact

I would prefer it if you extended intel_mark_busy() to note the source of
work, and so extended the busy/idle tracking per-ring with the
appropriate hooks for vlv to modify RPS, like how we already do for the
general gen6_rps_busy and gen6_rps_idle.
-Chris
deepak.s@linux.intel.com Feb. 27, 2015, 2:53 a.m. UTC | #2
On Thursday 26 February 2015 09:38 PM, Chris Wilson wrote:
> On Thu, Feb 26, 2015 at 08:46:57PM +0530, deepak.s@linux.intel.com wrote:
>> From: Deepak S <deepak.s@linux.intel.com>
>>
>> In normal cases, RC6 promotion timer is 1700us/500us. This will
>> result in more time spent in C1 state. For more residency in C6
>> in case of media workloads, this is changed to 250us.
>> Not doing this for 3D workloads as too many C6-C0 transition
>> delays can result in performance impact
> I would prefer it if you extended intel_mark_busy() to note the source of
> work, and so extended the busy/idle tracking per-ring with the
> appropriate hooks for vlv to modify RPS, like how we already do for the
> general gen6_rps_busy and gen6_rps_idle.
> -Chris

Thanks Chris. It's better to use the existing framework.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a1dd8bc..e33bf0d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1160,6 +1160,9 @@  struct intel_gen6_power_mgmt {
 	 * Must be taken after struct_mutex if nested.
 	 */
 	struct mutex hw_lock;
+
+	/* Delayed work to adjust RC6 promotion timer */
+	struct delayed_work vlv_media_timeout_work;
 };
 
 /* defined intel_pm.c */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 85a6ada..81f4066 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1272,6 +1272,21 @@  i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 	i915_gem_execbuffer_move_to_active(vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
+	/* For vlv/chv, modify RC6 promotion timer upon hitting Media workload only
+	 * This will help in better power savings with media scenarios.
+	 */
+	if (((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) &&
+		IS_VALLEYVIEW(dev) && dev_priv->rps.enabled) {
+
+		vlv_modify_rc6_promotion_timer(dev_priv, true);
+
+		/* Start a timer for 1 sec to reset this value to original */
+		mod_delayed_work(dev_priv->wq,
+				&dev_priv->rps.vlv_media_timeout_work,
+				msecs_to_jiffies(1000));
+
+	}
+
 error:
 	kfree(cliprects);
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1fb1529..000f2a6 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1234,6 +1234,8 @@  void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 			  struct skl_ddb_allocation *ddb /* out */);
+void vlv_modify_rc6_promotion_timer(struct drm_i915_private *dev_priv,
+		bool media_active);
 
 
 /* intel_sdvo.c */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074b..5f495e73 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -716,6 +716,21 @@  int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 	i915_gem_execbuffer_move_to_active(vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
+	/*
+	 * CHV: Shorten RC6 promotion timer upon hitting Media workload to help
+	 * increase power savings with media scenarios.
+	 */
+	if (((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) &&
+		IS_CHERRYVIEW(dev_priv->dev) && dev_priv->rps.enabled) {
+
+		vlv_modify_rc6_promotion_timer(dev_priv, true);
+
+		/* Start a timer for 1 sec to reset this value to original */
+		mod_delayed_work(dev_priv->wq,
+				&dev_priv->rps.vlv_media_timeout_work,
+				msecs_to_jiffies(1000));
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e8bd9b9..7716be9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3941,6 +3941,9 @@  static void cherryview_disable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	/* Cancel any pending work-item */
+	cancel_delayed_work_sync(&dev_priv->rps.vlv_media_timeout_work);
+
 	I915_WRITE(GEN6_RC_CONTROL, 0);
 }
 
@@ -3952,6 +3955,9 @@  static void valleyview_disable_rps(struct drm_device *dev)
 	 * This what the BIOS expects when going into suspend */
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
+	/* Cancel any pending work-item */
+	cancel_delayed_work_sync(&dev_priv->rps.vlv_media_timeout_work);
+
 	I915_WRITE(GEN6_RC_CONTROL, 0);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -4857,6 +4863,32 @@  static void cherryview_enable_rps(struct drm_device *dev)
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
+void vlv_modify_rc6_promotion_timer(struct drm_i915_private *dev_priv,
+				    bool media_active)
+{
+	if (media_active) {
+		/* Media active: short timeout, ~250 us (0xC3 * 1.28 us) */
+		I915_WRITE(GEN6_RC6_THRESHOLD, 0xC3);
+	} else {
+		if (IS_CHERRYVIEW(dev_priv->dev)) {
+			/* CHV normal timeout, ~500 us (0x186 * 1.28 us) */
+			I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
+		} else {
+			/* Non-CHV normal timeout, ~1750 us (0x557 * 1.28 us) */
+			I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+		}
+	}
+}
+
+static void vlv_media_timeout_work_func(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv = container_of(work, struct drm_i915_private,
+					    rps.vlv_media_timeout_work.work);
+
+	vlv_modify_rc6_promotion_timer(dev_priv, false); /* back to normal value */
+}
+
+
 static void valleyview_enable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -6687,5 +6719,8 @@  void intel_pm_setup(struct drm_device *dev)
 	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
 			  intel_gen6_powersave_work);
 
+	INIT_DELAYED_WORK(&dev_priv->rps.vlv_media_timeout_work,
+				vlv_media_timeout_work_func);
+
 	dev_priv->pm.suspended = false;
 }