diff mbox

[v5] drm/i915/vlv: WA for Turbo and RC6 to work together.

Message ID 1395902101-9869-1-git-send-email-deepak.s@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

deepak.s@linux.intel.com March 27, 2014, 6:35 a.m. UTC
From: Deepak S <deepak.s@linux.intel.com>

With RC6 enabled, BYT has a HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Resolved comments and remove kernel-doc style comments. (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +++++
 drivers/gpu/drm/i915/i915_irq.c | 135 +++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h |  12 +++-
 drivers/gpu/drm/i915/intel_pm.c |  13 +++-
 4 files changed, 170 insertions(+), 5 deletions(-)

Comments

Ville Syrjälä March 28, 2014, 12:53 p.m. UTC | #1
On Thu, Mar 27, 2014 at 12:05:01PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@linux.intel.com>
> 
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
> on increasing/decreasing the freq. This logic will monitor C0
> counters of render/media power-wells over EI period and takes
> necessary action based on these values
> 
> v2: Refactor duplicate code. (Ville)
> 
> v3: Reformat the comments. (Ville)
> 
> v4: Enable required counters and remove unwanted code (Ville)
> 
> v5: Resolved comments and remove kernel-doc style comments. (Ville)

That doesn't tell me that you added the frequency change acceleration
support.

> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  15 +++++
>  drivers/gpu/drm/i915/i915_irq.c | 135 +++++++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h |  12 +++-
>  drivers/gpu/drm/i915/intel_pm.c |  13 +++-
>  4 files changed, 170 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7c212f3..c48ea93 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -816,6 +816,12 @@ struct i915_suspend_saved_registers {
>  	u32 savePCH_PORT_HOTPLUG;
>  };
>  
> +struct intel_rps_ei_calc {
> +	u32 cz_ts_ei;
> +	u32 render_ei_c0;
> +	u32 media_ei_c0;
> +};
> +
>  struct intel_gen6_power_mgmt {
>  	/* work and pm_iir are protected by dev_priv->irq_lock */
>  	struct work_struct work;
> @@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
>  	bool rp_up_masked;
>  	bool rp_down_masked;
>  
> +	u32 ei_interrupt_count;
> +
>  	int last_adj;
>  	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
> @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
>  	/* gen6+ rps state */
>  	struct intel_gen6_power_mgmt rps;
>  
> +	/* rps wa up ei calculation */
> +	struct intel_rps_ei_calc rps_up_ei;
> +
> +	/* rps wa down ei calculation */
> +	struct intel_rps_ei_calc rps_down_ei;
> +
> +
>  	/* ilk-only ips/rps state. Everything in here is protected by the global
>  	 * mchdev_lock in intel_pm.c */
>  	struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 300f127..4b421b4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1121,6 +1121,132 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> +				struct  intel_rps_ei_calc *rps_ei)
> +{
> +	u32 cz_ts, cz_freq_khz;
> +	u32 render_count, media_count;
> +	u32 elapsed_render, elapsed_media, elapsed_time;
> +	u32 residency = 0;
> +
> +	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> +	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> +	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> +	if (rps_ei->cz_ts_ei == 0) {
> +		rps_ei->cz_ts_ei = cz_ts;
> +		rps_ei->render_ei_c0 = render_count;
> +		rps_ei->media_ei_c0 = media_count;
> +
> +		return dev_priv->rps.cur_freq;
> +	}
> +
> +	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> +	rps_ei->cz_ts_ei = cz_ts;
> +
> +	elapsed_render = render_count - rps_ei->render_ei_c0;
> +	rps_ei->render_ei_c0 = render_count;
> +
> +	elapsed_media = media_count - rps_ei->media_ei_c0;
> +	rps_ei->media_ei_c0 = media_count;
> +
> +	/* Convert all the counters into common unit of milli sec */
> +	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> +	elapsed_render /=  cz_freq_khz;
> +	elapsed_media /= cz_freq_khz;
> +
> +	/*
> +	 * Calculate overall C0 residency percentage
> +	 * only if elapsed time is non zero
> +	 */
> +	if (elapsed_time) {
> +		residency =
> +			((max(elapsed_render, elapsed_media) * 100)
> +				/ elapsed_time);
> +	}
> +
> +	return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> +	u32 residency_C0_up = 0, residency_C0_down = 0;
> +	u8 new_delay, adj;
> +
> +	dev_priv->rps.ei_interrupt_count++;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> +	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> +		return dev_priv->rps.cur_freq;
> +	}
> +
> +
> +	/*
> +	 * To down throttle, C0 residency should be less than down threshold
> +	 * for continous EI intervals. So calculate down EI counters
> +	 * once in VLV_INT_COUNT_FOR_DOWN_EI
> +	 */
> +	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> +		dev_priv->rps.ei_interrupt_count = 0;
> +
> +		residency_C0_down = vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_down_ei);
> +	} else {
> +		residency_C0_up = vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_up_ei);
> +	}
> +
> +	new_delay = dev_priv->rps.cur_freq;
> +
> +	adj = dev_priv->rps.last_adj;
> +	/* C0 residency is greater than UP threshold. Increase Frequency */
> +	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +		if (adj > 0)
> +			adj *= 2;
> +		else
> +			adj = 1;
> +
> +		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
> +			new_delay = dev_priv->rps.cur_freq + adj;
> +
> +		/*
> +		 * For better performance, jump directly
> +		 * to RPe if we're below it.
> +		 */
> +		if (new_delay < dev_priv->rps.efficient_freq)
> +			new_delay = dev_priv->rps.efficient_freq;
> +
> +	} else if (!dev_priv->rps.ei_interrupt_count &&
> +			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> +		if (adj < 0)
> +			adj *= 2;
> +		else
> +			adj = -1;
> +		/*
> +		 * This means, C0 residency is less than down threshold over
> +		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> +		 */
> +		if (dev_priv->rps.cur_freq > dev_priv->rps.max_freq_softlimit)
> +			new_delay = dev_priv->rps.cur_freq + adj;
> +	}
> +
> +	return new_delay;
> +}
> +
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>  	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1163,6 +1289,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  		else
>  			new_delay = dev_priv->rps.min_freq_softlimit;
>  		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>  	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>  		if (adj < 0)
>  			adj *= 2;
> @@ -3053,6 +3181,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>  			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>  
>  		dev_priv->pm_irq_mask = 0xffffffff;
> +
>  		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>  		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>  		I915_WRITE(GEN6_PMIER, pm_irqs);
> @@ -4095,7 +4224,11 @@ void intel_irq_init(struct drm_device *dev)
>  	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
>  
>  	/* Let's track the enabled rps events */
> -	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> +	if (IS_VALLEYVIEW(dev))
> +		/* WAUseRC0ResidenncyTurbo:VLV */

Looks like you forgot to fix the w/a comment.

Otherwise the patch looks good to me, so if you fix those two small
issues you can add:
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> +		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> +	else
> +		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  
>  	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
>  		    i915_hangcheck_elapsed,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 927a7c1..e334bf1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -506,6 +506,7 @@ enum punit_power_well {
>  #define PUNIT_REG_GPU_FREQ_STS			0xd8
>  #define   GENFREQSTATUS				(1<<0)
>  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>  
>  #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>  #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
> @@ -521,6 +522,11 @@ enum punit_power_well {
>  #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>  #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>  
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> +#define VLV_RP_UP_EI_THRESHOLD			90
> +#define VLV_RP_DOWN_EI_THRESHOLD		70
> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
> +
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG				0x8
>  #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
> @@ -4984,6 +4990,7 @@ enum punit_power_well {
>  #define  VLV_GTLC_PW_STATUS			0x130094
>  #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>  #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>  #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>  #define   FORCEWAKE_KERNEL			0x1
>  #define   FORCEWAKE_USER			0x2
> @@ -5112,12 +5119,15 @@ enum punit_power_well {
>  #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>  #define VLV_COUNTER_CONTROL			0x138104
>  #define   VLV_COUNT_RANGE_HIGH			(1<<15)
> +#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
> +#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
>  #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
>  #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
>  #define GEN6_GT_GFX_RC6				0x138108
>  #define GEN6_GT_GFX_RC6p			0x13810C
>  #define GEN6_GT_GFX_RC6pp			0x138110
> -
> +#define VLV_RENDER_C0_COUNT_REG		0x138118
> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
>  #define GEN6_PCODE_MAILBOX			0x138124
>  #define   GEN6_PCODE_READY			(1<<31)
>  #define   GEN6_READ_OC_PARAMS			0xc
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b66a43b..30730be 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3097,9 +3097,13 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>  				~VLV_GFX_CLK_FORCE_ON_BIT);
>  
>  	/* Unmask Up interrupts */
> -	dev_priv->rps.rp_up_masked = true;
> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> +	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> +		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
> +	else {
> +		dev_priv->rps.rp_up_masked = true;
> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>  						dev_priv->rps.min_freq_softlimit);
> +	}
>  }
>  
>  void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3619,6 +3623,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>  
>  	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>  
>  	I915_WRITE(GEN6_RP_CONTROL,
>  		   GEN6_RP_MEDIA_TURBO |
> @@ -3639,9 +3644,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  
>  	/* allows RC6 residency counter to work */
>  	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> +		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
> +				      VLV_RENDER_RC0_COUNT_EN |
>  				      VLV_MEDIA_RC6_COUNT_EN |
>  				      VLV_RENDER_RC6_COUNT_EN));
> +
>  	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>  		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson March 28, 2014, 1:06 p.m. UTC | #2
On Fri, Mar 28, 2014 at 02:53:48PM +0200, Ville Syrjälä wrote:
> On Thu, Mar 27, 2014 at 12:05:01PM +0530, deepak.s@linux.intel.com wrote:
> > @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
> >  	/* gen6+ rps state */
> >  	struct intel_gen6_power_mgmt rps;
> >  
> > +	/* rps wa up ei calculation */
> > +	struct intel_rps_ei_calc rps_up_ei;
> > +
> > +	/* rps wa down ei calculation */
> > +	struct intel_rps_ei_calc rps_down_ei;

I could have sworn there was a field for holding all the interesting rps
state together. 
-Chris
deepak.s@linux.intel.com March 30, 2014, 6:27 a.m. UTC | #3
On Friday 28 March 2014 06:36 PM, Chris Wilson wrote:
> On Fri, Mar 28, 2014 at 02:53:48PM +0200, Ville Syrjälä wrote:
>> On Thu, Mar 27, 2014 at 12:05:01PM +0530, deepak.s@linux.intel.com wrote:
>>> @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
>>>   	/* gen6+ rps state */
>>>   	struct intel_gen6_power_mgmt rps;
>>>   
>>> +	/* rps wa up ei calculation */
>>> +	struct intel_rps_ei_calc rps_up_ei;
>>> +
>>> +	/* rps wa down ei calculation */
>>> +	struct intel_rps_ei_calc rps_down_ei;
> I could have sworn there was a field for holding all the interesting rps
> state together.
> -Chris
>
Hi Chris,

Earlier I was using the rps structure to hold the WA rps state, but there was a lot of duplicated code; to avoid that, I created a separate structure.
We can still refactor and use the rps structure to hold the WA state. Let me know whether we need to create a separate patch for the refactoring or add it to the WA patch itself.

I am thinking of adding a new patch on top of this.

Thanks
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c212f3..c48ea93 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -816,6 +816,12 @@  struct i915_suspend_saved_registers {
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {	/* previous-sample snapshot kept by vlv_c0_residency() */
+	u32 cz_ts_ei;		/* last PUNIT_REG_CZ_TIMESTAMP reading; 0 = no sample yet */
+	u32 render_ei_c0;	/* last VLV_RENDER_C0_COUNT_REG reading */
+	u32 media_ei_c0;	/* last VLV_MEDIA_C0_COUNT_REG reading */
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -843,6 +849,8 @@  struct intel_gen6_power_mgmt {
 	bool rp_up_masked;
 	bool rp_down_masked;
 
+	u32 ei_interrupt_count;
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1403,6 +1411,13 @@  typedef struct drm_i915_private {
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
+	/* rps wa up ei calculation */
+	struct intel_rps_ei_calc rps_up_ei;
+
+	/* rps wa down ei calculation */
+	struct intel_rps_ei_calc rps_down_ei;
+
+
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
 	struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 300f127..4b421b4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1121,6 +1121,132 @@  void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
 	}
 }
 
+/*
+ * vlv_c0_residency - C0 residency of the busier power well since last sample
+ *
+ * Reads the CZ timestamp from the Punit and the render/media C0 counters,
+ * stores the new readings in @rps_ei and returns max(render, media) C0 time
+ * as a percentage of the elapsed time. When @rps_ei holds no baseline yet
+ * (cz_ts_ei == 0) the readings are only recorded and dev_priv->rps.cur_freq
+ * is returned, which is not a percentage.
+ */
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+				struct  intel_rps_ei_calc *rps_ei)
+{
+	u32 now, render_now, media_now;
+	u32 time_ms, render_ms, media_ms;
+	u32 khz;
+
+	/* Keep the read order: Punit timestamp first, then both counters. */
+	now = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+	render_now = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_now = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (!rps_ei->cz_ts_ei) {
+		/*
+		 * No baseline yet: record one, there is no interval to rate.
+		 */
+		rps_ei->cz_ts_ei = now;
+		rps_ei->render_ei_c0 = render_now;
+		rps_ei->media_ei_c0 = media_now;
+		return dev_priv->rps.cur_freq;
+	}
+
+	/* Deltas since the previous sample, scaled to milliseconds. */
+	time_ms = (now - rps_ei->cz_ts_ei) / VLV_CZ_CLOCK_TO_MILLI_SEC;
+	render_ms = (render_now - rps_ei->render_ei_c0) / khz;
+	media_ms = (media_now - rps_ei->media_ei_c0) / khz;
+
+	/* Current readings become the baseline for the next interval. */
+	rps_ei->cz_ts_ei = now;
+	rps_ei->render_ei_c0 = render_now;
+	rps_ei->media_ei_c0 = media_now;
+
+	/* If the scaled elapsed time rounds down to zero, report 0. */
+	if (!time_ms)
+		return 0;
+
+	return (max(render_ms, media_ms) * 100) / time_ms;
+}
+
+
+/*
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private (caller holds rps.hw_lock)
+ * Returns the new frequency request, or rps.cur_freq to stay put.
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	/* Signed: rps.last_adj is an int and down steps are negative. */
+	int new_delay, adj;
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+	/* First call: only take the baseline samples for both windows. */
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_freq;
+	}
+
+	/*
+	 * To down throttle, C0 residency should be less than down threshold
+	 * for continuous EI intervals. So calculate down EI counters
+	 * once in VLV_INT_COUNT_FOR_DOWN_EI
+	 */
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+		/* Restart the count; zero also marks "down sample taken". */
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_up_ei);
+	}
+
+	new_delay = dev_priv->rps.cur_freq;
+
+	adj = dev_priv->rps.last_adj;
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else
+			adj = 1;
+
+		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+
+		/*
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < dev_priv->rps.efficient_freq)
+			new_delay = dev_priv->rps.efficient_freq;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		if (adj < 0)
+			adj *= 2;
+		else
+			adj = -1;
+		/*
+		 * This means, C0 residency is less than down threshold over
+		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		 */
+		if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+	}
+
+	return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1163,6 +1289,8 @@  static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
@@ -3053,6 +3181,7 @@  static void gen5_gt_irq_postinstall(struct drm_device *dev)
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
+
 		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
 		I915_WRITE(GEN6_PMIER, pm_irqs);
@@ -4095,7 +4224,11 @@  void intel_irq_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
 	/* Let's track the enabled rps events */
-	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+	if (IS_VALLEYVIEW(dev))
+		/* WAUseRC0ResidenncyTurbo:VLV */
+		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 927a7c1..e334bf1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -506,6 +506,7 @@  enum punit_power_well {
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
 #define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
+#define PUNIT_REG_CZ_TIMESTAMP			0xce
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -521,6 +522,11 @@  enum punit_power_well {
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
+#define VLV_RP_UP_EI_THRESHOLD			90
+#define VLV_RP_DOWN_EI_THRESHOLD		70
+#define VLV_INT_COUNT_FOR_DOWN_EI		5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -4984,6 +4990,7 @@  enum punit_power_well {
 #define  VLV_GTLC_PW_STATUS			0x130094
 #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
 #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
@@ -5112,12 +5119,15 @@  enum punit_power_well {
 #define GEN6_GT_GFX_RC6_LOCKED			0x138104
 #define VLV_COUNTER_CONTROL			0x138104
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
+#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
+#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
 #define GEN6_GT_GFX_RC6				0x138108
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
-
+#define VLV_RENDER_C0_COUNT_REG		0x138118
+#define VLV_MEDIA_C0_COUNT_REG			0x13811C
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
 #define   GEN6_READ_OC_PARAMS			0xc
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b66a43b..30730be 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3097,9 +3097,13 @@  static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 				~VLV_GFX_CLK_FORCE_ON_BIT);
 
 	/* Unmask Up interrupts */
-	dev_priv->rps.rp_up_masked = true;
-	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
+	else {
+		dev_priv->rps.rp_up_masked = true;
+		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
 						dev_priv->rps.min_freq_softlimit);
+	}
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3619,6 +3623,7 @@  static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
@@ -3639,9 +3644,11 @@  static void valleyview_enable_rps(struct drm_device *dev)
 
 	/* allows RC6 residency counter to work */
 	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
+				      VLV_RENDER_RC0_COUNT_EN |
 				      VLV_MEDIA_RC6_COUNT_EN |
 				      VLV_RENDER_RC6_COUNT_EN));
+
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;