Message ID | 1425571079-3051-1-git-send-email-deepak.s@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepak.s@linux.intel.com wrote: > From: Deepak S <deepak.s@linux.intel.com> > > In normal cases, RC6 promotion timer is 1700us/500us. This will > result in more time spent in C1 state. For more residency in > C6 in case of media workloads, this is changed to 250us. > Not doing this for 3D workloads as too many C6-C0 > transition delays can result in performance impact. > > v2: Extend GPU busy & idle detection framework for rc6 Promotion > timer changes (Chris) > > Signed-off-by: Deepak S <deepak.s@linux.intel.com> I've thought Chris' idea was to put this into the gen6_rps_boost/idle functions? You could check from within them I think for whether the vcs is still busy ... One more comment below. -Daniel > --- > drivers/gpu/drm/i915/i915_gem.c | 10 +++++++++- > drivers/gpu/drm/i915/intel_display.c | 3 ++- > drivers/gpu/drm/i915/intel_drv.h | 2 ++ > drivers/gpu/drm/i915/intel_pm.c | 27 +++++++++++++++++++++++++++ > 4 files changed, 40 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 3831cc0..85f8aa6 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2428,7 +2428,7 @@ int __i915_add_request(struct intel_engine_cs *ring, > struct drm_i915_gem_request *request; > struct intel_ringbuffer *ringbuf; > u32 request_start; > - int ret; > + int ret, was_empty; > > request = ring->outstanding_lazy_request; > if (WARN_ON(request == NULL)) > @@ -2495,6 +2495,7 @@ int __i915_add_request(struct intel_engine_cs *ring, > } > > request->emitted_jiffies = jiffies; > + was_empty = list_empty(&ring->request_list); > list_add_tail(&request->list, &ring->request_list); > request->file_priv = NULL; > > @@ -2519,6 +2520,10 @@ int __i915_add_request(struct intel_engine_cs *ring, > queue_delayed_work(dev_priv->wq, > &dev_priv->mm.retire_work, > round_jiffies_up_relative(HZ)); > + > + if ((ring->id == VCS) && was_empty) > + 
vlv_media_promotion_timer_busy(dev_priv); > + > intel_mark_busy(dev_priv->dev); > > return 0; > @@ -2802,6 +2807,9 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) > } > > WARN_ON(i915_verify_lists(ring->dev)); > + > + if (ring->id == VCS && list_empty(&ring->request_list)) > + vlv_media_promotion_timer_idle(dev_priv); > } > > bool > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 597c10b..5d121b4 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -9172,8 +9172,9 @@ void intel_mark_idle(struct drm_device *dev) > intel_decrease_pllclock(crtc); > } > > - if (INTEL_INFO(dev)->gen >= 6) > + if (INTEL_INFO(dev)->gen >= 6) { > gen6_rps_idle(dev->dev_private); > + } Unnecessary hunk. And a bikeshed: I think generally if we name something vlv_ we put the platform checks outside of the function. Or have some other guarantee in place to make sure it's only called on the right platforms. Otherwise we generally pick an intel_ prefix. 
> > out: > intel_runtime_pm_put(dev_priv); > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index 2a6ec4b..f1a90b8 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -1233,6 +1233,8 @@ void ironlake_teardown_rc6(struct drm_device *dev); > void gen6_update_ring_freq(struct drm_device *dev); > void gen6_rps_idle(struct drm_i915_private *dev_priv); > void gen6_rps_boost(struct drm_i915_private *dev_priv); > +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv); > +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv); > void ilk_wm_get_hw_state(struct drm_device *dev); > void skl_wm_get_hw_state(struct drm_device *dev); > void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index e710b43..d23b60a 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3961,6 +3961,33 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv) > mutex_unlock(&dev_priv->rps.hw_lock); > } > > +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv) > +{ > + struct drm_device *dev = dev_priv->dev; > + > + if (!IS_VALLEYVIEW(dev)) > + return; > + > + if (IS_CHERRYVIEW(dev_priv->dev)) { > + /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ > + I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); > + } else { > + /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */ > + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); > + } > +} > + > +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv) > +{ > + struct drm_device *dev = dev_priv->dev; > + > + if (!IS_VALLEYVIEW(dev)) > + return; > + > + /* TO threshold set to 250 us ( 0xC3 * 1.28 us) */ > + I915_WRITE(GEN6_RC6_THRESHOLD, 0xC3); > +} > + > void intel_set_rps(struct drm_device *dev, u8 val) > { > if (IS_VALLEYVIEW(dev)) > -- > 1.9.1 > > _______________________________________________ > Intel-gfx 
mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Friday 06 March 2015 10:10 PM, Daniel Vetter wrote: > On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepak.s@linux.intel.com wrote: >> From: Deepak S <deepak.s@linux.intel.com> >> >> In normal cases, RC6 promotion timer is 1700us/500us. This will >> result in more time spent in C1 state. For more residency in >> C6 in case of media workloads, this is changed to 250us. >> Not doing this for 3D workloads as too many C6-C0 >> transition delays can result in performance impact. >> >> v2: Extend GPU busy & idle detection framework for rc6 Promotion >> timer changes (Chris) >> >> Signed-off-by: Deepak S <deepak.s@linux.intel.com> > I've thought Chris' idea was to put this into the gen6_rps_boost/idle > functions? You could check from within them I think for whether the vcs is > still busy ... One more comment below. > -Daniel Hi Daniel, gen6_rps_boost/idle will be called only for RCS, right? Also, we get gen6_rps_boost during __wait_request. But we want to program the promotion timer when we add a request to VCS to apply the value immediately. 
Thanks Deepak >> --- >> drivers/gpu/drm/i915/i915_gem.c | 10 +++++++++- >> drivers/gpu/drm/i915/intel_display.c | 3 ++- >> drivers/gpu/drm/i915/intel_drv.h | 2 ++ >> drivers/gpu/drm/i915/intel_pm.c | 27 +++++++++++++++++++++++++++ >> 4 files changed, 40 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index 3831cc0..85f8aa6 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -2428,7 +2428,7 @@ int __i915_add_request(struct intel_engine_cs *ring, >> struct drm_i915_gem_request *request; >> struct intel_ringbuffer *ringbuf; >> u32 request_start; >> - int ret; >> + int ret, was_empty; >> >> request = ring->outstanding_lazy_request; >> if (WARN_ON(request == NULL)) >> @@ -2495,6 +2495,7 @@ int __i915_add_request(struct intel_engine_cs *ring, >> } >> >> request->emitted_jiffies = jiffies; >> + was_empty = list_empty(&ring->request_list); >> list_add_tail(&request->list, &ring->request_list); >> request->file_priv = NULL; >> >> @@ -2519,6 +2520,10 @@ int __i915_add_request(struct intel_engine_cs *ring, >> queue_delayed_work(dev_priv->wq, >> &dev_priv->mm.retire_work, >> round_jiffies_up_relative(HZ)); >> + >> + if ((ring->id == VCS) && was_empty) >> + vlv_media_promotion_timer_busy(dev_priv); >> + >> intel_mark_busy(dev_priv->dev); >> >> return 0; >> @@ -2802,6 +2807,9 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) >> } >> >> WARN_ON(i915_verify_lists(ring->dev)); >> + >> + if (ring->id == VCS && list_empty(&ring->request_list)) >> + vlv_media_promotion_timer_idle(dev_priv); >> } >> >> bool >> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c >> index 597c10b..5d121b4 100644 >> --- a/drivers/gpu/drm/i915/intel_display.c >> +++ b/drivers/gpu/drm/i915/intel_display.c >> @@ -9172,8 +9172,9 @@ void intel_mark_idle(struct drm_device *dev) >> intel_decrease_pllclock(crtc); >> } >> >> - if (INTEL_INFO(dev)->gen >= 
6) >> + if (INTEL_INFO(dev)->gen >= 6) { >> gen6_rps_idle(dev->dev_private); >> + } > Unnecessary hunk. And a bikeshed: I think generally if we name something > vlv_ we put the platform checks outside of the function. Or have some > other guarantee in place to make sure it's only called on the right > platforms. Otherwise we generally pick an intel_ prefix. Thanks Daniel. I will use an intel_ prefix, since we might need to extend this for future platforms. >> >> out: >> intel_runtime_pm_put(dev_priv); >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h >> index 2a6ec4b..f1a90b8 100644 >> --- a/drivers/gpu/drm/i915/intel_drv.h >> +++ b/drivers/gpu/drm/i915/intel_drv.h >> @@ -1233,6 +1233,8 @@ void ironlake_teardown_rc6(struct drm_device *dev); >> void gen6_update_ring_freq(struct drm_device *dev); >> void gen6_rps_idle(struct drm_i915_private *dev_priv); >> void gen6_rps_boost(struct drm_i915_private *dev_priv); >> +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv); >> +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv); >> void ilk_wm_get_hw_state(struct drm_device *dev); >> void skl_wm_get_hw_state(struct drm_device *dev); >> void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c >> index e710b43..d23b60a 100644 >> --- a/drivers/gpu/drm/i915/intel_pm.c >> +++ b/drivers/gpu/drm/i915/intel_pm.c >> @@ -3961,6 +3961,33 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv) >> mutex_unlock(&dev_priv->rps.hw_lock); >> } >> >> +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv) >> +{ >> + struct drm_device *dev = dev_priv->dev; >> + >> + if (!IS_VALLEYVIEW(dev)) >> + return; >> + >> + if (IS_CHERRYVIEW(dev_priv->dev)) { >> + /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ >> + I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); >> + } else { >> + /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */ >> + 
I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); >> + } >> +} >> + >> +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv) >> +{ >> + struct drm_device *dev = dev_priv->dev; >> + >> + if (!IS_VALLEYVIEW(dev)) >> + return; >> + >> + /* TO threshold set to 250 us ( 0xC3 * 1.28 us) */ >> + I915_WRITE(GEN6_RC6_THRESHOLD, 0xC3); >> +} >> + >> void intel_set_rps(struct drm_device *dev, u8 val) >> { >> if (IS_VALLEYVIEW(dev)) >> -- >> 1.9.1 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Wed, Mar 11, 2015 at 07:07:12PM +0530, Deepak S wrote: > > > On Friday 06 March 2015 10:10 PM, Daniel Vetter wrote: > >On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepak.s@linux.intel.com wrote: > >>From: Deepak S <deepak.s@linux.intel.com> > >> > >>In normal cases, RC6 promotion timer is 1700us/500us. This will > >>result in more time spent in C1 state. For more residency in > >>C6 in case of media workloads, this is changed to 250us. > >>Not doing this for 3D workloads as too many C6-C0 > >>transition delays can result in performance impact. > >> > >>v2: Extend GPU busy & idle detection framework for rc6 Promotion > >>timer changes (Chris) > >> > >>Signed-off-by: Deepak S <deepak.s@linux.intel.com> > >I've thought Chris' idea was to put this into the gen6_rps_boost/idle > >functions? You could check from within them I think for whether the vcs is > >still busy ... One more comment below. > >-Daniel > > Hi Daniel, > > gen6_rps_boost/idle will be called only for RCS, right? Also, we get gen6_rps_boost during __wait_request. > But we want to program the promotion timer when we add a request to VCS to apply the value immediately. It's gen6_rps_busy/gen6_rps_idle. They are called from intel_mark_busy and intel_mark_idle. It is intel_mark_busy/intel_mark_idle that we want to extend to cover the VCS case as well. I think if you add a ring parameter to the functions, we can start specialising per ring and global state changes. You will then also be in a position to judge what is the best idle timer (and consider making i915_gem_idle_work_handler per ring). The goal is simply to evolve the current infrastructure for idle/busyness handling to cover your use case as well (and hopefully in the process improving the old/general cases). -Chris
On Wednesday 11 March 2015 07:26 PM, Chris Wilson wrote: > On Wed, Mar 11, 2015 at 07:07:12PM +0530, Deepak S wrote: >> >> On Friday 06 March 2015 10:10 PM, Daniel Vetter wrote: >>> On Thu, Mar 05, 2015 at 09:27:59PM +0530, deepak.s@linux.intel.com wrote: >>>> From: Deepak S <deepak.s@linux.intel.com> >>>> >>>> In normal cases, RC6 promotion timer is 1700us/500us. This will >>>> result in more time spent in C1 state. For more residency in >>>> C6 in case of media workloads, this is changed to 250us. >>>> Not doing this for 3D workloads as too many C6-C0 >>>> transition delays can result in performance impact. >>>> >>>> v2: Extend GPU busy & idle detection framework for rc6 Promotion >>>> timer changes (Chris) >>>> >>>> Signed-off-by: Deepak S <deepak.s@linux.intel.com> >>> I've thought Chris' idea was to put this into the gen6_rps_boost/idle >>> functions? You could check from within them I think for whether the vcs is >>> still busy ... One more comment below. >>> -Daniel >> Hi Daniel, >> >> gen6_rps_boost/idle will be called only for RCS, right? Also, we get gen6_rps_boost during __wait_request. >> But we want to program the promotion timer when we add a request to VCS to apply the value immediately. > It's gen6_rps_busy/gen6_rps_idle. They are called from intel_mark_busy > and intel_mark_idle. It is intel_mark_busy/intel_mark_idle that we want > to extend to cover the VCS case as well. I think if you add a ring > parameter to the functions, we can start specialising per ring and > global state changes. You will then also be in a position to judge what > is the best idle timer (and consider making i915_gem_idle_work_handler > per ring). The goal is simply to evolve the current infrastructure for > idle/busyness handling to cover your use case as well (and hopefully in > the process improving the old/general cases). > -Chris > Thanks Chris. Extending intel_mark_busy/intel_mark_idle makes sense. I will work on adding the change.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3831cc0..85f8aa6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2428,7 +2428,7 @@ int __i915_add_request(struct intel_engine_cs *ring, struct drm_i915_gem_request *request; struct intel_ringbuffer *ringbuf; u32 request_start; - int ret; + int ret, was_empty; request = ring->outstanding_lazy_request; if (WARN_ON(request == NULL)) @@ -2495,6 +2495,7 @@ int __i915_add_request(struct intel_engine_cs *ring, } request->emitted_jiffies = jiffies; + was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); request->file_priv = NULL; @@ -2519,6 +2520,10 @@ int __i915_add_request(struct intel_engine_cs *ring, queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, round_jiffies_up_relative(HZ)); + + if ((ring->id == VCS) && was_empty) + vlv_media_promotion_timer_busy(dev_priv); + intel_mark_busy(dev_priv->dev); return 0; @@ -2802,6 +2807,9 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) } WARN_ON(i915_verify_lists(ring->dev)); + + if (ring->id == VCS && list_empty(&ring->request_list)) + vlv_media_promotion_timer_idle(dev_priv); } bool diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 597c10b..5d121b4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9172,8 +9172,9 @@ void intel_mark_idle(struct drm_device *dev) intel_decrease_pllclock(crtc); } - if (INTEL_INFO(dev)->gen >= 6) + if (INTEL_INFO(dev)->gen >= 6) { gen6_rps_idle(dev->dev_private); + } out: intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2a6ec4b..f1a90b8 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1233,6 +1233,8 @@ void ironlake_teardown_rc6(struct drm_device *dev); void gen6_update_ring_freq(struct drm_device *dev); void 
gen6_rps_idle(struct drm_i915_private *dev_priv); void gen6_rps_boost(struct drm_i915_private *dev_priv); +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv); +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv); void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e710b43..d23b60a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3961,6 +3961,33 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->rps.hw_lock); } +void vlv_media_promotion_timer_idle(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + + if (!IS_VALLEYVIEW(dev)) + return; + + if (IS_CHERRYVIEW(dev_priv->dev)) { + /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ + I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); + } else { + /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */ + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); + } +} + +void vlv_media_promotion_timer_busy(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + + if (!IS_VALLEYVIEW(dev)) + return; + + /* TO threshold set to 250 us ( 0xC3 * 1.28 us) */ + I915_WRITE(GEN6_RC6_THRESHOLD, 0xC3); +} + void intel_set_rps(struct drm_device *dev, u8 val) { if (IS_VALLEYVIEW(dev))