Message ID | 1395726903-5321-1-git-send-email-arun.r.murthy@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Mar 25, 2014 at 11:25:03AM +0530, Arun R Murthy wrote: > BZ: 178761 > > In wait for vblank use usleep_range, which will use hrtimers instead of > msleep. Using msleep(1~20) there are more chances of sleeping for 20ms. > Using usleep_range uses hrtimers and hence are precise, worst case will > trigger an interrupt at the higher/max timeout. > > Change-log: On replacing msleep(1) with usleep_range(1000, 2000) we have > noticed the time consumed by wait for vblank is ~4ms to ~17ms. > > Change-Id: I6672e5697b01987a6d069ab06e76d97287b1f7ae > Signed-off-by: Arun R Murthy <arun.r.murthy@intel.com> Do we actually have that many vblank waits in time critical sections still? If we can't get rid of then and they need to be faster the right approach imo is to replace the wait loops with interrupt waits (with still a fallback timeout after 50ms or so of course) using our drm vblank infrastructure. That will wake up at exactly the time we want to, without any unnecessary wakeups in between or other ugly overhead. So as-is with the justification of optimizing vblank waits the wait_for_us optimization is nacked. -Daniel > --- > drivers/gpu/drm/i915/intel_display.c | 2 +- > drivers/gpu/drm/i915/intel_dp.c | 4 ++-- > drivers/gpu/drm/i915/intel_drv.h | 19 ++++++++++++------- > 3 files changed, 15 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 4d4a0d9..9de2678 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -761,7 +761,7 @@ static void g4x_wait_for_vblank(struct drm_device *dev, int pipe) > > frame = I915_READ(frame_reg); > > - if (wait_for(I915_READ_NOTRACE(frame_reg) != frame, 50)) > + if (wait_for_us(I915_READ_NOTRACE(frame_reg) != frame, 50, 1000)) > DRM_DEBUG_KMS("vblank wait timed out\n"); > } > > diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c > index f1ef3d4..14927e5 100644 > --- a/drivers/gpu/drm/i915/intel_dp.c > +++ b/drivers/gpu/drm/i915/intel_dp.c > @@ -1074,7 +1074,7 @@ static void wait_panel_status(struct intel_dp *intel_dp, > I915_READ(pp_stat_reg), > I915_READ(pp_ctrl_reg)); > > - if (_wait_for((I915_READ(pp_stat_reg) & mask) == value, 5000, 10)) { > + if (wait_for_ms((I915_READ(pp_stat_reg) & mask) == value, 5000, 10)) { > DRM_ERROR("Panel status timeout: status %08x control %08x\n", > I915_READ(pp_stat_reg), > I915_READ(pp_ctrl_reg)); > @@ -1808,7 +1808,7 @@ void intel_edp_psr_disable(struct intel_dp *intel_dp) > I915_READ(EDP_PSR_CTL(dev)) & ~EDP_PSR_ENABLE); > > /* Wait till PSR is idle */ > - if (_wait_for((I915_READ(EDP_PSR_STATUS_CTL(dev)) & > + if (wait_for_ms((I915_READ(EDP_PSR_STATUS_CTL(dev)) & > EDP_PSR_STATUS_STATE_MASK) == 0, 2000, 10)) > DRM_ERROR("Timed out waiting for PSR Idle State\n"); > } > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index 44067bc..bbda97e 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -42,8 +42,8 @@ > * having timed out, since the timeout could be due to preemption or similar and > * we've never had a chance to check the condition before the timeout. > */ > -#define _wait_for(COND, MS, W) ({ \ > - unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ > +#define _wait_for(COND, TIMEOUT, MS, US) ({ \ > + unsigned long timeout__ = jiffies + msecs_to_jiffies(TIMEOUT) + 1;\ > int ret__ = 0; \ > while (!(COND)) { \ > if (time_after(jiffies, timeout__)) { \ > @@ -51,8 +51,11 @@ > ret__ = -ETIMEDOUT; \ > break; \ > } \ > - if (W && drm_can_sleep()) { \ > - msleep(W); \ > + if ((MS | US) && drm_can_sleep()) { \ > + if (MS) \ > + msleep(MS); \ > + else \ > + usleep_range(US, US * 2); \ > } else { \ > cpu_relax(); \ > } \ > @@ -60,10 +63,12 @@ > ret__; \ > }) > > -#define wait_for(COND, MS) _wait_for(COND, MS, 1) > -#define wait_for_atomic(COND, MS) _wait_for(COND, MS, 0) > +#define wait_for(COND, TIMEOUT) _wait_for(COND, TIMEOUT, 1, 0) > +#define wait_for_ms(COND, TIMEOUT, MS) _wait_for(COND, TIMEOUT, MS, 0) > +#define wait_for_us(COND, TIMEOUT, US) _wait_for(COND, TIMEOUT, 0, US) > +#define wait_for_atomic(COND, TIMEOUT) _wait_for(COND, TIMEOUT, 0, 0) > #define wait_for_atomic_us(COND, US) _wait_for((COND), \ > - DIV_ROUND_UP((US), 1000), 0) > + DIV_ROUND_UP((US), 1000), 0, 0) > > #define KHz(x) (1000 * (x)) > #define MHz(x) KHz(1000 * (x)) > -- > 1.7.9.5 >
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4d4a0d9..9de2678 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -761,7 +761,7 @@ static void g4x_wait_for_vblank(struct drm_device *dev, int pipe) frame = I915_READ(frame_reg); - if (wait_for(I915_READ_NOTRACE(frame_reg) != frame, 50)) + if (wait_for_us(I915_READ_NOTRACE(frame_reg) != frame, 50, 1000)) DRM_DEBUG_KMS("vblank wait timed out\n"); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f1ef3d4..14927e5 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1074,7 +1074,7 @@ static void wait_panel_status(struct intel_dp *intel_dp, I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg)); - if (_wait_for((I915_READ(pp_stat_reg) & mask) == value, 5000, 10)) { + if (wait_for_ms((I915_READ(pp_stat_reg) & mask) == value, 5000, 10)) { DRM_ERROR("Panel status timeout: status %08x control %08x\n", I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg)); @@ -1808,7 +1808,7 @@ void intel_edp_psr_disable(struct intel_dp *intel_dp) I915_READ(EDP_PSR_CTL(dev)) & ~EDP_PSR_ENABLE); /* Wait till PSR is idle */ - if (_wait_for((I915_READ(EDP_PSR_STATUS_CTL(dev)) & + if (wait_for_ms((I915_READ(EDP_PSR_STATUS_CTL(dev)) & EDP_PSR_STATUS_STATE_MASK) == 0, 2000, 10)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 44067bc..bbda97e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -42,8 +42,8 @@ * having timed out, since the timeout could be due to preemption or similar and * we've never had a chance to check the condition before the timeout. */ -#define _wait_for(COND, MS, W) ({ \ - unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ +#define _wait_for(COND, TIMEOUT, MS, US) ({ \ + unsigned long timeout__ = jiffies + msecs_to_jiffies(TIMEOUT) + 1;\ int ret__ = 0; \ while (!(COND)) { \ if (time_after(jiffies, timeout__)) { \ @@ -51,8 +51,11 @@ ret__ = -ETIMEDOUT; \ break; \ } \ - if (W && drm_can_sleep()) { \ - msleep(W); \ + if ((MS | US) && drm_can_sleep()) { \ + if (MS) \ + msleep(MS); \ + else \ + usleep_range(US, US * 2); \ } else { \ cpu_relax(); \ } \ @@ -60,10 +63,12 @@ ret__; \ }) -#define wait_for(COND, MS) _wait_for(COND, MS, 1) -#define wait_for_atomic(COND, MS) _wait_for(COND, MS, 0) +#define wait_for(COND, TIMEOUT) _wait_for(COND, TIMEOUT, 1, 0) +#define wait_for_ms(COND, TIMEOUT, MS) _wait_for(COND, TIMEOUT, MS, 0) +#define wait_for_us(COND, TIMEOUT, US) _wait_for(COND, TIMEOUT, 0, US) +#define wait_for_atomic(COND, TIMEOUT) _wait_for(COND, TIMEOUT, 0, 0) #define wait_for_atomic_us(COND, US) _wait_for((COND), \ - DIV_ROUND_UP((US), 1000), 0) + DIV_ROUND_UP((US), 1000), 0, 0) #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x))
BZ: 178761 In wait for vblank use usleep_range, which will use hrtimers instead of msleep. Using msleep(1~20) there are more chances of sleeping for 20ms. Using usleep_range uses hrtimers and hence are precise, worst case will trigger an interrupt at the higher/max timeout. Change-log: On replacing msleep(1) with usleep_range(1000, 2000) we have noticed the time consumed by wait for vblank is ~4ms to ~17ms. Change-Id: I6672e5697b01987a6d069ab06e76d97287b1f7ae Signed-off-by: Arun R Murthy <arun.r.murthy@intel.com> --- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- drivers/gpu/drm/i915/intel_drv.h | 19 ++++++++++++------- 3 files changed, 15 insertions(+), 10 deletions(-)