Message ID | 20161124040135.5517-9-mahesh1.kumar@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Mahesh Kumar schreef op do 24-11-2016 om 09:31 [+0530]: > This patch implemnets Workariunds related to display arbitrated > memory > bandwidth. These WA are applicabe for all gen-9 based platforms. > > Changes since v1: > - Rebase on top of Paulo's patch series > Changes since v2: > - Address review comments > - Rebase/rework as per other patch changes in series > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 9 +++ > drivers/gpu/drm/i915/intel_pm.c | 149 > +++++++++++++++++++++++++++++++++++++--- > 2 files changed, 149 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > index 4e2f17f..2b673c6 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1193,6 +1193,13 @@ enum intel_sbi_destination { > SBI_MPHY, > }; > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ > +enum watermark_memory_wa { > + WATERMARK_WA_NONE, > + WATERMARK_WA_X_TILED, > + WATERMARK_WA_Y_TILED, > +}; > + > #define QUIRK_PIPEA_FORCE (1<<0) > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation { > > struct skl_wm_values { > unsigned dirty_pipes; > + /* any WaterMark memory workaround Required */ > + enum watermark_memory_wa mem_wa; > struct skl_ddb_allocation ddb; > }; > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index 3e2dd8f..547bbda 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -2889,11 +2889,7 @@ skl_wm_plane_id(const struct intel_plane > *plane) > } > } > > -/* > - * FIXME: We still don't have the proper code detect if we need to > apply the WA, > - * so assume we'll always need it in order to avoid underruns. 
> - */ > -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) > +static bool intel_needs_memory_bw_wa(struct intel_atomic_state > *state) > { > struct drm_i915_private *dev_priv = to_i915(state- > >base.dev); > > @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct > drm_atomic_state *state) > > latency = dev_priv->wm.skl_latency[level]; > > - if (skl_needs_memory_bw_wa(intel_state) && > + if (intel_needs_memory_bw_wa(intel_state) && > plane->base.state->fb->modifier == > I915_FORMAT_MOD_X_TILED) > latency += 15; > @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > uint32_t y_min_scanlines; > struct intel_atomic_state *state = > to_intel_atomic_state(cstate->base.state); > - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); > + enum watermark_memory_wa mem_wa; > bool y_tiled, x_tiled; > > if (latency == 0 || !cstate->base.active || !intel_pstate- > >base.visible) { > @@ -3613,7 +3609,8 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) > latency += 4; > > - if (apply_memory_bw_wa && x_tiled) > + mem_wa = state->wm_results.mem_wa; > + if (mem_wa != WATERMARK_WA_NONE && x_tiled) > latency += 15; > > width = drm_rect_width(&intel_pstate->base.src) >> 16; > @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > y_min_scanlines = 4; > } > > - if (apply_memory_bw_wa) > + if (mem_wa == WATERMARK_WA_Y_TILED) > y_min_scanlines *= 2; > > plane_bytes_per_line = width * cpp; > @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct drm_atomic_state > *state) > } > > /* > + * If Watermark workaround is changed we need to recalculate > + * watermark values for all active pipes > + */ > + if (intel_state->wm_results.mem_wa != dev_priv- > >wm.skl_hw.mem_wa) { > + realloc_pipes = ~0; > + intel_state->wm_results.dirty_pipes = ~0; > + } > + > + /* > * We're not recomputing for the pipes not 
included in the > commit, so > * make sure we start with the current state. > */ > @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct drm_atomic_state > *state) > } > > static void > +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) > +{ > + struct drm_device *dev = state->dev; > + struct drm_i915_private *dev_priv = to_i915(dev); > + struct intel_crtc *intel_crtc; > + struct intel_plane_state *intel_pstate; > + struct intel_atomic_state *intel_state = > to_intel_atomic_state(state); > + struct memdev_info *memdev_info = &dev_priv->memdev_info; > + int num_active_pipes; > + uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps; > + int display_bw_percentage; > + bool y_tile_enabled = false; > + > + if (!intel_needs_memory_bw_wa(intel_state)) { > + intel_state->wm_results.mem_wa = WATERMARK_WA_NONE; > + return; > + } > + > + if (!memdev_info->valid) > + goto exit; > + > + max_pipe_bw_kbps = 0; > + num_active_pipes = 0; > + for_each_intel_crtc(dev, intel_crtc) { > + struct intel_crtc_state *cstate; > + struct intel_plane *plane; > + int num_active_planes; > + uint32_t max_plane_bw_kbps, pipe_bw_kbps; > + > + /* > + * If CRTC is part of current atomic commit, get > crtc state from > + * existing CRTC state. else take the cached CRTC > state > + */ > + cstate = NULL; > + if (state) > + cstate = > intel_atomic_get_existing_crtc_state(state, > + intel_crtc); > + if (!cstate) > + cstate = to_intel_crtc_state(intel_crtc- > >base.state); This is really really not allowed. If you want to access crtc->state, you need to get the state. Since a w/a forces a full recalculation, best you can do is put the per pipe status in crtc_state->wm.skl as well. If it changes you could add all other pipes. > + if (!cstate->base.active) > + continue; > + > + num_active_planes = 0; > + max_plane_bw_kbps = 0; > + for_each_intel_plane_mask(dev, plane, cstate- > >base.plane_mask) { This is very much not allowed either! 
If you want to do this safely and you hold crtc_state, use drm_atomic_crtc_state_for_each_plane_state. > + struct drm_framebuffer *fb; > + uint32_t plane_bw_kbps; > + uint32_t id = skl_wm_plane_id(plane); > + > + intel_pstate = NULL; > + intel_pstate = > + intel_atomic_get_existing_plane_stat > e(state, > + > plane); > + if (!intel_pstate) > + intel_pstate = > + to_intel_plane_state(plane- > >base.state); > + > + if (!intel_pstate->base.visible) > + continue; > + > + if (WARN_ON(!intel_pstate->base.fb)) > + continue; > + > + if (id == PLANE_CURSOR) > + continue; > + > + fb = intel_pstate->base.fb; > + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED > || > + fb->modifier == > I915_FORMAT_MOD_Yf_TILED)) > + y_tile_enabled = true; > + > + plane_bw_kbps = > skl_adjusted_plane_pixel_rate(cstate, > + inte > l_pstate); > + max_plane_bw_kbps = max(plane_bw_kbps, > + max_plane_bw > _kbps); > + num_active_planes++; > + } > + pipe_bw_kbps = max_plane_bw_kbps * > num_active_planes; > + max_pipe_bw_kbps = max(pipe_bw_kbps, > max_pipe_bw_kbps); > + num_active_pipes++; > + } > + > + total_pipe_bw_kbps = max_pipe_bw_kbps * num_active_pipes; > + > + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps > * 100, > + memdev_info- > >bandwidth_kbps); > + > + /* > + * If there is any Ytile plane enabled and arbitrated > display > + * bandwidth > 20% of raw system memory bandwidth > + * Enale Y-tile related WA > + * > + * If memory is dual channel single rank, Xtile limit = 35%, > else Xtile > + * limit = 60% > + * If there is no Ytile plane enabled and > + * arbitrated display bandwidth > Xtile limit > + * Enable X-tile realated WA > + */ > + if (y_tile_enabled && (display_bw_percentage > 20)) > + intel_state->wm_results.mem_wa = > WATERMARK_WA_Y_TILED; > + else { > + int x_tile_percentage = 60; > + enum rank rank = DRAM_RANK_SINGLE; > + > + if (memdev_info->rank != DRAM_RANK_INVALID) > + rank = memdev_info->rank; > + > + if ((rank == DRAM_RANK_SINGLE) && > + 
(memdev_info->num_channels > == 2)) > + x_tile_percentage = 35; > + > + if (display_bw_percentage > x_tile_percentage) > + intel_state->wm_results.mem_wa = > WATERMARK_WA_X_TILED; > + } > + return; > + > +exit: > + intel_state->wm_results.mem_wa = WATERMARK_WA_Y_TILED; > +} > + > + > +static void > skl_copy_wm_for_pipe(struct skl_wm_values *dst, > struct skl_wm_values *src, > enum pipe pipe) > @@ -4177,6 +4306,8 @@ skl_compute_wm(struct drm_atomic_state *state) > /* Clear all dirty flags */ > results->dirty_pipes = 0; > > + skl_compute_memory_bandwidth_wm_wa(state); > + > ret = skl_compute_ddb(state); > if (ret) > return ret; --------------------------------------------------------------------- Intel International B.V. Registered in The Netherlands under number 34098535 Statutory seat: Haarlemmermeer Registered address: Capronilaan 37, 1119NG Schiphol-Rijk This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
Hi, On Thursday 24 November 2016 06:21 PM, Lankhorst, Maarten wrote: > Mahesh Kumar schreef op do 24-11-2016 om 09:31 [+0530]: >> This patch implemnets Workariunds related to display arbitrated >> memory >> bandwidth. These WA are applicabe for all gen-9 based platforms. >> >> Changes since v1: >> - Rebase on top of Paulo's patch series >> Changes since v2: >> - Address review comments >> - Rebase/rework as per other patch changes in series >> >> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 9 +++ >> drivers/gpu/drm/i915/intel_pm.c | 149 >> +++++++++++++++++++++++++++++++++++++--- >> 2 files changed, 149 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index 4e2f17f..2b673c6 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1193,6 +1193,13 @@ enum intel_sbi_destination { >> SBI_MPHY, >> }; >> >> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ >> +enum watermark_memory_wa { >> + WATERMARK_WA_NONE, >> + WATERMARK_WA_X_TILED, >> + WATERMARK_WA_Y_TILED, >> +}; >> + >> #define QUIRK_PIPEA_FORCE (1<<0) >> #define QUIRK_LVDS_SSC_DISABLE (1<<1) >> #define QUIRK_INVERT_BRIGHTNESS (1<<2) >> @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation { >> >> struct skl_wm_values { >> unsigned dirty_pipes; >> + /* any WaterMark memory workaround Required */ >> + enum watermark_memory_wa mem_wa; >> struct skl_ddb_allocation ddb; >> }; >> >> diff --git a/drivers/gpu/drm/i915/intel_pm.c >> b/drivers/gpu/drm/i915/intel_pm.c >> index 3e2dd8f..547bbda 100644 >> --- a/drivers/gpu/drm/i915/intel_pm.c >> +++ b/drivers/gpu/drm/i915/intel_pm.c >> @@ -2889,11 +2889,7 @@ skl_wm_plane_id(const struct intel_plane >> *plane) >> } >> } >> >> -/* >> - * FIXME: We still don't have the proper code detect if we need to >> apply the WA, >> - * so assume we'll always need it in order to avoid underruns. 
>> - */ >> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) >> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state >> *state) >> { >> struct drm_i915_private *dev_priv = to_i915(state- >>> base.dev); >> >> @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct >> drm_atomic_state *state) >> >> latency = dev_priv->wm.skl_latency[level]; >> >> - if (skl_needs_memory_bw_wa(intel_state) && >> + if (intel_needs_memory_bw_wa(intel_state) && >> plane->base.state->fb->modifier == >> I915_FORMAT_MOD_X_TILED) >> latency += 15; >> @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const struct >> drm_i915_private *dev_priv, >> uint32_t y_min_scanlines; >> struct intel_atomic_state *state = >> to_intel_atomic_state(cstate->base.state); >> - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); >> + enum watermark_memory_wa mem_wa; >> bool y_tiled, x_tiled; >> >> if (latency == 0 || !cstate->base.active || !intel_pstate- >>> base.visible) { >> @@ -3613,7 +3609,8 @@ static int skl_compute_plane_wm(const struct >> drm_i915_private *dev_priv, >> if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) >> latency += 4; >> >> - if (apply_memory_bw_wa && x_tiled) >> + mem_wa = state->wm_results.mem_wa; >> + if (mem_wa != WATERMARK_WA_NONE && x_tiled) >> latency += 15; >> >> width = drm_rect_width(&intel_pstate->base.src) >> 16; >> @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const struct >> drm_i915_private *dev_priv, >> y_min_scanlines = 4; >> } >> >> - if (apply_memory_bw_wa) >> + if (mem_wa == WATERMARK_WA_Y_TILED) >> y_min_scanlines *= 2; >> >> plane_bytes_per_line = width * cpp; >> @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct drm_atomic_state >> *state) >> } >> >> /* >> + * If Watermark workaround is changed we need to recalculate >> + * watermark values for all active pipes >> + */ >> + if (intel_state->wm_results.mem_wa != dev_priv- >>> wm.skl_hw.mem_wa) { >> + realloc_pipes = ~0; >> + intel_state->wm_results.dirty_pipes = ~0; >> + 
} >> + >> + /* >> * We're not recomputing for the pipes not included in the >> commit, so >> * make sure we start with the current state. >> */ >> @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct drm_atomic_state >> *state) >> } >> >> static void >> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) >> +{ >> + struct drm_device *dev = state->dev; >> + struct drm_i915_private *dev_priv = to_i915(dev); >> + struct intel_crtc *intel_crtc; >> + struct intel_plane_state *intel_pstate; >> + struct intel_atomic_state *intel_state = >> to_intel_atomic_state(state); >> + struct memdev_info *memdev_info = &dev_priv->memdev_info; >> + int num_active_pipes; >> + uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps; >> + int display_bw_percentage; >> + bool y_tile_enabled = false; >> + >> + if (!intel_needs_memory_bw_wa(intel_state)) { >> + intel_state->wm_results.mem_wa = WATERMARK_WA_NONE; >> + return; >> + } >> + >> + if (!memdev_info->valid) >> + goto exit; >> + >> + max_pipe_bw_kbps = 0; >> + num_active_pipes = 0; >> + for_each_intel_crtc(dev, intel_crtc) { >> + struct intel_crtc_state *cstate; >> + struct intel_plane *plane; >> + int num_active_planes; >> + uint32_t max_plane_bw_kbps, pipe_bw_kbps; >> + >> + /* >> + * If CRTC is part of current atomic commit, get >> crtc state from >> + * existing CRTC state. else take the cached CRTC >> state >> + */ >> + cstate = NULL; >> + if (state) >> + cstate = >> intel_atomic_get_existing_crtc_state(state, >> + intel_crtc); >> + if (!cstate) >> + cstate = to_intel_crtc_state(intel_crtc- >>> base.state); > This is really really not allowed. If you want to access > crtc->state, you need to get the state. > > Since a w/a forces a full recalculation, best you can do is put the per > pipe status in crtc_state->wm.skl as well. If it changes you could add > all other pipes. I really don't want to add all pipes in "state" until that is necessary otherwise it'll make our commit huge, that's why getting the cached state. 
Same goes for plane. Later if w/a changes then I'm adding all the CRTC's in relloc_pipes mask. if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) If I hold the crtc->mutex or plane->mutex lock, It may cause deadlock with other commits. I can keep a cached copy of plane memory bandwidth requirement in "dev_priv->wm.skl_hw", but again if there are two parallel commits it may cause race. Can you please suggest what is the correct way of getting plane/crtc state during check phase without have to actually commit them (add in global-state), If any such thing is available in atomic framework? Regards, -Mahesh >> + if (!cstate->base.active) >> + continue; >> + >> + num_active_planes = 0; >> + max_plane_bw_kbps = 0; >> + for_each_intel_plane_mask(dev, plane, cstate- >>> base.plane_mask) { > This is very much not allowed either! > > If you want to do this safely and you hold crtc_state, use > drm_atomic_crtc_state_for_each_plane_state. >> + struct drm_framebuffer *fb; >> + uint32_t plane_bw_kbps; >> + uint32_t id = skl_wm_plane_id(plane); >> + >> + intel_pstate = NULL; >> + intel_pstate = >> + intel_atomic_get_existing_plane_stat >> e(state, >> + >> plane); >> + if (!intel_pstate) >> + intel_pstate = >> + to_intel_plane_state(plane- >>> base.state); >> + >> + if (!intel_pstate->base.visible) >> + continue; >> + >> + if (WARN_ON(!intel_pstate->base.fb)) >> + continue; >> + >> + if (id == PLANE_CURSOR) >> + continue; >> + >> + fb = intel_pstate->base.fb; >> + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED >> || >> + fb->modifier == >> I915_FORMAT_MOD_Yf_TILED)) >> + y_tile_enabled = true; >> + >> + plane_bw_kbps = >> skl_adjusted_plane_pixel_rate(cstate, >> + inte >> l_pstate); >> + max_plane_bw_kbps = max(plane_bw_kbps, >> + max_plane_bw >> _kbps); >> + num_active_planes++; >> + } >> + pipe_bw_kbps = max_plane_bw_kbps * >> num_active_planes; >> + max_pipe_bw_kbps = max(pipe_bw_kbps, >> max_pipe_bw_kbps); >> + num_active_pipes++; >> + } >> + >> + 
total_pipe_bw_kbps = max_pipe_bw_kbps * num_active_pipes; >> + >> + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps >> * 100, >> + memdev_info- >>> bandwidth_kbps); >> + >> + /* >> + * If there is any Ytile plane enabled and arbitrated >> display >> + * bandwidth > 20% of raw system memory bandwidth >> + * Enale Y-tile related WA >> + * >> + * If memory is dual channel single rank, Xtile limit = 35%, >> else Xtile >> + * limit = 60% >> + * If there is no Ytile plane enabled and >> + * arbitrated display bandwidth > Xtile limit >> + * Enable X-tile realated WA >> + */ >> + if (y_tile_enabled && (display_bw_percentage > 20)) >> + intel_state->wm_results.mem_wa = >> WATERMARK_WA_Y_TILED; >> + else { >> + int x_tile_percentage = 60; >> + enum rank rank = DRAM_RANK_SINGLE; >> + >> + if (memdev_info->rank != DRAM_RANK_INVALID) >> + rank = memdev_info->rank; >> + >> + if ((rank == DRAM_RANK_SINGLE) && >> + (memdev_info->num_channels >> == 2)) >> + x_tile_percentage = 35; >> + >> + if (display_bw_percentage > x_tile_percentage) >> + intel_state->wm_results.mem_wa = >> WATERMARK_WA_X_TILED; >> + } >> + return; >> + >> +exit: >> + intel_state->wm_results.mem_wa = WATERMARK_WA_Y_TILED; >> +} >> + >> + >> +static void >> skl_copy_wm_for_pipe(struct skl_wm_values *dst, >> struct skl_wm_values *src, >> enum pipe pipe) >> @@ -4177,6 +4306,8 @@ skl_compute_wm(struct drm_atomic_state *state) >> /* Clear all dirty flags */ >> results->dirty_pipes = 0; >> >> + skl_compute_memory_bandwidth_wm_wa(state); >> + >> ret = skl_compute_ddb(state); >> if (ret) >> return ret;
Mahesh Kumar schreef op di 29-11-2016 om 11:12 [+0530]: > Hi, > > > On Thursday 24 November 2016 06:21 PM, Lankhorst, Maarten wrote: > > > > Mahesh Kumar schreef op do 24-11-2016 om 09:31 [+0530]: > > > > > > This patch implemnets Workariunds related to display arbitrated > > > memory > > > bandwidth. These WA are applicabe for all gen-9 based platforms. > > > > > > Changes since v1: > > > - Rebase on top of Paulo's patch series > > > Changes since v2: > > > - Address review comments > > > - Rebase/rework as per other patch changes in series > > > > > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> > > > --- > > > drivers/gpu/drm/i915/i915_drv.h | 9 +++ > > > drivers/gpu/drm/i915/intel_pm.c | 149 > > > +++++++++++++++++++++++++++++++++++++--- > > > 2 files changed, 149 insertions(+), 9 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > > > b/drivers/gpu/drm/i915/i915_drv.h > > > index 4e2f17f..2b673c6 100644 > > > --- a/drivers/gpu/drm/i915/i915_drv.h > > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > > @@ -1193,6 +1193,13 @@ enum intel_sbi_destination { > > > SBI_MPHY, > > > }; > > > > > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ > > > +enum watermark_memory_wa { > > > + WATERMARK_WA_NONE, > > > + WATERMARK_WA_X_TILED, > > > + WATERMARK_WA_Y_TILED, > > > +}; > > > + > > > #define QUIRK_PIPEA_FORCE (1<<0) > > > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > > > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > > > @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation { > > > > > > struct skl_wm_values { > > > unsigned dirty_pipes; > > > + /* any WaterMark memory workaround Required */ > > > + enum watermark_memory_wa mem_wa; > > > struct skl_ddb_allocation ddb; > > > }; > > > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > > > b/drivers/gpu/drm/i915/intel_pm.c > > > index 3e2dd8f..547bbda 100644 > > > --- a/drivers/gpu/drm/i915/intel_pm.c > > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > > @@ -2889,11 +2889,7 @@ skl_wm_plane_id(const 
struct intel_plane > > > *plane) > > > } > > > } > > > > > > -/* > > > - * FIXME: We still don't have the proper code detect if we need > > > to > > > apply the WA, > > > - * so assume we'll always need it in order to avoid underruns. > > > - */ > > > -static bool skl_needs_memory_bw_wa(struct intel_atomic_state > > > *state) > > > +static bool intel_needs_memory_bw_wa(struct intel_atomic_state > > > *state) > > > { > > > struct drm_i915_private *dev_priv = to_i915(state- > > > > > > > > base.dev); > > > > > > @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct > > > drm_atomic_state *state) > > > > > > latency = dev_priv->wm.skl_latency[level]; > > > > > > - if (skl_needs_memory_bw_wa(intel_state) && > > > + if (intel_needs_memory_bw_wa(intel_state) && > > > plane->base.state->fb->modifier == > > > I915_FORMAT_MOD_X_TILED) > > > latency += 15; > > > @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const > > > struct > > > drm_i915_private *dev_priv, > > > uint32_t y_min_scanlines; > > > struct intel_atomic_state *state = > > > to_intel_atomic_state(cstate->base.state); > > > - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); > > > + enum watermark_memory_wa mem_wa; > > > bool y_tiled, x_tiled; > > > > > > if (latency == 0 || !cstate->base.active || > > > !intel_pstate- > > > > > > > > base.visible) { > > > @@ -3613,7 +3609,8 @@ static int skl_compute_plane_wm(const > > > struct > > > drm_i915_private *dev_priv, > > > if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) > > > latency += 4; > > > > > > - if (apply_memory_bw_wa && x_tiled) > > > + mem_wa = state->wm_results.mem_wa; > > > + if (mem_wa != WATERMARK_WA_NONE && x_tiled) > > > latency += 15; > > > > > > width = drm_rect_width(&intel_pstate->base.src) >> 16; > > > @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const > > > struct > > > drm_i915_private *dev_priv, > > > y_min_scanlines = 4; > > > } > > > > > > - if (apply_memory_bw_wa) > > > + if (mem_wa == WATERMARK_WA_Y_TILED) > > > 
y_min_scanlines *= 2; > > > > > > plane_bytes_per_line = width * cpp; > > > @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct drm_atomic_state > > > *state) > > > } > > > > > > /* > > > + * If Watermark workaround is changed we need to > > > recalculate > > > + * watermark values for all active pipes > > > + */ > > > + if (intel_state->wm_results.mem_wa != dev_priv- > > > > > > > > wm.skl_hw.mem_wa) { > > > + realloc_pipes = ~0; > > > + intel_state->wm_results.dirty_pipes = ~0; > > > + } > > > + > > > + /* > > > * We're not recomputing for the pipes not included in > > > the > > > commit, so > > > * make sure we start with the current state. > > > */ > > > @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct drm_atomic_state > > > *state) > > > } > > > > > > static void > > > +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state > > > *state) > > > +{ > > > + struct drm_device *dev = state->dev; > > > + struct drm_i915_private *dev_priv = to_i915(dev); > > > + struct intel_crtc *intel_crtc; > > > + struct intel_plane_state *intel_pstate; > > > + struct intel_atomic_state *intel_state = > > > to_intel_atomic_state(state); > > > + struct memdev_info *memdev_info = &dev_priv- > > > >memdev_info; > > > + int num_active_pipes; > > > + uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps; > > > + int display_bw_percentage; > > > + bool y_tile_enabled = false; > > > + > > > + if (!intel_needs_memory_bw_wa(intel_state)) { > > > + intel_state->wm_results.mem_wa = > > > WATERMARK_WA_NONE; > > > + return; > > > + } > > > + > > > + if (!memdev_info->valid) > > > + goto exit; > > > + > > > + max_pipe_bw_kbps = 0; > > > + num_active_pipes = 0; > > > + for_each_intel_crtc(dev, intel_crtc) { > > > + struct intel_crtc_state *cstate; > > > + struct intel_plane *plane; > > > + int num_active_planes; > > > + uint32_t max_plane_bw_kbps, pipe_bw_kbps; > > > + > > > + /* > > > + * If CRTC is part of current atomic commit, get > > > crtc state from > > > + * existing CRTC state. 
else take the cached > > > CRTC > > > state > > > + */ > > > + cstate = NULL; > > > + if (state) > > > + cstate = > > > intel_atomic_get_existing_crtc_state(state, > > > + intel_crtc); > > > + if (!cstate) > > > + cstate = to_intel_crtc_state(intel_crtc- > > > > > > > > base.state); > > This is really really not allowed. If you want to access > > crtc->state, you need to get the state. > > > > Since a w/a forces a full recalculation, best you can do is put the > > per > > pipe status in crtc_state->wm.skl as well. If it changes you could > > add > > all other pipes. > I really don't want to add all pipes in "state" until that is > necessary > otherwise it'll make our commit huge, that's why getting the cached > state. > Same goes for plane. Later if w/a changes then I'm adding all the > CRTC's in relloc_pipes mask. > > if (intel_state->wm_results.mem_wa != dev_priv- > >wm.skl_hw.mem_wa) > > If I hold the crtc->mutex or plane->mutex lock, It may cause > deadlock > with other commits. > I can keep a cached copy of plane memory bandwidth requirement in > "dev_priv->wm.skl_hw", but again if there are two parallel commits > it > may cause race. > > Can you please suggest what is the correct way of getting plane/crtc > state during check phase without have to actually commit them (add > in > global-state), > If any such thing is available in atomic framework? How often will the workaround status change in practice? You could do same as active_crtcs, protect changing the workaround current crtc's workaround mode in dev_priv->wm.skl.wa_state[pipe] with connection_mutex. If this means changing that the global workaround setting is changed, then also acquire all other crtc states. I can't say it's pretty though, it would need a good justification for how bad the problem is in practice plus pretty good documentation on why it works. ~Maarten --------------------------------------------------------------------- Intel International B.V. 
Registered in The Netherlands under number 34098535 Statutory seat: Haarlemmermeer Registered address: Capronilaan 37, 1119NG Schiphol-Rijk This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
On Tuesday 29 November 2016 03:16 PM, Lankhorst, Maarten wrote: > Mahesh Kumar schreef op di 29-11-2016 om 11:12 [+0530]: >> Hi, >> >> >> On Thursday 24 November 2016 06:21 PM, Lankhorst, Maarten wrote: >>> Mahesh Kumar schreef op do 24-11-2016 om 09:31 [+0530]: >>>> This patch implemnets Workariunds related to display arbitrated >>>> memory >>>> bandwidth. These WA are applicabe for all gen-9 based platforms. >>>> >>>> Changes since v1: >>>> - Rebase on top of Paulo's patch series >>>> Changes since v2: >>>> - Address review comments >>>> - Rebase/rework as per other patch changes in series >>>> >>>> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> >>>> --- >>>> drivers/gpu/drm/i915/i915_drv.h | 9 +++ >>>> drivers/gpu/drm/i915/intel_pm.c | 149 >>>> +++++++++++++++++++++++++++++++++++++--- >>>> 2 files changed, 149 insertions(+), 9 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h >>>> b/drivers/gpu/drm/i915/i915_drv.h >>>> index 4e2f17f..2b673c6 100644 >>>> --- a/drivers/gpu/drm/i915/i915_drv.h >>>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>>> @@ -1193,6 +1193,13 @@ enum intel_sbi_destination { >>>> SBI_MPHY, >>>> }; >>>> >>>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ >>>> +enum watermark_memory_wa { >>>> + WATERMARK_WA_NONE, >>>> + WATERMARK_WA_X_TILED, >>>> + WATERMARK_WA_Y_TILED, >>>> +}; >>>> + >>>> #define QUIRK_PIPEA_FORCE (1<<0) >>>> #define QUIRK_LVDS_SSC_DISABLE (1<<1) >>>> #define QUIRK_INVERT_BRIGHTNESS (1<<2) >>>> @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation { >>>> >>>> struct skl_wm_values { >>>> unsigned dirty_pipes; >>>> + /* any WaterMark memory workaround Required */ >>>> + enum watermark_memory_wa mem_wa; >>>> struct skl_ddb_allocation ddb; >>>> }; >>>> >>>> diff --git a/drivers/gpu/drm/i915/intel_pm.c >>>> b/drivers/gpu/drm/i915/intel_pm.c >>>> index 3e2dd8f..547bbda 100644 >>>> --- a/drivers/gpu/drm/i915/intel_pm.c >>>> +++ b/drivers/gpu/drm/i915/intel_pm.c >>>> @@ -2889,11 +2889,7 @@ 
skl_wm_plane_id(const struct intel_plane >>>> *plane) >>>> } >>>> } >>>> >>>> -/* >>>> - * FIXME: We still don't have the proper code detect if we need >>>> to >>>> apply the WA, >>>> - * so assume we'll always need it in order to avoid underruns. >>>> - */ >>>> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state >>>> *state) >>>> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state >>>> *state) >>>> { >>>> struct drm_i915_private *dev_priv = to_i915(state- >>>>> base.dev); >>>> >>>> @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct >>>> drm_atomic_state *state) >>>> >>>> latency = dev_priv->wm.skl_latency[level]; >>>> >>>> - if (skl_needs_memory_bw_wa(intel_state) && >>>> + if (intel_needs_memory_bw_wa(intel_state) && >>>> plane->base.state->fb->modifier == >>>> I915_FORMAT_MOD_X_TILED) >>>> latency += 15; >>>> @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const >>>> struct >>>> drm_i915_private *dev_priv, >>>> uint32_t y_min_scanlines; >>>> struct intel_atomic_state *state = >>>> to_intel_atomic_state(cstate->base.state); >>>> - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); >>>> + enum watermark_memory_wa mem_wa; >>>> bool y_tiled, x_tiled; >>>> >>>> if (latency == 0 || !cstate->base.active || >>>> !intel_pstate- >>>>> base.visible) { >>>> @@ -3613,7 +3609,8 @@ static int skl_compute_plane_wm(const >>>> struct >>>> drm_i915_private *dev_priv, >>>> if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) >>>> latency += 4; >>>> >>>> - if (apply_memory_bw_wa && x_tiled) >>>> + mem_wa = state->wm_results.mem_wa; >>>> + if (mem_wa != WATERMARK_WA_NONE && x_tiled) >>>> latency += 15; >>>> >>>> width = drm_rect_width(&intel_pstate->base.src) >> 16; >>>> @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const >>>> struct >>>> drm_i915_private *dev_priv, >>>> y_min_scanlines = 4; >>>> } >>>> >>>> - if (apply_memory_bw_wa) >>>> + if (mem_wa == WATERMARK_WA_Y_TILED) >>>> y_min_scanlines *= 2; >>>> >>>> plane_bytes_per_line = 
width * cpp; >>>> @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct drm_atomic_state >>>> *state) >>>> } >>>> >>>> /* >>>> + * If Watermark workaround is changed we need to >>>> recalculate >>>> + * watermark values for all active pipes >>>> + */ >>>> + if (intel_state->wm_results.mem_wa != dev_priv- >>>>> wm.skl_hw.mem_wa) { >>>> + realloc_pipes = ~0; >>>> + intel_state->wm_results.dirty_pipes = ~0; >>>> + } >>>> + >>>> + /* >>>> * We're not recomputing for the pipes not included in >>>> the >>>> commit, so >>>> * make sure we start with the current state. >>>> */ >>>> @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct drm_atomic_state >>>> *state) >>>> } >>>> >>>> static void >>>> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state >>>> *state) >>>> +{ >>>> + struct drm_device *dev = state->dev; >>>> + struct drm_i915_private *dev_priv = to_i915(dev); >>>> + struct intel_crtc *intel_crtc; >>>> + struct intel_plane_state *intel_pstate; >>>> + struct intel_atomic_state *intel_state = >>>> to_intel_atomic_state(state); >>>> + struct memdev_info *memdev_info = &dev_priv- >>>>> memdev_info; >>>> + int num_active_pipes; >>>> + uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps; >>>> + int display_bw_percentage; >>>> + bool y_tile_enabled = false; >>>> + >>>> + if (!intel_needs_memory_bw_wa(intel_state)) { >>>> + intel_state->wm_results.mem_wa = >>>> WATERMARK_WA_NONE; >>>> + return; >>>> + } >>>> + >>>> + if (!memdev_info->valid) >>>> + goto exit; >>>> + >>>> + max_pipe_bw_kbps = 0; >>>> + num_active_pipes = 0; >>>> + for_each_intel_crtc(dev, intel_crtc) { >>>> + struct intel_crtc_state *cstate; >>>> + struct intel_plane *plane; >>>> + int num_active_planes; >>>> + uint32_t max_plane_bw_kbps, pipe_bw_kbps; >>>> + >>>> + /* >>>> + * If CRTC is part of current atomic commit, get >>>> crtc state from >>>> + * existing CRTC state. 
else take the cached >>>> CRTC >>>> state >>>> + */ >>>> + cstate = NULL; >>>> + if (state) >>>> + cstate = >>>> intel_atomic_get_existing_crtc_state(state, >>>> + intel_crtc); >>>> + if (!cstate) >>>> + cstate = to_intel_crtc_state(intel_crtc- >>>>> base.state); >>> This is really really not allowed. If you want to access >>> crtc->state, you need to get the state. >>> >>> Since a w/a forces a full recalculation, best you can do is put the >>> per >>> pipe status in crtc_state->wm.skl as well. If it changes you could >>> add >>> all other pipes. >> I really don't want to add all pipes in "state" until that is >> necessary >> otherwise it'll make our commit huge, that's why getting the cached >> state. >> Same goes for plane. Later if w/a changes then I'm adding all the >> CRTC's in relloc_pipes mask. >> >> if (intel_state->wm_results.mem_wa != dev_priv- >>> wm.skl_hw.mem_wa) >> If I hold the crtc->mutex or plane->mutex lock, It may cause >> deadlock >> with other commits. >> I can keep a cached copy of plane memory bandwidth requirement in >> "dev_priv->wm.skl_hw", but again if there are two parallel commits >> it >> may cause race. >> >> Can you please suggest what is the correct way of getting plane/crtc >> state during check phase without have to actually commit them (add >> in >> global-state), >> If any such thing is available in atomic framework? > How often will the workaround status change in practice? In Linux use-case status changes very rarely, If we connect multiple high resolution display & our DRAM's bandwidth is less we may have to enable the WA. & it'll remain there until any display in unplugged OR any modeset with lower resolution. On the other hand, In OS with multi-plane enabled, WA may change if any plane is enabled/disabled (based on total bandwidth requirement for display) > You could do same as active_crtcs, protect changing the workaround > current crtc's workaround mode in dev_priv->wm.skl.wa_state[pipe] with > connection_mutex. 
If this means changing that the global workaround > setting is changed, then also acquire all other crtc states. This is not per-CRTC WA, this is complete display-system level WA. & depends upon total memory requirement of display. As you suggested I can keep the status(bandwidth requirement) for each pipe in dev_priv->wm.skl.wa_state/bandwidth[pipe], and access it with some mutex. Is connection_mutex the right mutex to hold, what about wm_mutex? Regards, -Mahesh > > I can't say it's pretty though, it would need a good justification for > how bad the problem is in practice plus pretty good documentation on > why it works. > > ~Maarten
Mahesh Kumar schreef op di 29-11-2016 om 19:17 [+0530]: > > On Tuesday 29 November 2016 03:16 PM, Lankhorst, Maarten wrote: > > > > Mahesh Kumar schreef op di 29-11-2016 om 11:12 [+0530]: > > > > > > Hi, > > > > > > > > > On Thursday 24 November 2016 06:21 PM, Lankhorst, Maarten wrote: > > > > > > > > Mahesh Kumar schreef op do 24-11-2016 om 09:31 [+0530]: > > > > > > > > > > This patch implemnets Workariunds related to display > > > > > arbitrated > > > > > memory > > > > > bandwidth. These WA are applicabe for all gen-9 based > > > > > platforms. > > > > > > > > > > Changes since v1: > > > > > - Rebase on top of Paulo's patch series > > > > > Changes since v2: > > > > > - Address review comments > > > > > - Rebase/rework as per other patch changes in series > > > > > > > > > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> > > > > > --- > > > > > drivers/gpu/drm/i915/i915_drv.h | 9 +++ > > > > > drivers/gpu/drm/i915/intel_pm.c | 149 > > > > > +++++++++++++++++++++++++++++++++++++--- > > > > > 2 files changed, 149 insertions(+), 9 deletions(-) > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > > > > > b/drivers/gpu/drm/i915/i915_drv.h > > > > > index 4e2f17f..2b673c6 100644 > > > > > --- a/drivers/gpu/drm/i915/i915_drv.h > > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > > > > @@ -1193,6 +1193,13 @@ enum intel_sbi_destination { > > > > > SBI_MPHY, > > > > > }; > > > > > > > > > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds > > > > > */ > > > > > +enum watermark_memory_wa { > > > > > + WATERMARK_WA_NONE, > > > > > + WATERMARK_WA_X_TILED, > > > > > + WATERMARK_WA_Y_TILED, > > > > > +}; > > > > > + > > > > > #define QUIRK_PIPEA_FORCE (1<<0) > > > > > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > > > > > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > > > > > @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation { > > > > > > > > > > struct skl_wm_values { > > > > > unsigned dirty_pipes; > > > > > + /* any WaterMark memory workaround Required */ > 
> > > > + enum watermark_memory_wa mem_wa; > > > > > struct skl_ddb_allocation ddb; > > > > > }; > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > > > > > b/drivers/gpu/drm/i915/intel_pm.c > > > > > index 3e2dd8f..547bbda 100644 > > > > > --- a/drivers/gpu/drm/i915/intel_pm.c > > > > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > > > > @@ -2889,11 +2889,7 @@ skl_wm_plane_id(const struct > > > > > intel_plane > > > > > *plane) > > > > > } > > > > > } > > > > > > > > > > -/* > > > > > - * FIXME: We still don't have the proper code detect if we > > > > > need > > > > > to > > > > > apply the WA, > > > > > - * so assume we'll always need it in order to avoid > > > > > underruns. > > > > > - */ > > > > > -static bool skl_needs_memory_bw_wa(struct intel_atomic_state > > > > > *state) > > > > > +static bool intel_needs_memory_bw_wa(struct > > > > > intel_atomic_state > > > > > *state) > > > > > { > > > > > struct drm_i915_private *dev_priv = to_i915(state- > > > > > > > > > > > > base.dev); > > > > > > > > > > @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct > > > > > drm_atomic_state *state) > > > > > > > > > > latency = dev_priv->wm.skl_latency[level]; > > > > > > > > > > - if (skl_needs_memory_bw_wa(intel_state) && > > > > > + if (intel_needs_memory_bw_wa(intel_state) && > > > > > plane->base.state->fb->modifier == > > > > > I915_FORMAT_MOD_X_TILED) > > > > > latency += 15; > > > > > @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const > > > > > struct > > > > > drm_i915_private *dev_priv, > > > > > uint32_t y_min_scanlines; > > > > > struct intel_atomic_state *state = > > > > > to_intel_atomic_state(cstate->base.state); > > > > > - bool apply_memory_bw_wa = > > > > > skl_needs_memory_bw_wa(state); > > > > > + enum watermark_memory_wa mem_wa; > > > > > bool y_tiled, x_tiled; > > > > > > > > > > if (latency == 0 || !cstate->base.active || > > > > > !intel_pstate- > > > > > > > > > > > > base.visible) { > > > > > @@ -3613,7 +3609,8 @@ static 
int skl_compute_plane_wm(const > > > > > struct > > > > > drm_i915_private *dev_priv, > > > > > if (IS_KABYLAKE(dev_priv) && dev_priv- > > > > > >ipc_enabled) > > > > > latency += 4; > > > > > > > > > > - if (apply_memory_bw_wa && x_tiled) > > > > > + mem_wa = state->wm_results.mem_wa; > > > > > + if (mem_wa != WATERMARK_WA_NONE && x_tiled) > > > > > latency += 15; > > > > > > > > > > width = drm_rect_width(&intel_pstate->base.src) >> > > > > > 16; > > > > > @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const > > > > > struct > > > > > drm_i915_private *dev_priv, > > > > > y_min_scanlines = 4; > > > > > } > > > > > > > > > > - if (apply_memory_bw_wa) > > > > > + if (mem_wa == WATERMARK_WA_Y_TILED) > > > > > y_min_scanlines *= 2; > > > > > > > > > > plane_bytes_per_line = width * cpp; > > > > > @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct > > > > > drm_atomic_state > > > > > *state) > > > > > } > > > > > > > > > > /* > > > > > + * If Watermark workaround is changed we need to > > > > > recalculate > > > > > + * watermark values for all active pipes > > > > > + */ > > > > > + if (intel_state->wm_results.mem_wa != dev_priv- > > > > > > > > > > > > wm.skl_hw.mem_wa) { > > > > > + realloc_pipes = ~0; > > > > > + intel_state->wm_results.dirty_pipes = ~0; > > > > > + } > > > > > + > > > > > + /* > > > > > * We're not recomputing for the pipes not > > > > > included in > > > > > the > > > > > commit, so > > > > > * make sure we start with the current state. 
> > > > > */ > > > > > @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct > > > > > drm_atomic_state > > > > > *state) > > > > > } > > > > > > > > > > static void > > > > > +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state > > > > > *state) > > > > > +{ > > > > > + struct drm_device *dev = state->dev; > > > > > + struct drm_i915_private *dev_priv = to_i915(dev); > > > > > + struct intel_crtc *intel_crtc; > > > > > + struct intel_plane_state *intel_pstate; > > > > > + struct intel_atomic_state *intel_state = > > > > > to_intel_atomic_state(state); > > > > > + struct memdev_info *memdev_info = &dev_priv- > > > > > > > > > > > > memdev_info; > > > > > + int num_active_pipes; > > > > > + uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps; > > > > > + int display_bw_percentage; > > > > > + bool y_tile_enabled = false; > > > > > + > > > > > + if (!intel_needs_memory_bw_wa(intel_state)) { > > > > > + intel_state->wm_results.mem_wa = > > > > > WATERMARK_WA_NONE; > > > > > + return; > > > > > + } > > > > > + > > > > > + if (!memdev_info->valid) > > > > > + goto exit; > > > > > + > > > > > + max_pipe_bw_kbps = 0; > > > > > + num_active_pipes = 0; > > > > > + for_each_intel_crtc(dev, intel_crtc) { > > > > > + struct intel_crtc_state *cstate; > > > > > + struct intel_plane *plane; > > > > > + int num_active_planes; > > > > > + uint32_t max_plane_bw_kbps, pipe_bw_kbps; > > > > > + > > > > > + /* > > > > > + * If CRTC is part of current atomic commit, > > > > > get > > > > > crtc state from > > > > > + * existing CRTC state. else take the cached > > > > > CRTC > > > > > state > > > > > + */ > > > > > + cstate = NULL; > > > > > + if (state) > > > > > + cstate = > > > > > intel_atomic_get_existing_crtc_state(state, > > > > > + intel_crtc); > > > > > + if (!cstate) > > > > > + cstate = > > > > > to_intel_crtc_state(intel_crtc- > > > > > > > > > > > > base.state); > > > > This is really really not allowed. If you want to access > > > > crtc->state, you need to get the state. 
> > > > > > > > Since a w/a forces a full recalculation, best you can do is put > > > > the > > > > per > > > > pipe status in crtc_state->wm.skl as well. If it changes you > > > > could > > > > add > > > > all other pipes. > > > I really don't want to add all pipes in "state" until that is > > > necessary > > > otherwise it'll make our commit huge, that's why getting the > > > cached > > > state. > > > Same goes for plane. Later if w/a changes then I'm adding all > > > the > > > CRTC's in relloc_pipes mask. > > > > > > if (intel_state->wm_results.mem_wa != dev_priv- > > > > > > > > wm.skl_hw.mem_wa) > > > If I hold the crtc->mutex or plane->mutex lock, It may cause > > > deadlock > > > with other commits. > > > I can keep a cached copy of plane memory bandwidth requirement in > > > "dev_priv->wm.skl_hw", but again if there are two parallel > > > commits > > > it > > > may cause race. > > > > > > Can you please suggest what is the correct way of getting > > > plane/crtc > > > state during check phase without have to actually commit them > > > (add > > > in > > > global-state), > > > If any such thing is available in atomic framework? > > How often will the workaround status change in practice? > In Linux use-case status changes very rarely, If we connect multiple > high resolution display & our DRAM's bandwidth is less we may have > to > enable the WA. > & it'll remain there until any display in unplugged OR any modeset > with > lower resolution. > On the other hand, In OS with multi-plane enabled, WA may change if > any > plane is enabled/disabled (based on total bandwidth requirement for > display) > > > > You could do same as active_crtcs, protect changing the workaround > > current crtc's workaround mode in dev_priv->wm.skl.wa_state[pipe] > > with > > connection_mutex. If this means changing that the global workaround > > setting is changed, then also acquire all other crtc states. > This is not per-CRTC WA, this is complete display-system level WA. 
& > depends upon total memory requirement of display. > As you suggested I can keep the status(bandwidth requirement) for > each > pipe in dev_priv->wm.skl.wa_state/bandwidth[pipe], and access it > with > some mutex. > Is connection mutex is right mutex to hold, what about wm_mutex? It's nasty to do so, but it would work. connection_mutex is a ww_mutex. But I don't think this situation will happen often, so if the w/a for current crtc changes, won't it be better to just grab all states anyway? I don't think the optimization is worth the complexity it introduces you run the numbers in normal use. ~Maarten --------------------------------------------------------------------- Intel International B.V. Registered in The Netherlands under number 34098535 Statutory seat: Haarlemmermeer Registered address: Capronilaan 37, 1119NG Schiphol-Rijk This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4e2f17f..2b673c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1193,6 +1193,13 @@ enum intel_sbi_destination { SBI_MPHY, }; +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ +enum watermark_memory_wa { + WATERMARK_WA_NONE, + WATERMARK_WA_X_TILED, + WATERMARK_WA_Y_TILED, +}; + #define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) @@ -1764,6 +1771,8 @@ struct skl_ddb_allocation { struct skl_wm_values { unsigned dirty_pipes; + /* any WaterMark memory workaround Required */ + enum watermark_memory_wa mem_wa; struct skl_ddb_allocation ddb; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3e2dd8f..547bbda 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2889,11 +2889,7 @@ skl_wm_plane_id(const struct intel_plane *plane) } } -/* - * FIXME: We still don't have the proper code detect if we need to apply the WA, - * so assume we'll always need it in order to avoid underruns. 
- */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool intel_needs_memory_bw_wa(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -3067,7 +3063,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; - if (skl_needs_memory_bw_wa(intel_state) && + if (intel_needs_memory_bw_wa(intel_state) && plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3597,7 +3593,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t y_min_scanlines; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + enum watermark_memory_wa mem_wa; bool y_tiled, x_tiled; if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { @@ -3613,7 +3609,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) latency += 4; - if (apply_memory_bw_wa && x_tiled) + mem_wa = state->wm_results.mem_wa; + if (mem_wa != WATERMARK_WA_NONE && x_tiled) latency += 15; width = drm_rect_width(&intel_pstate->base.src) >> 16; @@ -3648,7 +3645,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, y_min_scanlines = 4; } - if (apply_memory_bw_wa) + if (mem_wa == WATERMARK_WA_Y_TILED) y_min_scanlines *= 2; plane_bytes_per_line = width * cpp; @@ -4077,6 +4074,15 @@ skl_compute_ddb(struct drm_atomic_state *state) } /* + * If Watermark workaround is changed we need to recalculate + * watermark values for all active pipes + */ + if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) { + realloc_pipes = ~0; + intel_state->wm_results.dirty_pipes = ~0; + } + + /* * We're not recomputing for the pipes not included in the commit, so * make sure we start with the current state. 
*/ @@ -4102,6 +4108,129 @@ skl_compute_ddb(struct drm_atomic_state *state) } static void +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *intel_crtc; + struct intel_plane_state *intel_pstate; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct memdev_info *memdev_info = &dev_priv->memdev_info; + int num_active_pipes; + uint32_t max_pipe_bw_kbps, total_pipe_bw_kbps; + int display_bw_percentage; + bool y_tile_enabled = false; + + if (!intel_needs_memory_bw_wa(intel_state)) { + intel_state->wm_results.mem_wa = WATERMARK_WA_NONE; + return; + } + + if (!memdev_info->valid) + goto exit; + + max_pipe_bw_kbps = 0; + num_active_pipes = 0; + for_each_intel_crtc(dev, intel_crtc) { + struct intel_crtc_state *cstate; + struct intel_plane *plane; + int num_active_planes; + uint32_t max_plane_bw_kbps, pipe_bw_kbps; + + /* + * If CRTC is part of current atomic commit, get crtc state from + * existing CRTC state. 
else take the cached CRTC state + */ + cstate = NULL; + if (state) + cstate = intel_atomic_get_existing_crtc_state(state, + intel_crtc); + if (!cstate) + cstate = to_intel_crtc_state(intel_crtc->base.state); + + if (!cstate->base.active) + continue; + + num_active_planes = 0; + max_plane_bw_kbps = 0; + for_each_intel_plane_mask(dev, plane, cstate->base.plane_mask) { + struct drm_framebuffer *fb; + uint32_t plane_bw_kbps; + uint32_t id = skl_wm_plane_id(plane); + + intel_pstate = NULL; + intel_pstate = + intel_atomic_get_existing_plane_state(state, + plane); + if (!intel_pstate) + intel_pstate = + to_intel_plane_state(plane->base.state); + + if (!intel_pstate->base.visible) + continue; + + if (WARN_ON(!intel_pstate->base.fb)) + continue; + + if (id == PLANE_CURSOR) + continue; + + fb = intel_pstate->base.fb; + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED || + fb->modifier == I915_FORMAT_MOD_Yf_TILED)) + y_tile_enabled = true; + + plane_bw_kbps = skl_adjusted_plane_pixel_rate(cstate, + intel_pstate); + max_plane_bw_kbps = max(plane_bw_kbps, + max_plane_bw_kbps); + num_active_planes++; + } + pipe_bw_kbps = max_plane_bw_kbps * num_active_planes; + max_pipe_bw_kbps = max(pipe_bw_kbps, max_pipe_bw_kbps); + num_active_pipes++; + } + + total_pipe_bw_kbps = max_pipe_bw_kbps * num_active_pipes; + + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps * 100, + memdev_info->bandwidth_kbps); + + /* + * If there is any Ytile plane enabled and arbitrated display + * bandwidth > 20% of raw system memory bandwidth + * Enale Y-tile related WA + * + * If memory is dual channel single rank, Xtile limit = 35%, else Xtile + * limit = 60% + * If there is no Ytile plane enabled and + * arbitrated display bandwidth > Xtile limit + * Enable X-tile realated WA + */ + if (y_tile_enabled && (display_bw_percentage > 20)) + intel_state->wm_results.mem_wa = WATERMARK_WA_Y_TILED; + else { + int x_tile_percentage = 60; + enum rank rank = DRAM_RANK_SINGLE; + + if (memdev_info->rank != 
DRAM_RANK_INVALID) + rank = memdev_info->rank; + + if ((rank == DRAM_RANK_SINGLE) && + (memdev_info->num_channels == 2)) + x_tile_percentage = 35; + + if (display_bw_percentage > x_tile_percentage) + intel_state->wm_results.mem_wa = WATERMARK_WA_X_TILED; + } + return; + +exit: + intel_state->wm_results.mem_wa = WATERMARK_WA_Y_TILED; +} + + +static void skl_copy_wm_for_pipe(struct skl_wm_values *dst, struct skl_wm_values *src, enum pipe pipe) @@ -4177,6 +4306,8 @@ skl_compute_wm(struct drm_atomic_state *state) /* Clear all dirty flags */ results->dirty_pipes = 0; + skl_compute_memory_bandwidth_wm_wa(state); + ret = skl_compute_ddb(state); if (ret) return ret;
This patch implements Workarounds related to display arbitrated memory bandwidth. These WA are applicable for all gen-9 based platforms. Changes since v1: - Rebase on top of Paulo's patch series Changes since v2: - Address review comments - Rebase/rework as per other patch changes in series Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 9 +++ drivers/gpu/drm/i915/intel_pm.c | 149 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 149 insertions(+), 9 deletions(-)