Message ID | 20161201154940.24446-9-mahesh1.kumar@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Em Qui, 2016-12-01 às 21:19 +0530, Mahesh Kumar escreveu: > This patch implemnets Workariunds related to display arbitrated > memory > bandwidth. These WA are applicabe for all gen-9 based platforms. 3 typos above. The WA is already implemented. What the patch does is that it opts-out of the WA in case we conclude it's safe. Please say this in the commit message. > > Changes since v1: > - Rebase on top of Paulo's patch series > Changes since v2: > - Address review comments > - Rebase/rework as per other patch changes in series > Changes since v3: > - Rework based on Maarten's comments > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 11 +++ > drivers/gpu/drm/i915/intel_display.c | 24 ++++++ > drivers/gpu/drm/i915/intel_pm.c | 155 > +++++++++++++++++++++++++++++++++-- > 3 files changed, 181 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > index 69213a4..3126259 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1216,6 +1216,13 @@ enum intel_sbi_destination { > SBI_MPHY, > }; > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ > +enum watermark_memory_wa { > + WATERMARK_WA_NONE, > + WATERMARK_WA_X_TILED, > + WATERMARK_WA_Y_TILED, > +}; > + > #define QUIRK_PIPEA_FORCE (1<<0) > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation { > > struct skl_wm_values { > unsigned dirty_pipes; > + /* any WaterMark memory workaround Required */ CapitaliZation is weird Here. Besides, no need for the comment. > + enum watermark_memory_wa mem_wa; > + uint32_t pipe_bw_kbps[I915_MAX_PIPES]; > + bool pipe_ytiled[I915_MAX_PIPES]; > struct skl_ddb_allocation ddb; > }; > > diff --git a/drivers/gpu/drm/i915/intel_display.c > b/drivers/gpu/drm/i915/intel_display.c > index 5d11002..f8da488 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct > drm_atomic_state *state) > to_intel_plane(plane)- > >frontbuffer_bit); > } > > +static void > +intel_update_wm_bw_wa(struct drm_atomic_state *state) > +{ > + struct intel_atomic_state *intel_state = > to_intel_atomic_state(state); > + struct drm_i915_private *dev_priv = to_i915(state->dev); > + const struct drm_crtc *crtc; > + const struct drm_crtc_state *cstate; > + struct skl_wm_values *results = &intel_state->wm_results; > + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; > + int i; > + > + if (!IS_GEN9(dev_priv)) > + return; > + > + for_each_crtc_in_state(state, crtc, cstate, i) { > + hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i]; > + hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i]; > + } > + > + hw_vals->mem_wa = results->mem_wa; > +} I think we can just patch skl_copy_wm_for_pipe() to also copy the fields we need instead of adding this function. Whouldn't that be much simpler? Anyway, this one looks correct, so no need to change if you don't want. > + > /** > * intel_atomic_commit - commit validated state object > * @dev: DRM device > @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct > drm_device *dev, > intel_shared_dpll_commit(state); > intel_atomic_track_fbs(state); > > + intel_update_wm_bw_wa(state); > + > if (intel_state->modeset) { > memcpy(dev_priv->min_pixclk, intel_state- > >min_pixclk, > sizeof(intel_state->min_pixclk)); > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index cc8fc84..fda6e1e 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev) > > #define SKL_SAGV_BLOCK_TIME 30 /* µs */ > > -/* > - * FIXME: We still don't have the proper code detect if we need to > apply the WA, > - * so assume we'll always need it in order to avoid underruns. > - */ > -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) > +static bool intel_needs_memory_bw_wa(struct intel_atomic_state > *state) Why are we renaming this function? Also, in my last review I suggested that we kill this function. I don't think it makes sense to have this function anymore. Besides, function intel_can_enable_sagv() needs to look at the value we assigned to enum watermark_memory_wa, not get a true/false based on platform version. > { > struct drm_i915_private *dev_priv = to_i915(state- > >base.dev); > > @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct > drm_atomic_state *state) > > latency = dev_priv->wm.skl_latency[level]; > > - if (skl_needs_memory_bw_wa(intel_state) && > + if (intel_needs_memory_bw_wa(intel_state) && > plane->base.state->fb->modifier == > I915_FORMAT_MOD_X_TILED) > latency += 15; > @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > uint32_t y_min_scanlines; > struct intel_atomic_state *state = > to_intel_atomic_state(cstate->base.state); > - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); > + enum watermark_memory_wa mem_wa; > bool y_tiled, x_tiled; > > if (latency == 0 || !cstate->base.active || !intel_pstate- > >base.visible) { > @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) > latency += 4; > > - if (apply_memory_bw_wa && x_tiled) > + mem_wa = state->wm_results.mem_wa; This assignment could have been done during declaration. > + if (mem_wa != WATERMARK_WA_NONE && x_tiled) > latency += 15; > > width = drm_rect_width(&intel_pstate->base.src) >> 16; > @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > y_min_scanlines = 4; > } > > - if (apply_memory_bw_wa) > + if (mem_wa == WATERMARK_WA_Y_TILED) > y_min_scanlines *= 2; > > plane_bytes_per_line = width * cpp; > @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state > *state) > } > > /* > + * If Watermark workaround is changed we need to recalculate > + * watermark values for all active pipes > + */ > + if (intel_state->wm_results.mem_wa != dev_priv- > >wm.skl_hw.mem_wa) { > + realloc_pipes = ~0; > + intel_state->wm_results.dirty_pipes = ~0; > + } But then skl_ddb_add_affected_planes() only actually adds to the commit the planes that have different DDB partitioning. It seems to me that it may be possible to have a case where the WA changes and the DDB stays the same, so we need to find a way to include every CRTC there. Maybe we could just go and call the function that adds every CRTC here. > + > + /* > * We're not recomputing for the pipes not included in the > commit, so > * make sure we start with the current state. > */ > @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state > *state) > return 0; > } > > +static int > +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) > +{ > + struct drm_device *dev = state->dev; > + struct drm_i915_private *dev_priv = to_i915(dev); > + struct intel_atomic_state *intel_state = > to_intel_atomic_state(state); > + struct memdev_info *memdev_info = &dev_priv->memdev_info; > + struct skl_wm_values *results = &intel_state->wm_results; > + struct drm_crtc *crtc; > + struct drm_crtc_state *cstate; > + int active_pipes = 0; > + uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps; > + int display_bw_percentage; > + bool y_tile_enabled = false; > + int ret, i; > + > + if (!intel_needs_memory_bw_wa(intel_state)) { > + results->mem_wa = WATERMARK_WA_NONE; > + return 0; > + } > + > + if (!memdev_info->valid) > + goto exit; There's no reason to use a label if it's only called here. Just move all that code to the if statement. > + > + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, > + state->acquire_ctx); > + if (ret) > + return ret; Why are we getting this here? Why this lock specifically? What's it protecting? Doc says: - ww mutex protecting connector state and routing - protects connector->encoder and encoder->crtc links We're not touching any of that here. > + > + memcpy(results->pipe_bw_kbps, dev_priv- > >wm.skl_hw.pipe_bw_kbps, > + sizeof(results->pipe_bw_kbps)); > + memcpy(results->pipe_ytiled, dev_priv- > >wm.skl_hw.pipe_ytiled, > + sizeof(results->pipe_ytiled)); > + > + for_each_crtc_in_state(state, crtc, cstate, i) { > + struct drm_plane *plane; > + const struct drm_plane_state *pstate; > + int active_planes = 0; > + uint32_t max_plane_bw_kbps = 0; > + > + results->pipe_ytiled[i] = false; > + > + if (!cstate->active) { > + results->pipe_bw_kbps[i] = 0; > + continue; > + } > + > + drm_atomic_crtc_state_for_each_plane_state(plane, > pstate, > + csta > te) { > + struct drm_framebuffer *fb; > + uint32_t plane_bw_kbps; > + enum plane_id id = to_intel_plane(plane)- > >id; > + > + if (!pstate->visible) > + continue; > + > + if (WARN_ON(!pstate->fb)) > + continue; > + > + if (id == PLANE_CURSOR) > + continue; > + > + fb = pstate->fb; > + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED > || > + fb->modifier == > I915_FORMAT_MOD_Yf_TILED)) > + results->pipe_ytiled[i] = true; > + > + plane_bw_kbps = > skl_adjusted_plane_pixel_rate( > + to_intel_crtc_state( > cstate), > + to_intel_plane_state > (pstate)); > + max_plane_bw_kbps = max(plane_bw_kbps, > + max_plane_bw > _kbps); > + active_planes++; > + } > + results->pipe_bw_kbps[i] = max_plane_bw_kbps * > active_planes; > + } > + > + for_each_pipe(dev_priv, i) { > + if (results->pipe_bw_kbps[i]) { > + max_pipe_bw_kbps = max(max_pipe_bw_kbps, > + results->pipe_bw_kbps[i]); > + active_pipes++; > + } > + if (results->pipe_ytiled[i]) > + y_tile_enabled = true; > + } > + > + total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes; > + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps > * 100, > + memdev_info- > >bandwidth_kbps); Shouldn't this be just DIV_ROUND_UP()? I don't see 64 bit variables here, nor the possibility of overflows. > + > + /* > + * If there is any Ytile plane enabled and arbitrated > display > + * bandwidth > 20% of raw system memory bandwidth > + * Enale Y-tile related WA > + * > + * If memory is dual channel single rank, Xtile limit = 35%, > else Xtile > + * limit = 60% > + * If there is no Ytile plane enabled and > + * arbitrated display bandwidth > Xtile limit > + * Enable X-tile realated WA > + */ > + if (y_tile_enabled && (display_bw_percentage > 20)) > + results->mem_wa = WATERMARK_WA_Y_TILED; > + else { > + int x_tile_percentage = 60; > + enum rank rank = DRAM_RANK_SINGLE; > + > + if (memdev_info->rank != DRAM_RANK_INVALID) > + rank = memdev_info->rank; I really think that the previous patch should prevent this. If memdev_info->rank is invalid then memdev_info->valid should also be false and we'd have returned at the beginning of the function. With that, this part of the code could be simplified a little bit since then we'd have no need to choose a default rank. > + > + if ((rank == DRAM_RANK_SINGLE) && > + (memdev_info->num_channels > == 2)) > + x_tile_percentage = 35; > + > + if (display_bw_percentage > x_tile_percentage) > + results->mem_wa = WATERMARK_WA_X_TILED; > + } > + > + return 0; > + > +exit: > + results->mem_wa = WATERMARK_WA_Y_TILED; > + return 0; > +} > + > + Two blank lines here. > static void > skl_copy_wm_for_pipe(struct skl_wm_values *dst, > struct skl_wm_values *src, > @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state) > /* Clear all dirty flags */ > results->dirty_pipes = 0; > > + ret = skl_compute_memory_bandwidth_wm_wa(state); > + if (ret) > + return ret; > + > ret = skl_compute_ddb(state); > if (ret) > return ret;
Hi, On Thursday 15 December 2016 10:37 PM, Paulo Zanoni wrote: > Em Qui, 2016-12-01 às 21:19 +0530, Mahesh Kumar escreveu: >> This patch implemnets Workariunds related to display arbitrated >> memory >> bandwidth. These WA are applicabe for all gen-9 based platforms. > 3 typos above. > > The WA is already implemented. What the patch does is that it opts-out > of the WA in case we conclude it's safe. Please say this in the commit > message. updated the commit msg >> Changes since v1: >> - Rebase on top of Paulo's patch series >> Changes since v2: >> - Address review comments >> - Rebase/rework as per other patch changes in series >> Changes since v3: >> - Rework based on Maarten's comments >> >> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 11 +++ >> drivers/gpu/drm/i915/intel_display.c | 24 ++++++ >> drivers/gpu/drm/i915/intel_pm.c | 155 >> +++++++++++++++++++++++++++++++++-- >> 3 files changed, 181 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index 69213a4..3126259 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1216,6 +1216,13 @@ enum intel_sbi_destination { >> SBI_MPHY, >> }; >> >> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ >> +enum watermark_memory_wa { >> + WATERMARK_WA_NONE, >> + WATERMARK_WA_X_TILED, >> + WATERMARK_WA_Y_TILED, >> +}; >> + >> #define QUIRK_PIPEA_FORCE (1<<0) >> #define QUIRK_LVDS_SSC_DISABLE (1<<1) >> #define QUIRK_INVERT_BRIGHTNESS (1<<2) >> @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation { >> >> struct skl_wm_values { >> unsigned dirty_pipes; >> + /* any WaterMark memory workaround Required */ > CapitaliZation is weird Here. Besides, no need for the comment. > > >> + enum watermark_memory_wa mem_wa; >> + uint32_t pipe_bw_kbps[I915_MAX_PIPES]; >> + bool pipe_ytiled[I915_MAX_PIPES]; >> struct skl_ddb_allocation ddb; >> }; >> >> diff --git a/drivers/gpu/drm/i915/intel_display.c >> b/drivers/gpu/drm/i915/intel_display.c >> index 5d11002..f8da488 100644 >> --- a/drivers/gpu/drm/i915/intel_display.c >> +++ b/drivers/gpu/drm/i915/intel_display.c >> @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct >> drm_atomic_state *state) >> to_intel_plane(plane)- >>> frontbuffer_bit); >> } >> >> +static void >> +intel_update_wm_bw_wa(struct drm_atomic_state *state) >> +{ >> + struct intel_atomic_state *intel_state = >> to_intel_atomic_state(state); >> + struct drm_i915_private *dev_priv = to_i915(state->dev); >> + const struct drm_crtc *crtc; >> + const struct drm_crtc_state *cstate; >> + struct skl_wm_values *results = &intel_state->wm_results; >> + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; >> + int i; >> + >> + if (!IS_GEN9(dev_priv)) >> + return; >> + >> + for_each_crtc_in_state(state, crtc, cstate, i) { >> + hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i]; >> + hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i]; >> + } >> + >> + hw_vals->mem_wa = results->mem_wa; >> +} > I think we can just patch skl_copy_wm_for_pipe() to also copy the > fields we need instead of adding this function. Whouldn't that be much > simpler? Anyway, this one looks correct, so no need to change if you > don't want. > > >> + >> /** >> * intel_atomic_commit - commit validated state object >> * @dev: DRM device >> @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct >> drm_device *dev, >> intel_shared_dpll_commit(state); >> intel_atomic_track_fbs(state); >> >> + intel_update_wm_bw_wa(state); >> + >> if (intel_state->modeset) { >> memcpy(dev_priv->min_pixclk, intel_state- >>> min_pixclk, >> sizeof(intel_state->min_pixclk)); >> diff --git a/drivers/gpu/drm/i915/intel_pm.c >> b/drivers/gpu/drm/i915/intel_pm.c >> index cc8fc84..fda6e1e 100644 >> --- a/drivers/gpu/drm/i915/intel_pm.c >> +++ b/drivers/gpu/drm/i915/intel_pm.c >> @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev) >> >> #define SKL_SAGV_BLOCK_TIME 30 /* µs */ >> >> -/* >> - * FIXME: We still don't have the proper code detect if we need to >> apply the WA, >> - * so assume we'll always need it in order to avoid underruns. >> - */ >> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) >> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state >> *state) > Why are we renaming this function? > > Also, in my last review I suggested that we kill this function. I don't > think it makes sense to have this function anymore. > > Besides, function intel_can_enable_sagv() needs to look at the value we > assigned to enum watermark_memory_wa, not get a true/false based on > platform version. removed the function > > >> { >> struct drm_i915_private *dev_priv = to_i915(state- >>> base.dev); >> >> @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct >> drm_atomic_state *state) >> >> latency = dev_priv->wm.skl_latency[level]; >> >> - if (skl_needs_memory_bw_wa(intel_state) && >> + if (intel_needs_memory_bw_wa(intel_state) && >> plane->base.state->fb->modifier == >> I915_FORMAT_MOD_X_TILED) >> latency += 15; >> @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct >> drm_i915_private *dev_priv, >> uint32_t y_min_scanlines; >> struct intel_atomic_state *state = >> to_intel_atomic_state(cstate->base.state); >> - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); >> + enum watermark_memory_wa mem_wa; >> bool y_tiled, x_tiled; >> >> if (latency == 0 || !cstate->base.active || !intel_pstate- >>> base.visible) { >> @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct >> drm_i915_private *dev_priv, >> if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) >> latency += 4; >> >> - if (apply_memory_bw_wa && x_tiled) >> + mem_wa = state->wm_results.mem_wa; > This assignment could have been done during declaration. done > > >> + if (mem_wa != WATERMARK_WA_NONE && x_tiled) >> latency += 15; >> >> width = drm_rect_width(&intel_pstate->base.src) >> 16; >> @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct >> drm_i915_private *dev_priv, >> y_min_scanlines = 4; >> } >> >> - if (apply_memory_bw_wa) >> + if (mem_wa == WATERMARK_WA_Y_TILED) >> y_min_scanlines *= 2; >> >> plane_bytes_per_line = width * cpp; >> @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state >> *state) >> } >> >> /* >> + * If Watermark workaround is changed we need to recalculate >> + * watermark values for all active pipes >> + */ >> + if (intel_state->wm_results.mem_wa != dev_priv- >>> wm.skl_hw.mem_wa) { >> + realloc_pipes = ~0; >> + intel_state->wm_results.dirty_pipes = ~0; >> + } > But then skl_ddb_add_affected_planes() only actually adds to the commit > the planes that have different DDB partitioning. It seems to me that it > may be possible to have a case where the WA changes and the DDB stays > the same, so we need to find a way to include every CRTC there. Maybe > we could just go and call the function that adds every CRTC here. modified this part of code to calculate WM for all pipes if WA is changing & add pipe only if WM values are changing. We are holding a ww_mutex, so this operation should be race condition free. > >> + >> + /* >> * We're not recomputing for the pipes not included in the >> commit, so >> * make sure we start with the current state. >> */ >> @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state >> *state) >> return 0; >> } >> >> +static int >> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) >> +{ >> + struct drm_device *dev = state->dev; >> + struct drm_i915_private *dev_priv = to_i915(dev); >> + struct intel_atomic_state *intel_state = >> to_intel_atomic_state(state); >> + struct memdev_info *memdev_info = &dev_priv->memdev_info; >> + struct skl_wm_values *results = &intel_state->wm_results; >> + struct drm_crtc *crtc; >> + struct drm_crtc_state *cstate; >> + int active_pipes = 0; >> + uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps; >> + int display_bw_percentage; >> + bool y_tile_enabled = false; >> + int ret, i; >> + >> + if (!intel_needs_memory_bw_wa(intel_state)) { >> + results->mem_wa = WATERMARK_WA_NONE; >> + return 0; >> + } >> + >> + if (!memdev_info->valid) >> + goto exit; > There's no reason to use a label if it's only called here. Just move > all that code to the if statement. done > > >> + >> + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, >> + state->acquire_ctx); >> + if (ret) >> + return ret; > Why are we getting this here? Why this lock specifically? What's it > protecting? > > Doc says: > - ww mutex protecting connector state and routing > - protects connector->encoder and encoder->crtc links > > We're not touching any of that here. Created a new ww_mutex lock to prevent wm related calculation, when it may affect other CRTC's also. using that now instead of connection_mutex. > > >> + >> + memcpy(results->pipe_bw_kbps, dev_priv- >>> wm.skl_hw.pipe_bw_kbps, >> + sizeof(results->pipe_bw_kbps)); >> + memcpy(results->pipe_ytiled, dev_priv- >>> wm.skl_hw.pipe_ytiled, >> + sizeof(results->pipe_ytiled)); >> + >> + for_each_crtc_in_state(state, crtc, cstate, i) { >> + struct drm_plane *plane; >> + const struct drm_plane_state *pstate; >> + int active_planes = 0; >> + uint32_t max_plane_bw_kbps = 0; >> + >> + results->pipe_ytiled[i] = false; >> + >> + if (!cstate->active) { >> + results->pipe_bw_kbps[i] = 0; >> + continue; >> + } >> + >> + drm_atomic_crtc_state_for_each_plane_state(plane, >> pstate, >> + csta >> te) { >> + struct drm_framebuffer *fb; >> + uint32_t plane_bw_kbps; >> + enum plane_id id = to_intel_plane(plane)- >>> id; >> + >> + if (!pstate->visible) >> + continue; >> + >> + if (WARN_ON(!pstate->fb)) >> + continue; >> + >> + if (id == PLANE_CURSOR) >> + continue; >> + >> + fb = pstate->fb; >> + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED >> || >> + fb->modifier == >> I915_FORMAT_MOD_Yf_TILED)) >> + results->pipe_ytiled[i] = true; >> + >> + plane_bw_kbps = >> skl_adjusted_plane_pixel_rate( >> + to_intel_crtc_state( >> cstate), >> + to_intel_plane_state >> (pstate)); >> + max_plane_bw_kbps = max(plane_bw_kbps, >> + max_plane_bw >> _kbps); >> + active_planes++; >> + } >> + results->pipe_bw_kbps[i] = max_plane_bw_kbps * >> active_planes; >> + } >> + >> + for_each_pipe(dev_priv, i) { >> + if (results->pipe_bw_kbps[i]) { >> + max_pipe_bw_kbps = max(max_pipe_bw_kbps, >> + results->pipe_bw_kbps[i]); >> + active_pipes++; >> + } >> + if (results->pipe_ytiled[i]) >> + y_tile_enabled = true; >> + } >> + >> + total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes; >> + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps >> * 100, >> + memdev_info- >>> bandwidth_kbps); > Shouldn't this be just DIV_ROUND_UP()? I don't see 64 bit variables > here, nor the possibility of overflows. If we have 3 4K pannels & each have 3 planes enabled with scaling then this value theoretically may go beyond 32bits, type-casted the variable now to 64bit. 536MHz * 3 planes * 3 pipes * pipe_scaler_ratio * plane_scaler_ratio * 100 = 536000 * 3 * 3 * (1.9 *1.9 (hscale & vscale)) * (1.9 * 1.9 (pipe hscale & vscale)) * 100 = 6286685040 = 0x1 76B7 3370 > > >> + >> + /* >> + * If there is any Ytile plane enabled and arbitrated >> display >> + * bandwidth > 20% of raw system memory bandwidth >> + * Enale Y-tile related WA >> + * >> + * If memory is dual channel single rank, Xtile limit = 35%, >> else Xtile >> + * limit = 60% >> + * If there is no Ytile plane enabled and >> + * arbitrated display bandwidth > Xtile limit >> + * Enable X-tile realated WA >> + */ >> + if (y_tile_enabled && (display_bw_percentage > 20)) >> + results->mem_wa = WATERMARK_WA_Y_TILED; >> + else { >> + int x_tile_percentage = 60; >> + enum rank rank = DRAM_RANK_SINGLE; >> + >> + if (memdev_info->rank != DRAM_RANK_INVALID) >> + rank = memdev_info->rank; > I really think that the previous patch should prevent this. If > memdev_info->rank is invalid then memdev_info->valid should also be > false and we'd have returned at the beginning of the function. With > that, this part of the code could be simplified a little bit since then > we'd have no need to choose a default rank. > done Regards, -Mahesh > >> + >> + if ((rank == DRAM_RANK_SINGLE) && >> + (memdev_info->num_channels >> == 2)) >> + x_tile_percentage = 35; >> + >> + if (display_bw_percentage > x_tile_percentage) >> + results->mem_wa = WATERMARK_WA_X_TILED; >> + } >> + >> + return 0; >> + >> +exit: >> + results->mem_wa = WATERMARK_WA_Y_TILED; >> + return 0; >> +} >> + >> + > Two blank lines here. > > >> static void >> skl_copy_wm_for_pipe(struct skl_wm_values *dst, >> struct skl_wm_values *src, >> @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state) >> /* Clear all dirty flags */ >> results->dirty_pipes = 0; >> >> + ret = skl_compute_memory_bandwidth_wm_wa(state); >> + if (ret) >> + return ret; >> + >> ret = skl_compute_ddb(state); >> if (ret) >> return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 69213a4..3126259 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1216,6 +1216,13 @@ enum intel_sbi_destination { SBI_MPHY, }; +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ +enum watermark_memory_wa { + WATERMARK_WA_NONE, + WATERMARK_WA_X_TILED, + WATERMARK_WA_Y_TILED, +}; + #define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation { struct skl_wm_values { unsigned dirty_pipes; + /* any WaterMark memory workaround Required */ + enum watermark_memory_wa mem_wa; + uint32_t pipe_bw_kbps[I915_MAX_PIPES]; + bool pipe_ytiled[I915_MAX_PIPES]; struct skl_ddb_allocation ddb; }; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5d11002..f8da488 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state) to_intel_plane(plane)->frontbuffer_bit); } +static void +intel_update_wm_bw_wa(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + const struct drm_crtc *crtc; + const struct drm_crtc_state *cstate; + struct skl_wm_values *results = &intel_state->wm_results; + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; + int i; + + if (!IS_GEN9(dev_priv)) + return; + + for_each_crtc_in_state(state, crtc, cstate, i) { + hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i]; + hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i]; + } + + hw_vals->mem_wa = results->mem_wa; +} + /** * intel_atomic_commit - commit validated state object * @dev: DRM device @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct drm_device *dev, intel_shared_dpll_commit(state); intel_atomic_track_fbs(state); + intel_update_wm_bw_wa(state); + if (intel_state->modeset) { memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, sizeof(intel_state->min_pixclk)); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cc8fc84..fda6e1e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev) #define SKL_SAGV_BLOCK_TIME 30 /* µs */ -/* - * FIXME: We still don't have the proper code detect if we need to apply the WA, - * so assume we'll always need it in order to avoid underruns. - */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) +static bool intel_needs_memory_bw_wa(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; - if (skl_needs_memory_bw_wa(intel_state) && + if (intel_needs_memory_bw_wa(intel_state) && plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t y_min_scanlines; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + enum watermark_memory_wa mem_wa; bool y_tiled, x_tiled; if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) latency += 4; - if (apply_memory_bw_wa && x_tiled) + mem_wa = state->wm_results.mem_wa; + if (mem_wa != WATERMARK_WA_NONE && x_tiled) latency += 15; width = drm_rect_width(&intel_pstate->base.src) >> 16; @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, y_min_scanlines = 4; } - if (apply_memory_bw_wa) + if (mem_wa == WATERMARK_WA_Y_TILED) y_min_scanlines *= 2; plane_bytes_per_line = width * cpp; @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state *state) } /* + * If Watermark workaround is changed we need to recalculate + * watermark values for all active pipes + */ + if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) { + realloc_pipes = ~0; + intel_state->wm_results.dirty_pipes = ~0; + } + + /* * We're not recomputing for the pipes not included in the commit, so * make sure we start with the current state. */ @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state *state) return 0; } +static int +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct memdev_info *memdev_info = &dev_priv->memdev_info; + struct skl_wm_values *results = &intel_state->wm_results; + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + int active_pipes = 0; + uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps; + int display_bw_percentage; + bool y_tile_enabled = false; + int ret, i; + + if (!intel_needs_memory_bw_wa(intel_state)) { + results->mem_wa = WATERMARK_WA_NONE; + return 0; + } + + if (!memdev_info->valid) + goto exit; + + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; + + memcpy(results->pipe_bw_kbps, dev_priv->wm.skl_hw.pipe_bw_kbps, + sizeof(results->pipe_bw_kbps)); + memcpy(results->pipe_ytiled, dev_priv->wm.skl_hw.pipe_ytiled, + sizeof(results->pipe_ytiled)); + + for_each_crtc_in_state(state, crtc, cstate, i) { + struct drm_plane *plane; + const struct drm_plane_state *pstate; + int active_planes = 0; + uint32_t max_plane_bw_kbps = 0; + + results->pipe_ytiled[i] = false; + + if (!cstate->active) { + results->pipe_bw_kbps[i] = 0; + continue; + } + + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, + cstate) { + struct drm_framebuffer *fb; + uint32_t plane_bw_kbps; + enum plane_id id = to_intel_plane(plane)->id; + + if (!pstate->visible) + continue; + + if (WARN_ON(!pstate->fb)) + continue; + + if (id == PLANE_CURSOR) + continue; + + fb = pstate->fb; + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED || + fb->modifier == I915_FORMAT_MOD_Yf_TILED)) + results->pipe_ytiled[i] = true; + + plane_bw_kbps = skl_adjusted_plane_pixel_rate( + to_intel_crtc_state(cstate), + to_intel_plane_state(pstate)); + max_plane_bw_kbps = max(plane_bw_kbps, + max_plane_bw_kbps); + active_planes++; + } + results->pipe_bw_kbps[i] = max_plane_bw_kbps * active_planes; + } + + for_each_pipe(dev_priv, i) { + if (results->pipe_bw_kbps[i]) { + max_pipe_bw_kbps = max(max_pipe_bw_kbps, + results->pipe_bw_kbps[i]); + active_pipes++; + } + if (results->pipe_ytiled[i]) + y_tile_enabled = true; + } + + total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes; + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps * 100, + memdev_info->bandwidth_kbps); + + /* + * If there is any Ytile plane enabled and arbitrated display + * bandwidth > 20% of raw system memory bandwidth + * Enale Y-tile related WA + * + * If memory is dual channel single rank, Xtile limit = 35%, else Xtile + * limit = 60% + * If there is no Ytile plane enabled and + * arbitrated display bandwidth > Xtile limit + * Enable X-tile realated WA + */ + if (y_tile_enabled && (display_bw_percentage > 20)) + results->mem_wa = WATERMARK_WA_Y_TILED; + else { + int x_tile_percentage = 60; + enum rank rank = DRAM_RANK_SINGLE; + + if (memdev_info->rank != DRAM_RANK_INVALID) + rank = memdev_info->rank; + + if ((rank == DRAM_RANK_SINGLE) && + (memdev_info->num_channels == 2)) + x_tile_percentage = 35; + + if (display_bw_percentage > x_tile_percentage) + results->mem_wa = WATERMARK_WA_X_TILED; + } + + return 0; + +exit: + results->mem_wa = WATERMARK_WA_Y_TILED; + return 0; +} + + static void skl_copy_wm_for_pipe(struct skl_wm_values *dst, struct skl_wm_values *src, @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state) /* Clear all dirty flags */ results->dirty_pipes = 0; + ret = skl_compute_memory_bandwidth_wm_wa(state); + if (ret) + return ret; + ret = skl_compute_ddb(state); if (ret) return ret;
This patch implemnets Workariunds related to display arbitrated memory bandwidth. These WA are applicabe for all gen-9 based platforms. Changes since v1: - Rebase on top of Paulo's patch series Changes since v2: - Address review comments - Rebase/rework as per other patch changes in series Changes since v3: - Rework based on Maarten's comments Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 11 +++ drivers/gpu/drm/i915/intel_display.c | 24 ++++++ drivers/gpu/drm/i915/intel_pm.c | 155 +++++++++++++++++++++++++++++++++-- 3 files changed, 181 insertions(+), 9 deletions(-)