Message ID | 20211001161626.18954-1-radhakrishna.sripada@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v5] drm/i915: Update memory bandwidth formulae | expand |
On Fri, Oct 01, 2021 at 09:16:26AM -0700, Radhakrishna Sripada wrote: > The formulae has been updated to include more variables. Make > sure the code carries the same. > > Bspec: 64631 I notice .deburst should be set to 8 rather than 16 on RKL. > > v2: Make GEN11 follow the default route and fix calculation of > maxdebw(RK) > v3: Fix div by zero on default case > Correct indent for fallthrough(Jani) > v4: Fix div by zero on gen11. > v5: Fix 0 max_numchannels case > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > Suggested-by: Matt Roper <matthew.d.roper@intel.com> > Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com> > --- > drivers/gpu/drm/i915/display/intel_bw.c | 80 ++++++++++++++++++++----- > 1 file changed, 65 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c > index 4b94256d7319..a3c0a282aaad 100644 > --- a/drivers/gpu/drm/i915/display/intel_bw.c > +++ b/drivers/gpu/drm/i915/display/intel_bw.c > @@ -27,6 +27,9 @@ struct intel_qgv_info { > u8 num_points; > u8 num_psf_points; > u8 t_bl; > + u8 max_numchannels; > + u8 channel_width; > + u8 deinterleave; > }; > > static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv, > @@ -133,7 +136,8 @@ int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, > } > > static int icl_get_qgv_points(struct drm_i915_private *dev_priv, > - struct intel_qgv_info *qi) > + struct intel_qgv_info *qi, > + bool is_y_tile) > { > const struct dram_info *dram_info = &dev_priv->dram_info; > int i, ret; > @@ -144,17 +148,41 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, > if (DISPLAY_VER(dev_priv) == 12) > switch (dram_info->type) { > case INTEL_DRAM_DDR4: > - qi->t_bl = 4; > + qi->t_bl = is_y_tile ? 8 : 4; > + qi->max_numchannels = 2; > + qi->channel_width = 64; > + qi->deinterleave = is_y_tile ? 1 : 2; > break; > case INTEL_DRAM_DDR5: > - qi->t_bl = 8; > + qi->t_bl = is_y_tile ? 16 : 8; > + qi->max_numchannels = 4; > + qi->channel_width = 32; > + qi->deinterleave = is_y_tile ? 1 : 2; > + break; > + case INTEL_DRAM_LPDDR4: > + if (IS_ROCKETLAKE(dev_priv)) { > + qi->t_bl = 8; > + qi->max_numchannels = 4; > + qi->channel_width = 32; > + qi->deinterleave = 2; > + break; > + } > + fallthrough; > + case INTEL_DRAM_LPDDR5: > + qi->t_bl = 16; > + qi->max_numchannels = 8; > + qi->channel_width = 16; > + qi->deinterleave = is_y_tile ? 2 : 4; > break; > default: > qi->t_bl = 16; > + qi->max_numchannels = 1; > break; > } > - else if (DISPLAY_VER(dev_priv) == 11) > + else if (DISPLAY_VER(dev_priv) == 11) { > qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8; > + qi->max_numchannels = 1; We don't have several of these new values for gen11, so that's going to cause problems in the calculations below. E.g., if we leave channel_width as 0, that makes peakbw 0, which makes maxdebw 0. So we either need to backcalculate the proper values that will make the version 12 algorithm give the same values as the version 11 algorithm, or we need to implement the two algorithms separately. Bspec 54023 vs bspec 64631 look a bit different, but I didn't inspect them carefully enough to see if 54023 just has some of these values implicitly baked into the calculations already. > + } > > if (drm_WARN_ON(&dev_priv->drm, > qi->num_points > ARRAY_SIZE(qi->points))) > @@ -263,37 +291,59 @@ static const struct intel_sa_info adlp_sa_info = { > static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) > { > struct intel_qgv_info qi = {}; > + const struct dram_info *dram_info = &dev_priv->dram_info; > bool is_y_tile = true; /* assume y tile may be used */ > int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels); > - int deinterleave; > int ipqdepth, ipqdepthpch; > int dclk_max; > - int maxdebw; > + int maxdebw, peakbw; > + int clperchgroup; > + int num_groups = ARRAY_SIZE(dev_priv->max_bw); > int i, ret; > > - ret = icl_get_qgv_points(dev_priv, &qi); > + ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); > if (ret) { > drm_dbg_kms(&dev_priv->drm, > "Failed to get memory subsystem information, ignoring bandwidth limits"); > return ret; > } > > - deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); > - dclk_max = icl_sagv_max_dclk(&qi); > + if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5) > + num_channels *= 2; > + > + qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); > + > + if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12) > + qi.deinterleave = DIV_ROUND_UP(qi.deinterleave, 2); This becomes 0 for DDR4/DDR5 y-tile. The bspec has another MAX() operation here to ensure that we end up with a value of at least 1. > + > + if (qi.max_numchannels != 0) > + num_channels = min_t(u8, num_channels, qi.max_numchannels); I don't see this in the bspec, but it makes sense. Should we have some kind of debug message / WARN if the number of channels we think we have exceeds the maximum? > + > + dclk_max = icl_calc_bw(icl_sagv_max_dclk(&qi), 16, 1); dclk_max in the old code was the maximum clock rate (and corresponds to "clk_max" in the updated algorithm). But here we're modifying scaling it into different units (I'm not sure what they're supposed to represent). > > ipqdepthpch = 16; > > - maxdebw = min(sa->deprogbwlimit * 1000, > - icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */ > + peakbw = num_channels * DIV_ROUND_UP(qi.deinterleave, 8) * dclk_max; qi.deinterleave here is supposed to be channel_width isn't it? deinterleave/8 would be 0 for all possible values of deinterleave I think. > + maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */ > ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); > + /* > + * clperchgroup = 4kpagespermempage * clperchperblock, > + * clperchperblock = 8 / num_channels * interleave Based on the way this is written in the bspec, I wonder if they meant to have parentheses around (num_channels * deinterleave)? Probably worth confirming. > + */ > + clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave; > > - for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { > + for (i = 0; i < num_groups; i++) { > struct intel_bw_info *bi = &dev_priv->max_bw[i]; > int clpchgroup; > int j; > > - clpchgroup = (sa->deburst * deinterleave / num_channels) << i; > - bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; > + clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i; > + > + if ((i < num_groups - 1 && clpchgroup < clperchgroup) || > + DISPLAY_VER(dev_priv) == 11) > + bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; > + else > + bi->num_planes = 0; I think these are off by one. num_planes[0] is supposed to be 0. The loop in the bspec is assigning values for numplanes[i + 1]. The loop you have here is going to leave numplanes[num_groups - 1] as 0 instead. This does differ from the old version of the algorithm. > > bi->num_qgv_points = qi.num_points; > bi->num_psf_gv_points = qi.num_psf_points; > @@ -339,7 +389,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel > * SAGV point, but we can't send PCode commands to restrict it > * as it will fail and pointless anyway. > */ > - if (qi.num_points == 1) > + if (qi.num_points >= 1) Was this change a mistake? This is going to prevent us from using SAGV in the situations where it should be used (i.e., where the BIOS properly tells us about multiple QGV points we can switch between). Matt > dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; > else > dev_priv->sagv_status = I915_SAGV_ENABLED; > -- > 2.20.1 >
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 4b94256d7319..a3c0a282aaad 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -27,6 +27,9 @@ struct intel_qgv_info { u8 num_points; u8 num_psf_points; u8 t_bl; + u8 max_numchannels; + u8 channel_width; + u8 deinterleave; }; static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv, @@ -133,7 +136,8 @@ int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, } static int icl_get_qgv_points(struct drm_i915_private *dev_priv, - struct intel_qgv_info *qi) + struct intel_qgv_info *qi, + bool is_y_tile) { const struct dram_info *dram_info = &dev_priv->dram_info; int i, ret; @@ -144,17 +148,41 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) == 12) switch (dram_info->type) { case INTEL_DRAM_DDR4: - qi->t_bl = 4; + qi->t_bl = is_y_tile ? 8 : 4; + qi->max_numchannels = 2; + qi->channel_width = 64; + qi->deinterleave = is_y_tile ? 1 : 2; break; case INTEL_DRAM_DDR5: - qi->t_bl = 8; + qi->t_bl = is_y_tile ? 16 : 8; + qi->max_numchannels = 4; + qi->channel_width = 32; + qi->deinterleave = is_y_tile ? 1 : 2; + break; + case INTEL_DRAM_LPDDR4: + if (IS_ROCKETLAKE(dev_priv)) { + qi->t_bl = 8; + qi->max_numchannels = 4; + qi->channel_width = 32; + qi->deinterleave = 2; + break; + } + fallthrough; + case INTEL_DRAM_LPDDR5: + qi->t_bl = 16; + qi->max_numchannels = 8; + qi->channel_width = 16; + qi->deinterleave = is_y_tile ? 2 : 4; break; default: qi->t_bl = 16; + qi->max_numchannels = 1; break; } - else if (DISPLAY_VER(dev_priv) == 11) + else if (DISPLAY_VER(dev_priv) == 11) { qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8; + qi->max_numchannels = 1; + } if (drm_WARN_ON(&dev_priv->drm, qi->num_points > ARRAY_SIZE(qi->points))) @@ -263,37 +291,59 @@ static const struct intel_sa_info adlp_sa_info = { static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) { struct intel_qgv_info qi = {}; + const struct dram_info *dram_info = &dev_priv->dram_info; bool is_y_tile = true; /* assume y tile may be used */ int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels); - int deinterleave; int ipqdepth, ipqdepthpch; int dclk_max; - int maxdebw; + int maxdebw, peakbw; + int clperchgroup; + int num_groups = ARRAY_SIZE(dev_priv->max_bw); int i, ret; - ret = icl_get_qgv_points(dev_priv, &qi); + ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); if (ret) { drm_dbg_kms(&dev_priv->drm, "Failed to get memory subsystem information, ignoring bandwidth limits"); return ret; } - deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); - dclk_max = icl_sagv_max_dclk(&qi); + if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5) + num_channels *= 2; + + qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); + + if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12) + qi.deinterleave = DIV_ROUND_UP(qi.deinterleave, 2); + + if (qi.max_numchannels != 0) + num_channels = min_t(u8, num_channels, qi.max_numchannels); + + dclk_max = icl_calc_bw(icl_sagv_max_dclk(&qi), 16, 1); ipqdepthpch = 16; - maxdebw = min(sa->deprogbwlimit * 1000, - icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */ + peakbw = num_channels * DIV_ROUND_UP(qi.deinterleave, 8) * dclk_max; + maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */ ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); + /* + * clperchgroup = 4kpagespermempage * clperchperblock, + * clperchperblock = 8 / num_channels * interleave + */ + clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave; - for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { + for (i = 0; i < num_groups; i++) { struct intel_bw_info *bi = &dev_priv->max_bw[i]; int clpchgroup; int j; - clpchgroup = (sa->deburst * deinterleave / num_channels) << i; - bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; + clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i; + + if ((i < num_groups - 1 && clpchgroup < clperchgroup) || + DISPLAY_VER(dev_priv) == 11) + bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; + else + bi->num_planes = 0; bi->num_qgv_points = qi.num_points; bi->num_psf_gv_points = qi.num_psf_points; @@ -339,7 +389,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel * SAGV point, but we can't send PCode commands to restrict it * as it will fail and pointless anyway. */ - if (qi.num_points == 1) + if (qi.num_points >= 1) dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; else dev_priv->sagv_status = I915_SAGV_ENABLED;
The formulae has been updated to include more variables. Make sure the code carries the same. Bspec: 64631 v2: Make GEN11 follow the default route and fix calculation of maxdebw(RK) v3: Fix div by zero on default case Correct indent for fallthrough(Jani) v4: Fix div by zero on gen11. v5: Fix 0 max_numchannels case Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Suggested-by: Matt Roper <matthew.d.roper@intel.com> Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com> --- drivers/gpu/drm/i915/display/intel_bw.c | 80 ++++++++++++++++++++----- 1 file changed, 65 insertions(+), 15 deletions(-)