Message ID | 20230512062417.2584427-6-ankit.k.nautiyal@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | DSC misc fixes | expand |
On Fri, May 12, 2023 at 11:54:09AM +0530, Ankit Nautiyal wrote: > As per Bsepc:49259, Bigjoiner BW check puts restriction on the > compressed bpp for a given CDCLK, pixelclock in cases where > Bigjoiner + DSC are used. > > Currently compressed bpp is computed first, and it is ensured that > the bpp will work at least with the max CDCLK freq. > > Since the CDCLK is computed later, lets account for Bigjoiner BW > check while calculating Min CDCLK. > > Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com> > --- > drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++---- > 1 file changed, 42 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c > index 6bed75f1541a..3532640c5027 100644 > --- a/drivers/gpu/drm/i915/display/intel_cdclk.c > +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c > @@ -2520,6 +2520,46 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) > return min_cdclk; > } > > +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) > +{ > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); > + struct drm_i915_private *i915 = to_i915(crtc->base.dev); > + int min_cdclk = 0; > + > + /* > + * When we decide to use only one VDSC engine, since > + * each VDSC operates with 1 ppc throughput, pixel clock > + * cannot be higher than the VDSC clock (cdclk) > + */ > + if (!crtc_state->dsc.dsc_split) > + min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); > + > + if (crtc_state->bigjoiner_pipes) { > + /* > + * According to Bigjoiner bw check: > + * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock > + * > + * We have already computed compressed_bpp, so now compute the min CDCLK that > + * is required to support this compressed_bpp. 
> + * > + * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits) > + * > + * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner > + * => CDCLK >= compressed_bpp * pixel_rate / Bigjoiner Interface bits > + * > + * #TODO Bspec mentions to account for FEC overhead while using pixel clock. > + * Check if we need to use FEC overhead in the above calculations. > + */ > + int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24; > + int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate / > + bigjoiner_interface_bits; pixel_rate is the downscale adjusted thing, so it doesn't seem like the correct thing to use here. Hmm. Assuming that the single VDSC engine really throttles the entire pipe to 1 PPC then we should probably account for the 1 vs. 2 PPC difference in *_plane_min_cdclk() and intel_pixel_rate_to_cdclk() directly. Currently all of those assume 2 PPC. > + > + min_cdclk = max(min_cdclk, min_cdclk_bj); > + } > + > + return min_cdclk; > +} > + > int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) > { > struct drm_i915_private *dev_priv = > @@ -2591,13 +2631,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) > /* Account for additional needs from the planes */ > min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); > > - /* > - * When we decide to use only one VDSC engine, since > - * each VDSC operates with 1 ppc throughput, pixel clock > - * cannot be higher than the VDSC clock (cdclk) > - */ > - if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split) > - min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); > + if (crtc_state->dsc.compression_enable) > + min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state)); > > /* > * HACK. Currently for TGL/DG2 platforms we calculate > -- > 2.25.1
On Mon, May 15, 2023 at 05:44:51PM +0300, Ville Syrjälä wrote: > On Fri, May 12, 2023 at 11:54:09AM +0530, Ankit Nautiyal wrote: > > As per Bsepc:49259, Bigjoiner BW check puts restriction on the > > compressed bpp for a given CDCLK, pixelclock in cases where > > Bigjoiner + DSC are used. > > > > Currently compressed bpp is computed first, and it is ensured that > > the bpp will work at least with the max CDCLK freq. > > > > Since the CDCLK is computed later, lets account for Bigjoiner BW > > check while calculating Min CDCLK. > > > > Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com> > > --- > > drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++---- > > 1 file changed, 42 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c > > index 6bed75f1541a..3532640c5027 100644 > > --- a/drivers/gpu/drm/i915/display/intel_cdclk.c > > +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c > > @@ -2520,6 +2520,46 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) > > return min_cdclk; > > } > > > > +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) > > +{ > > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); > > + struct drm_i915_private *i915 = to_i915(crtc->base.dev); > > + int min_cdclk = 0; > > + > > + /* > > + * When we decide to use only one VDSC engine, since > > + * each VDSC operates with 1 ppc throughput, pixel clock > > + * cannot be higher than the VDSC clock (cdclk) > > + */ > > + if (!crtc_state->dsc.dsc_split) > > + min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); > > + > > + if (crtc_state->bigjoiner_pipes) { > > + /* > > + * According to Bigjoiner bw check: > > + * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock > > + * > > + * We have already computed compressed_bpp, so now compute the min CDCLK that > > + * is required to support this compressed_bpp. 
> > + * > > + * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits) > > + * > > + * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner > > + * => CDCLK >= compressed_bpp * pixel_rate / Bigjoiner Interface bits > > + * > > + * #TODO Bspec mentions to account for FEC overhead while using pixel clock. > > + * Check if we need to use FEC overhead in the above calculations. > > + */ > > + int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24; > > + int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate / > > + bigjoiner_interface_bits; > > pixel_rate is the downscale adjusted thing, so it doesn't seem > like the correct thing to use here. > > Hmm. Assuming that the single VDSC engine really throttles the entire > pipe to 1 PPC then we should probably account for the 1 vs. 2 PPC > difference in *_plane_min_cdclk() and intel_pixel_rate_to_cdclk() > directly. Currently all of those assume 2 PPC. The main thing is to properly align the check you propose above with the check where we decide how many VDSC engines to use: /* * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate * is greater than the maximum Cdclock and if slice count is even * then we need to use 2 VDSC instances. */ if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) { if (pipe_config->dsc.slice_count > 1) { pipe_config->dsc.dsc_split = true; } else { drm_dbg_kms(&dev_priv->drm, "Cannot split stream to use 2 VDSC instances\n"); return -EINVAL; } } Otherwise I agree that we should do that check preferably in *_plane_min_cdclk and use the plane data rate, which is adjusted after scaling is applied (I think we even have a corresponding function there). It is strange that scaling wasn't mentioned in the BSpec formula. I would also say that we should account for the number of slices (i.e. VDSC engines) not only in the Bigjoiner case, but always, as I understand that number can be different not only for Bigjoiner cases. 
Stan > > > + > > + min_cdclk = max(min_cdclk, min_cdclk_bj); > > + } > > + > > + return min_cdclk; > > +} > > + > > int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) > > { > > struct drm_i915_private *dev_priv = > > @@ -2591,13 +2631,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) > > /* Account for additional needs from the planes */ > > min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); > > > > - /* > > - * When we decide to use only one VDSC engine, since > > - * each VDSC operates with 1 ppc throughput, pixel clock > > - * cannot be higher than the VDSC clock (cdclk) > > - */ > > - if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split) > > - min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); > > + if (crtc_state->dsc.compression_enable) > > + min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state)); > > > > /* > > * HACK. Currently for TGL/DG2 platforms we calculate > > -- > > 2.25.1 > > -- > Ville Syrjälä > Intel
Thanks Ville and Stan for the comments. I agree with the changes in _plane_min_cdclk and intel_pixel_rate_to_cdclk regarding PPC. But I am a little confused about the pixel clock. Please find my comments inline: On 5/16/2023 3:41 PM, Lisovskiy, Stanislav wrote: > On Mon, May 15, 2023 at 05:44:51PM +0300, Ville Syrjälä wrote: >> On Fri, May 12, 2023 at 11:54:09AM +0530, Ankit Nautiyal wrote: >>> As per Bsepc:49259, Bigjoiner BW check puts restriction on the >>> compressed bpp for a given CDCLK, pixelclock in cases where >>> Bigjoiner + DSC are used. >>> >>> Currently compressed bpp is computed first, and it is ensured that >>> the bpp will work at least with the max CDCLK freq. >>> >>> Since the CDCLK is computed later, lets account for Bigjoiner BW >>> check while calculating Min CDCLK. >>> >>> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com> >>> --- >>> drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++---- >>> 1 file changed, 42 insertions(+), 7 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c >>> index 6bed75f1541a..3532640c5027 100644 >>> --- a/drivers/gpu/drm/i915/display/intel_cdclk.c >>> +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c >>> @@ -2520,6 +2520,46 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) >>> return min_cdclk; >>> } >>> >>> +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) >>> +{ >>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); >>> + struct drm_i915_private *i915 = to_i915(crtc->base.dev); >>> + int min_cdclk = 0; >>> + >>> + /* >>> + * When we decide to use only one VDSC engine, since >>> + * each VDSC operates with 1 ppc throughput, pixel clock >>> + * cannot be higher than the VDSC clock (cdclk) >>> + */ >>> + if (!crtc_state->dsc.dsc_split) >>> + min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); >>> + >>> + if (crtc_state->bigjoiner_pipes) { >>> + /* 
>>> + * According to Bigjoiner bw check: >>> + * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock >>> + * >>> + * We have already computed compressed_bpp, so now compute the min CDCLK that >>> + * is required to support this compressed_bpp. >>> + * >>> + * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits) >>> + * >>> + * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner >>> + * => CDCLK >= compressed_bpp * pixel_rate / Bigjoiner Interface bits >>> + * >>> + * #TODO Bspec mentions to account for FEC overhead while using pixel clock. >>> + * Check if we need to use FEC overhead in the above calculations. >>> + */ >>> + int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24; >>> + int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate / >>> + bigjoiner_interface_bits; >> pixel_rate is the downscale adjusted thing, so it doesn't seem >> like the correct thing to use here. >> >> Hmm. Assuming that the single VDSC engine really throttles the entire >> pipe to 1 PPC then we should probably account for the 1 vs. 2 PPC >> difference in *_plane_min_cdclk() and intel_pixel_rate_to_cdclk() >> directly. Currently all of those assume 2 PPC. Hmm alright, I do see in plane_min_cdclk and intel_pixel_rate_to_cdclk we assume 2 PPC. So I can add a check for the dsc_split and use 1 PPC/2PPC in the two functions as a separate patch perhaps. > Main thing is to properly align that one you propose above with that check, > where we decide how many VDSC engines to use: > > /* > * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate > * is greater than the maximum Cdclock and if slice count is even > * then we need to use 2 VDSC instances. 
> */ > if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) { > if (pipe_config->dsc.slice_count > 1) { > pipe_config->dsc.dsc_split = true; > } else { > drm_dbg_kms(&dev_priv->drm, > "Cannot split stream to use 2 VDSC instances\n"); > return -EINVAL; > } > } > > Otherwise I agree that we should do that check preferrably in *_plane_min_cdclk > and use plane data rate which is adjusted after scaling is applied(I think we even have correspondent function there) > It is strange that scaling wasn't mentioned in BSpec formula. > I would also say that we should account for number of slices(i.e VDSC engines) now only in Bigjoiner case, but always, as I understand that number can be different not only for Bigjoiner cases. > > Stan > Hmm does it mean: if (!crtc_state->dsc.dsc_split) { if (bigjoiner) min_cdclk = compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits); else min_cdclk = compressed_bpp * Pixel clock; } For Pixel clock, should it not be crtc_state->hw.adjusted_mode->clock ? Regards, Ankit >>> + >>> + min_cdclk = max(min_cdclk, min_cdclk_bj); >>> + } >>> + >>> + return min_cdclk; >>> +} >>> + >>> int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) >>> { >>> struct drm_i915_private *dev_priv = >>> @@ -2591,13 +2631,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) >>> /* Account for additional needs from the planes */ >>> min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); >>> >>> - /* >>> - * When we decide to use only one VDSC engine, since >>> - * each VDSC operates with 1 ppc throughput, pixel clock >>> - * cannot be higher than the VDSC clock (cdclk) >>> - */ >>> - if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split) >>> - min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); >>> + if (crtc_state->dsc.compression_enable) >>> + min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state)); >>> >>> /* >>> * HACK. 
Currently for TGL/DG2 platforms we calculate >>> -- >>> 2.25.1 >> -- >> Ville Syrjälä >> Intel
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 6bed75f1541a..3532640c5027 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2520,6 +2520,46 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state) return min_cdclk; } +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + int min_cdclk = 0; + + /* + * When we decide to use only one VDSC engine, since + * each VDSC operates with 1 ppc throughput, pixel clock + * cannot be higher than the VDSC clock (cdclk) + */ + if (!crtc_state->dsc.dsc_split) + min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); + + if (crtc_state->bigjoiner_pipes) { + /* + * According to Bigjoiner bw check: + * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock + * + * We have already computed compressed_bpp, so now compute the min CDCLK that + * is required to support this compressed_bpp. + * + * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits) + * + * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner + * => CDCLK >= compressed_bpp * pixel_rate / Bigjoiner Interface bits + * + * #TODO Bspec mentions to account for FEC overhead while using pixel clock. + * Check if we need to use FEC overhead in the above calculations. + */ + int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 
36 : 24; + int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate / + bigjoiner_interface_bits; + + min_cdclk = max(min_cdclk, min_cdclk_bj); + } + + return min_cdclk; +} + int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = @@ -2591,13 +2631,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) /* Account for additional needs from the planes */ min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk); - /* - * When we decide to use only one VDSC engine, since - * each VDSC operates with 1 ppc throughput, pixel clock - * cannot be higher than the VDSC clock (cdclk) - */ - if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split) - min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate); + if (crtc_state->dsc.compression_enable) + min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state)); /* * HACK. Currently for TGL/DG2 platforms we calculate
As per Bspec:49259, the Bigjoiner BW check puts a restriction on the compressed bpp for a given CDCLK and pixel clock in cases where Bigjoiner + DSC are used. Currently the compressed bpp is computed first, and it is ensured that the bpp will work at least with the max CDCLK freq. Since the CDCLK is computed later, let's account for the Bigjoiner BW check while calculating the min CDCLK. Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com> --- drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-)