diff mbox series

[05/13] drm/i915/intel_cdclk: Add vdsc with bigjoiner constraints on min_cdclk

Message ID 20230512062417.2584427-6-ankit.k.nautiyal@intel.com (mailing list archive)
State New, archived
Headers show
Series DSC misc fixes | expand

Commit Message

Nautiyal, Ankit K May 12, 2023, 6:24 a.m. UTC
As per Bspec:49259, the Bigjoiner BW check puts a restriction on the
compressed bpp for a given CDCLK and pixel clock in cases where
Bigjoiner + DSC are used.

Currently compressed bpp is computed first, and it is ensured that
the bpp will work at least with the max CDCLK freq.

Since the CDCLK is computed later, let's account for the Bigjoiner BW
check while calculating the min CDCLK.

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++----
 1 file changed, 42 insertions(+), 7 deletions(-)

Comments

Ville Syrjälä May 15, 2023, 2:44 p.m. UTC | #1
On Fri, May 12, 2023 at 11:54:09AM +0530, Ankit Nautiyal wrote:
> As per Bsepc:49259, Bigjoiner BW check puts restriction on the
> compressed bpp for a given CDCLK, pixelclock in cases where
> Bigjoiner + DSC are used.
> 
> Currently compressed bpp is computed first, and it is ensured that
> the bpp will work at least with the max CDCLK freq.
> 
> Since the CDCLK is computed later, lets account for Bigjoiner BW
> check while calculating Min CDCLK.
> 
> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++----
>  1 file changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
> index 6bed75f1541a..3532640c5027 100644
> --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
> +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
> @@ -2520,6 +2520,46 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
>  	return min_cdclk;
>  }
>  
> +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> +	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
> +	int min_cdclk = 0;
> +
> +	/*
> +	 * When we decide to use only one VDSC engine, since
> +	 * each VDSC operates with 1 ppc throughput, pixel clock
> +	 * cannot be higher than the VDSC clock (cdclk)
> +	 */
> +	if (!crtc_state->dsc.dsc_split)
> +		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
> +
> +	if (crtc_state->bigjoiner_pipes) {
> +		/*
> +		 * According to Bigjoiner bw check:
> +		 * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock
> +		 *
> +		 * We have already computed compressed_bpp, so now compute the min CDCLK that
> +		 * is required to support this compressed_bpp.
> +		 *
> +		 * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits)
> +		 *
> +		 * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner
> +		 * => CDCLK >= compressed_bpp * pixel_rate  / Bigjoiner Interface bits
> +		 *
> +		 * #TODO Bspec mentions to account for FEC overhead while using pixel clock.
> +		 * Check if we need to use FEC overhead in the above calculations.
> +		 */
> +		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
> +		int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate /
> +				   bigjoiner_interface_bits;

pixel_rate is the downscale adjusted thing, so it doesn't seem
like the correct thing to use here.

Hmm. Assuming that the single VDSC engine really throttles the entire
pipe to 1 PPC then we should probably account for the 1 vs. 2 PPC
difference in *_plane_min_cdclk() and intel_pixel_rate_to_cdclk()
directly. Currently all of those assume 2 PPC.

> +
> +		min_cdclk = max(min_cdclk, min_cdclk_bj);
> +	}
> +
> +	return min_cdclk;
> +}
> +
>  int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>  {
>  	struct drm_i915_private *dev_priv =
> @@ -2591,13 +2631,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>  	/* Account for additional needs from the planes */
>  	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
>  
> -	/*
> -	 * When we decide to use only one VDSC engine, since
> -	 * each VDSC operates with 1 ppc throughput, pixel clock
> -	 * cannot be higher than the VDSC clock (cdclk)
> -	 */
> -	if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split)
> -		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
> +	if (crtc_state->dsc.compression_enable)
> +		min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state));
>  
>  	/*
>  	 * HACK. Currently for TGL/DG2 platforms we calculate
> -- 
> 2.25.1
Stanislav Lisovskiy May 16, 2023, 10:11 a.m. UTC | #2
On Mon, May 15, 2023 at 05:44:51PM +0300, Ville Syrjälä wrote:
> On Fri, May 12, 2023 at 11:54:09AM +0530, Ankit Nautiyal wrote:
> > As per Bsepc:49259, Bigjoiner BW check puts restriction on the
> > compressed bpp for a given CDCLK, pixelclock in cases where
> > Bigjoiner + DSC are used.
> > 
> > Currently compressed bpp is computed first, and it is ensured that
> > the bpp will work at least with the max CDCLK freq.
> > 
> > Since the CDCLK is computed later, lets account for Bigjoiner BW
> > check while calculating Min CDCLK.
> > 
> > Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
> > ---
> >  drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++----
> >  1 file changed, 42 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
> > index 6bed75f1541a..3532640c5027 100644
> > --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
> > +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
> > @@ -2520,6 +2520,46 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
> >  	return min_cdclk;
> >  }
> >  
> > +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> > +	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
> > +	int min_cdclk = 0;
> > +
> > +	/*
> > +	 * When we decide to use only one VDSC engine, since
> > +	 * each VDSC operates with 1 ppc throughput, pixel clock
> > +	 * cannot be higher than the VDSC clock (cdclk)
> > +	 */
> > +	if (!crtc_state->dsc.dsc_split)
> > +		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
> > +
> > +	if (crtc_state->bigjoiner_pipes) {
> > +		/*
> > +		 * According to Bigjoiner bw check:
> > +		 * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock
> > +		 *
> > +		 * We have already computed compressed_bpp, so now compute the min CDCLK that
> > +		 * is required to support this compressed_bpp.
> > +		 *
> > +		 * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits)
> > +		 *
> > +		 * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner
> > +		 * => CDCLK >= compressed_bpp * pixel_rate  / Bigjoiner Interface bits
> > +		 *
> > +		 * #TODO Bspec mentions to account for FEC overhead while using pixel clock.
> > +		 * Check if we need to use FEC overhead in the above calculations.
> > +		 */
> > +		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
> > +		int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate /
> > +				   bigjoiner_interface_bits;
> 
> pixel_rate is the downscale adjusted thing, so it doesn't seem
> like the correct thing to use here.
> 
> Hmm. Assuming that the single VDSC engine really throttles the entire
> pipe to 1 PPC then we should probably account for the 1 vs. 2 PPC
> difference in *_plane_min_cdclk() and intel_pixel_rate_to_cdclk()
> directly. Currently all of those assume 2 PPC.

The main thing is to properly align the check you propose above with the
existing check where we decide how many VDSC engines to use:

        /*
         * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
         * is greater than the maximum Cdclock and if slice count is even
         * then we need to use 2 VDSC instances.
         */
        if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) {
                if (pipe_config->dsc.slice_count > 1) {
                        pipe_config->dsc.dsc_split = true;
                } else {
                        drm_dbg_kms(&dev_priv->drm,
                                    "Cannot split stream to use 2 VDSC instances\n");
                        return -EINVAL;
                }
        }

Otherwise I agree that we should do that check preferably in *_plane_min_cdclk
and use the plane data rate, which is adjusted after scaling is applied (I think we even have a corresponding function there).
It is strange that scaling wasn't mentioned in the BSpec formula.
I would also say that we should account for the number of slices (i.e. VDSC engines) not only in the Bigjoiner case, but always, as I understand that number can be different not only for Bigjoiner cases.

Stan


> 
> > +
> > +		min_cdclk = max(min_cdclk, min_cdclk_bj);
> > +	}
> > +
> > +	return min_cdclk;
> > +}
> > +
> >  int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
> >  {
> >  	struct drm_i915_private *dev_priv =
> > @@ -2591,13 +2631,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
> >  	/* Account for additional needs from the planes */
> >  	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
> >  
> > -	/*
> > -	 * When we decide to use only one VDSC engine, since
> > -	 * each VDSC operates with 1 ppc throughput, pixel clock
> > -	 * cannot be higher than the VDSC clock (cdclk)
> > -	 */
> > -	if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split)
> > -		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
> > +	if (crtc_state->dsc.compression_enable)
> > +		min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state));
> >  
> >  	/*
> >  	 * HACK. Currently for TGL/DG2 platforms we calculate
> > -- 
> > 2.25.1
> 
> -- 
> Ville Syrjälä
> Intel
Nautiyal, Ankit K May 18, 2023, 1:14 p.m. UTC | #3
Thanks Ville and Stan for the comments.

I agree with the changes in _plane_min_cdclk and 
intel_pixel_rate_to_cdclk regarding PPC.

But I am a little confused about the pixel clock.

Please find my comments inline:


On 5/16/2023 3:41 PM, Lisovskiy, Stanislav wrote:
> On Mon, May 15, 2023 at 05:44:51PM +0300, Ville Syrjälä wrote:
>> On Fri, May 12, 2023 at 11:54:09AM +0530, Ankit Nautiyal wrote:
>>> As per Bsepc:49259, Bigjoiner BW check puts restriction on the
>>> compressed bpp for a given CDCLK, pixelclock in cases where
>>> Bigjoiner + DSC are used.
>>>
>>> Currently compressed bpp is computed first, and it is ensured that
>>> the bpp will work at least with the max CDCLK freq.
>>>
>>> Since the CDCLK is computed later, lets account for Bigjoiner BW
>>> check while calculating Min CDCLK.
>>>
>>> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/display/intel_cdclk.c | 49 ++++++++++++++++++----
>>>   1 file changed, 42 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
>>> index 6bed75f1541a..3532640c5027 100644
>>> --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
>>> +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
>>> @@ -2520,6 +2520,46 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
>>>   	return min_cdclk;
>>>   }
>>>   
>>> +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
>>> +{
>>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
>>> +	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
>>> +	int min_cdclk = 0;
>>> +
>>> +	/*
>>> +	 * When we decide to use only one VDSC engine, since
>>> +	 * each VDSC operates with 1 ppc throughput, pixel clock
>>> +	 * cannot be higher than the VDSC clock (cdclk)
>>> +	 */
>>> +	if (!crtc_state->dsc.dsc_split)
>>> +		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
>>> +
>>> +	if (crtc_state->bigjoiner_pipes) {
>>> +		/*
>>> +		 * According to Bigjoiner bw check:
>>> +		 * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock
>>> +		 *
>>> +		 * We have already computed compressed_bpp, so now compute the min CDCLK that
>>> +		 * is required to support this compressed_bpp.
>>> +		 *
>>> +		 * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits)
>>> +		 *
>>> +		 * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner
>>> +		 * => CDCLK >= compressed_bpp * pixel_rate  / Bigjoiner Interface bits
>>> +		 *
>>> +		 * #TODO Bspec mentions to account for FEC overhead while using pixel clock.
>>> +		 * Check if we need to use FEC overhead in the above calculations.
>>> +		 */
>>> +		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
>>> +		int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate /
>>> +				   bigjoiner_interface_bits;
>> pixel_rate is the downscale adjusted thing, so it doesn't seem
>> like the correct thing to use here.
>>
>> Hmm. Assuming that the single VDSC engine really throttles the entire
>> pipe to 1 PPC then we should probably account for the 1 vs. 2 PPC
>> difference in *_plane_min_cdclk() and intel_pixel_rate_to_cdclk()
>> directly. Currently all of those assume 2 PPC.

Hmm alright,  I do see in plane_min_cdclk and intel_pixel_rate_to_cdclk 
we assume 2 PPC.

So I can add a check for the dsc_split and use 1 PPC/2PPC  in the two 
functions as a separate patch perhaps.


> Main thing is to properly align that one you propose above with that check,
> where we decide how many VDSC engines to use:
>
>          /*
>           * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
>           * is greater than the maximum Cdclock and if slice count is even
>           * then we need to use 2 VDSC instances.
>           */
>          if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq) {
>                  if (pipe_config->dsc.slice_count > 1) {
>                          pipe_config->dsc.dsc_split = true;
>                  } else {
>                          drm_dbg_kms(&dev_priv->drm,
>                                      "Cannot split stream to use 2 VDSC instances\n");
>                          return -EINVAL;
>                  }
>          }
>
> Otherwise I agree that we should do that check preferrably in *_plane_min_cdclk
> and use plane data rate which is adjusted after scaling is applied(I think we even have correspondent function there)
> It is strange that scaling wasn't mentioned in BSpec formula.
> I would also say that we should account for number of slices(i.e VDSC engines) now only in Bigjoiner case, but always, as I understand that number can be different not only for Bigjoiner cases.
>
> Stan
>
Hmm does it mean:

if (!crtc_state->dsc.dsc_split) {

         if (bigjoiner)

             min_cdclk = compressed_bpp * Pixel clock / (PPC * Bigjoiner 
Interface bits);

     else

             min_cdclk = compressed_bpp * Pixel clock;

}

For Pixel clock, should it not be crtc_state->hw.adjusted_mode->clock ?

Regards,

Ankit


>>> +
>>> +		min_cdclk = max(min_cdclk, min_cdclk_bj);
>>> +	}
>>> +
>>> +	return min_cdclk;
>>> +}
>>> +
>>>   int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>>>   {
>>>   	struct drm_i915_private *dev_priv =
>>> @@ -2591,13 +2631,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>>>   	/* Account for additional needs from the planes */
>>>   	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
>>>   
>>> -	/*
>>> -	 * When we decide to use only one VDSC engine, since
>>> -	 * each VDSC operates with 1 ppc throughput, pixel clock
>>> -	 * cannot be higher than the VDSC clock (cdclk)
>>> -	 */
>>> -	if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split)
>>> -		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
>>> +	if (crtc_state->dsc.compression_enable)
>>> +		min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state));
>>>   
>>>   	/*
>>>   	 * HACK. Currently for TGL/DG2 platforms we calculate
>>> -- 
>>> 2.25.1
>> -- 
>> Ville Syrjälä
>> Intel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 6bed75f1541a..3532640c5027 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2520,6 +2520,46 @@  static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
 	return min_cdclk;
 }
 
+static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	int min_cdclk = 0;
+
+	/*
+	 * When we decide to use only one VDSC engine, since
+	 * each VDSC operates with 1 ppc throughput, pixel clock
+	 * cannot be higher than the VDSC clock (cdclk)
+	 */
+	if (!crtc_state->dsc.dsc_split)
+		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
+
+	if (crtc_state->bigjoiner_pipes) {
+		/*
+		 * According to Bigjoiner bw check:
+		 * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock
+		 *
+		 * We have already computed compressed_bpp, so now compute the min CDCLK that
+		 * is required to support this compressed_bpp.
+		 *
+		 * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits)
+		 *
+		 * Since Num of pipes joined = 2, and PPC = 2 with bigjoiner
+		 * => CDCLK >= compressed_bpp * pixel_rate  / Bigjoiner Interface bits
+		 *
+		 * #TODO Bspec mentions to account for FEC overhead while using pixel clock.
+		 * Check if we need to use FEC overhead in the above calculations.
+		 */
+		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
+		int min_cdclk_bj = crtc_state->dsc.compressed_bpp * crtc_state->pixel_rate /
+				   bigjoiner_interface_bits;
+
+		min_cdclk = max(min_cdclk, min_cdclk_bj);
+	}
+
+	return min_cdclk;
+}
+
 int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv =
@@ -2591,13 +2631,8 @@  int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 	/* Account for additional needs from the planes */
 	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
 
-	/*
-	 * When we decide to use only one VDSC engine, since
-	 * each VDSC operates with 1 ppc throughput, pixel clock
-	 * cannot be higher than the VDSC clock (cdclk)
-	 */
-	if (crtc_state->dsc.compression_enable && !crtc_state->dsc.dsc_split)
-		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
+	if (crtc_state->dsc.compression_enable)
+		min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state));
 
 	/*
 	 * HACK. Currently for TGL/DG2 platforms we calculate