diff mbox series

[v4,7/7] drm/i915/dsc: Check if vblank is sufficient for dsc prefill

Message ID 20250108123541.2101643-8-mitulkumar.ajitkumar.golani@intel.com (mailing list archive)
State New, archived
Headers show
Series Check Scaler and DSC Prefill Latency Against Vblank | expand

Commit Message

Golani, Mitulkumar Ajitkumar Jan. 8, 2025, 12:35 p.m. UTC
Check if vblank is sufficient for dsc prefill latency.

--v2:
- Consider chroma downscaling factor in latency calculation. [Ankit]
- Replace with appropriate function name.

--v3:
- Remove FIXME tag.[Ankit]
- Replace YCbCr444 with YCbCr420. [Ankit]
- Correct precision. [Ankit]
- Use some local variables like linetime_factor and latency to
adjust precision.
- Declare latency to 0 initially to avoid returning any garbage values.
- Account for second scaler downscaling factor as well. [Ankit]

Signed-off-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
---
 drivers/gpu/drm/i915/display/skl_watermark.c | 37 ++++++++++++++++++++
 1 file changed, 37 insertions(+)

Comments

Nautiyal, Ankit K Jan. 13, 2025, 11:13 a.m. UTC | #1
On 1/8/2025 6:05 PM, Mitul Golani wrote:
> Check if vblank is sufficient for dsc prefill latency.
>
> --v2:
> - Consider chroma downscaling factor in latency calculation. [Ankit]
> - Replace with appropriate function name.
>
> --v3:
> - Remove FIXME tag.[Ankit]
> - Replace Ycbcr444 to Ycbcr420.[Anit]
> - Correct precision. [Ankit]
> - Use some local valiables like linetime_factor and latency to
> adjust precision.
> - Declare latency to 0 initially to avoid returning any garbage values.
> - Account for second scaler downscaling factor as well. [Ankit]
>
> Signed-off-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
> ---
>   drivers/gpu/drm/i915/display/skl_watermark.c | 37 ++++++++++++++++++++
>   1 file changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c
> index fe91854e456c..d275ec687748 100644
> --- a/drivers/gpu/drm/i915/display/skl_watermark.c
> +++ b/drivers/gpu/drm/i915/display/skl_watermark.c
> @@ -2292,6 +2292,42 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
>   	return 0;
>   }
>   
> +static int
> +dsc_prefill_latency(const struct intel_crtc_state *crtc_state)
> +{
> +	const struct intel_crtc_scaler_state *scaler_state =
> +						&crtc_state->scaler_state;
> +	int latency = 0;
> +	int count = hweight32(scaler_state->scaler_users);
> +	long long hscale_k[2] = {1, 1};
> +	long long vscale_k[2] = {1, 1};
> +
> +	if (!crtc_state->dsc.compression_enable)
> +		return latency;
> +
> +	for (int i = 0; i < count; i++) {
> +		hscale_k[i] = mul_u32_u32(scaler_state->scalers[i].hscale, 1000) >> 16;
> +		vscale_k[i] = mul_u32_u32(scaler_state->scalers[i].vscale, 1000) >> 16;
> +	}
> +
> +	if (count) {
> +		int chroma_downscaling_factor =
> +			crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ? 2 : 1;
> +		long long total_scaling_factor;
> +		int linetime_factor = DIV_ROUND_UP(15 * crtc_state->linetime, 10);
> +
> +		total_scaling_factor  = DIV_ROUND_UP_ULL(hscale_k[0] * vscale_k[0], 1000000);

I think this might end up being 0.

Consider hscale and vscale to be 0.5 each. hscale_k and vscale_k will be 
500 each.

total_scaling_factor will become 500 * 500 / 1,000,000, i.e.
250,000 / 1,000,000 = 0.

I think you should compute latency for scaler 0 and then if there is 
another scaler, multiply it with the total scaling factor for scaler 1.

Regards,

Ankit


> +
> +		if (count > 1)
> +			total_scaling_factor *= DIV_ROUND_UP_ULL(hscale_k[1] * vscale_k[1],
> +								 1000000);
> +
> +		latency = total_scaling_factor * linetime_factor * chroma_downscaling_factor;
> +	}
> +
> +	return latency;
> +}
> +
>   static int
>   scaler_prefill_latency(const struct intel_crtc_state *crtc_state)
>   {
> @@ -2333,6 +2369,7 @@ skl_is_vblank_too_short(const struct intel_crtc_state *crtc_state,
>   	return crtc_state->framestart_delay +
>   		intel_usecs_to_scanlines(adjusted_mode, latency) +
>   		scaler_prefill_latency(crtc_state) +
> +		dsc_prefill_latency(crtc_state) +
>   		wm0_lines >
>   		adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vblank_start;
>   }
Nautiyal, Ankit K Jan. 13, 2025, 11:37 a.m. UTC | #2
On 1/13/2025 4:43 PM, Nautiyal, Ankit K wrote:
>
> On 1/8/2025 6:05 PM, Mitul Golani wrote:
>> Check if vblank is sufficient for dsc prefill latency.
>>
>> --v2:
>> - Consider chroma downscaling factor in latency calculation. [Ankit]
>> - Replace with appropriate function name.
>>
>> --v3:
>> - Remove FIXME tag.[Ankit]
>> - Replace Ycbcr444 to Ycbcr420.[Anit]
>> - Correct precision. [Ankit]
>> - Use some local valiables like linetime_factor and latency to
>> adjust precision.
>> - Declare latency to 0 initially to avoid returning any garbage values.
>> - Account for second scaler downscaling factor as well. [Ankit]
>>
>> Signed-off-by: Mitul Golani <mitulkumar.ajitkumar.golani@intel.com>
>> ---
>>   drivers/gpu/drm/i915/display/skl_watermark.c | 37 ++++++++++++++++++++
>>   1 file changed, 37 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c 
>> b/drivers/gpu/drm/i915/display/skl_watermark.c
>> index fe91854e456c..d275ec687748 100644
>> --- a/drivers/gpu/drm/i915/display/skl_watermark.c
>> +++ b/drivers/gpu/drm/i915/display/skl_watermark.c
>> @@ -2292,6 +2292,42 @@ static int icl_build_plane_wm(struct 
>> intel_crtc_state *crtc_state,
>>       return 0;
>>   }
>>   +static int
>> +dsc_prefill_latency(const struct intel_crtc_state *crtc_state)
>> +{
>> +    const struct intel_crtc_scaler_state *scaler_state =
>> +                        &crtc_state->scaler_state;
>> +    int latency = 0;
>> +    int count = hweight32(scaler_state->scaler_users);
>> +    long long hscale_k[2] = {1, 1};
>> +    long long vscale_k[2] = {1, 1};
>> +
>> +    if (!crtc_state->dsc.compression_enable)
>> +        return latency;
>> +
>> +    for (int i = 0; i < count; i++) {
>> +        hscale_k[i] = mul_u32_u32(scaler_state->scalers[i].hscale, 
>> 1000) >> 16;
>> +        vscale_k[i] = mul_u32_u32(scaler_state->scalers[i].vscale, 
>> 1000) >> 16;
>> +    }
>> +
>> +    if (count) {
>> +        int chroma_downscaling_factor =
>> +            crtc_state->output_format == 
>> INTEL_OUTPUT_FORMAT_YCBCR420 ? 2 : 1;
>> +        long long total_scaling_factor;
>> +        int linetime_factor = DIV_ROUND_UP(15 * 
>> crtc_state->linetime, 10);
>> +
>> +        total_scaling_factor  = DIV_ROUND_UP_ULL(hscale_k[0] * 
>> vscale_k[0], 1000000);
>
> I think this might end up being 0.
>
> Consider hscale and vscale to be 0.5 each. hscale_k and vscale_k will 
> be 500 each.
>
> total_scaling_factor will become 500*500/ 10,00,000 or 2,50,000 
> /10,00,000 = 0.
>
> I think you should compute latency for scaler 0 and then if there is 
> another scaler, multiply it with the total scaling factor for scaler 1.

Scratch that.

Just checked the Bspec. For each direction and each scaler, the downscale 
amount must be MAX(1, scaler input / scaler output).

So the scaling factor is at least one. This check is missing, and we 
need to account for it when computing the scaling factor.


Regards,

Ankit


>
> Regards,
>
> Ankit
>
>
>> +
>> +        if (count > 1)
>> +            total_scaling_factor *= DIV_ROUND_UP_ULL(hscale_k[1] * 
>> vscale_k[1],
>> +                                 1000000);
>> +
>> +        latency = total_scaling_factor * linetime_factor * 
>> chroma_downscaling_factor;
>> +    }
>> +
>> +    return latency;
>> +}
>> +
>>   static int
>>   scaler_prefill_latency(const struct intel_crtc_state *crtc_state)
>>   {
>> @@ -2333,6 +2369,7 @@ skl_is_vblank_too_short(const struct 
>> intel_crtc_state *crtc_state,
>>       return crtc_state->framestart_delay +
>>           intel_usecs_to_scanlines(adjusted_mode, latency) +
>>           scaler_prefill_latency(crtc_state) +
>> +        dsc_prefill_latency(crtc_state) +
>>           wm0_lines >
>>           adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vblank_start;
>>   }
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c
index fe91854e456c..d275ec687748 100644
--- a/drivers/gpu/drm/i915/display/skl_watermark.c
+++ b/drivers/gpu/drm/i915/display/skl_watermark.c
@@ -2292,6 +2292,42 @@  static int icl_build_plane_wm(struct intel_crtc_state *crtc_state,
 	return 0;
 }
 
+/* DSC prefill latency: 1.5 * linetime times chroma and scaler downscale factors. */
+static int
+dsc_prefill_latency(const struct intel_crtc_state *crtc_state)
+{
+	const struct intel_crtc_scaler_state *scaler_state =
+						&crtc_state->scaler_state;
+	int count = hweight32(scaler_state->scaler_users);
+	int chroma_downscaling_factor =
+		crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ? 2 : 1;
+	unsigned long long latency;
+
+	if (!crtc_state->dsc.compression_enable || !count)
+		return 0;
+
+	/* hweight32() is not bounded to two; only two scalers are accounted for. */
+	if (count > 2)
+		count = 2;
+
+	latency = DIV_ROUND_UP(15 * crtc_state->linetime, 10) * chroma_downscaling_factor;
+
+	for (int i = 0; i < count; i++) {
+		unsigned long long hscale_k =
+			mul_u32_u32(scaler_state->scalers[i].hscale, 1000) >> 16;
+		unsigned long long vscale_k =
+			mul_u32_u32(scaler_state->scalers[i].vscale, 1000) >> 16;
+
+		/* Bspec: per-direction downscale amount is MAX(1, in / out). */
+		hscale_k = hscale_k < 1000 ? 1000 : hscale_k;
+		vscale_k = vscale_k < 1000 ? 1000 : vscale_k;
+		/* Round up the scaled latency, not the bare factor, to keep precision. */
+		latency = DIV_ROUND_UP_ULL(latency * hscale_k * vscale_k, 1000000);
+	}
+
+	return latency;
+}
+
 static int
 scaler_prefill_latency(const struct intel_crtc_state *crtc_state)
 {
@@ -2333,6 +2369,7 @@  skl_is_vblank_too_short(const struct intel_crtc_state *crtc_state,
 	return crtc_state->framestart_delay +
 		intel_usecs_to_scanlines(adjusted_mode, latency) +
 		scaler_prefill_latency(crtc_state) +
+		dsc_prefill_latency(crtc_state) +
 		wm0_lines >
 		adjusted_mode->crtc_vtotal - adjusted_mode->crtc_vblank_start;
 }