diff mbox series

[v5,19/44] drm/vkms: add 3x4 matrix in color pipeline

Message ID 20240819205714.316380-20-harry.wentland@amd.com (mailing list archive)
State New, archived
Headers show
Series Color Pipeline API w/ VKMS | expand

Commit Message

Harry Wentland Aug. 19, 2024, 8:56 p.m. UTC
We add two 3x4 matrices into the VKMS color pipeline. The reason
we're adding matrices is so that we can test that application
of a matrix and its inverse yields an output equal to the input
image.

One complication with the matrix implementation has to do with
the fact that the matrix entries are in signed-magnitude fixed
point, whereas the drm_fixed.h implementation uses 2s-complement.
The latter one is the one that we want for easy addition and
subtraction, so we convert all entries to 2s-complement.

Signed-off-by: Harry Wentland <harry.wentland@amd.com>
---
 drivers/gpu/drm/vkms/vkms_colorop.c  | 32 +++++++++++++++++++++++++++-
 drivers/gpu/drm/vkms/vkms_composer.c | 27 +++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

Comments

Louis Chauvet Aug. 27, 2024, 5:49 p.m. UTC | #1
Le 19/08/24 - 16:56, Harry Wentland a écrit :
> We add two 3x4 matrices into the VKMS color pipeline. The reason
> we're adding matrices is so that we can test that application
> of a matrix and its inverse yields an output equal to the input
> image.
> 
> One complication with the matrix implementation has to do with
> the fact that the matrix entries are in signed-magnitude fixed
> point, whereas the drm_fixed.h implementation uses 2s-complement.
> The latter one is the one that we want for easy addition and
> subtraction, so we convert all entries to 2s-complement.

Is there a reason to use signed-magnitude and not 2s-complement here? I 
did not read the whole amd driver, but it seems that the matrix is always 
converted to fixed point notation (amdgpu_dm_fixpt_from_s3132 in 
amdgpu_dm_color.c). It may reduce the complexity here and in the amd 
driver too.

> 
> Signed-off-by: Harry Wentland <harry.wentland@amd.com>
> ---
>  drivers/gpu/drm/vkms/vkms_colorop.c  | 32 +++++++++++++++++++++++++++-
>  drivers/gpu/drm/vkms/vkms_composer.c | 27 +++++++++++++++++++++++
>  2 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c b/drivers/gpu/drm/vkms/vkms_colorop.c
> index f61dfde47156..adcb08153a09 100644
> --- a/drivers/gpu/drm/vkms/vkms_colorop.c
> +++ b/drivers/gpu/drm/vkms/vkms_colorop.c
> @@ -37,7 +37,37 @@ static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pr
>  
>  	prev_op = op;
>  
> -	/* 2nd op: 1d curve */
> +	/* 2nd op: 3x4 matrix */
> +	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> +	if (!op) {
> +		DRM_ERROR("KMS: Failed to allocate colorop\n");
> +		return -ENOMEM;
> +	}

Same as before, don't we leak memory/properties here?

> +	ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> +	if (ret)
> +		return ret;
> +
> +	drm_colorop_set_next_property(prev_op, op);
> +
> +	prev_op = op;
> +
> +	/* 3rd op: 3x4 matrix */
> +	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> +	if (!op) {
> +		DRM_ERROR("KMS: Failed to allocate colorop\n");
> +		return -ENOMEM;
> +	}
> +
> +	ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> +	if (ret)
> +		return ret;
> +
> +	drm_colorop_set_next_property(prev_op, op);
> +
> +	prev_op = op;
> +
> +	/* 4th op: 1d curve */
>  	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
>  	if (!op) {
>  		DRM_ERROR("KMS: Failed to allocate colorop\n");
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
> index 6e939d3a6d5c..bd1df06ced85 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -164,6 +164,30 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff
>  	}
>  }
>  
> +static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct drm_color_ctm_3x4 *matrix)
> +{
> +	s64 rf, gf, bf;
> +
> +	rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), drm_int2fixp(pixel->r)) +
> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), drm_int2fixp(pixel->g)) +
> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), drm_int2fixp(pixel->b)) +
> +	     drm_sm2fixp(matrix->matrix[3]);
> +
> +	gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), drm_int2fixp(pixel->r)) +
> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), drm_int2fixp(pixel->g)) +
> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), drm_int2fixp(pixel->b)) +
> +	     drm_sm2fixp(matrix->matrix[7]);
> +
> +	bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), drm_int2fixp(pixel->r)) +
> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), drm_int2fixp(pixel->g)) +
> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), drm_int2fixp(pixel->b)) +
> +	     drm_sm2fixp(matrix->matrix[11]);
> +
> +	pixel->r = drm_fixp2int(rf);
> +	pixel->g = drm_fixp2int(gf);
> +	pixel->b = drm_fixp2int(bf);

There is no need to round here, like done in [1]?

I don't know if the performance improvment is huge, bug maybe you can 
precompute drm_int2fixp(pixel->r/g/b)?

[1]: https://lore.kernel.org/all/20240802-yuv-v9-12-08a706669e16@bootlin.com/

> +}
> +
>  static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colorop)
>  {
>  	struct drm_colorop_state *colorop_state = colorop->state;
> @@ -184,6 +208,9 @@ static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colo
>  				DRM_DEBUG_DRIVER("unkown colorop 1D curve type %d\n", colorop_state->curve_1d_type);
>  				break;
>  		}
> +	} else if (colorop->type == DRM_COLOROP_CTM_3X4) {
> +		if (colorop_state->data)
> +			apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) colorop_state->data->data);
>  	}
>  
>  }
> -- 
> 2.46.0
>
Harry Wentland Sept. 6, 2024, 5:08 p.m. UTC | #2
On 2024-08-27 13:49, Louis Chauvet wrote:
> Le 19/08/24 - 16:56, Harry Wentland a écrit :
>> We add two 3x4 matrices into the VKMS color pipeline. The reason
>> we're adding matrices is so that we can test that application
>> of a matrix and its inverse yields an output equal to the input
>> image.
>>
>> One complication with the matrix implementation has to do with
>> the fact that the matrix entries are in signed-magnitude fixed
>> point, whereas the drm_fixed.h implementation uses 2s-complement.
>> The latter one is the one that we want for easy addition and
>> subtraction, so we convert all entries to 2s-complement.
> 
> Is there a reason to use signed-magnitude and not 2s-complement here? I 
> did not read the whole amd driver, but it seems that the matrix is always 
> converted to fixed point notation (amdgpu_dm_fixpt_from_s3132 in 
> amdgpu_dm_color.c). It may reduce the complexity here and in the amd 
> driver too.
> 

It's so as to keep the 3x4 matrix the same as the 3x3 one
(drm_color_ctm_3x4 and drm_color_ctm) and the original
3x3 one was defined to use S31.32 sign-magnitude. I'd
prefer 2s-complement myself but that ship has sailed.

>>
>> Signed-off-by: Harry Wentland <harry.wentland@amd.com>
>> ---
>>  drivers/gpu/drm/vkms/vkms_colorop.c  | 32 +++++++++++++++++++++++++++-
>>  drivers/gpu/drm/vkms/vkms_composer.c | 27 +++++++++++++++++++++++
>>  2 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c b/drivers/gpu/drm/vkms/vkms_colorop.c
>> index f61dfde47156..adcb08153a09 100644
>> --- a/drivers/gpu/drm/vkms/vkms_colorop.c
>> +++ b/drivers/gpu/drm/vkms/vkms_colorop.c
>> @@ -37,7 +37,37 @@ static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pr
>>  
>>  	prev_op = op;
>>  
>> -	/* 2nd op: 1d curve */
>> +	/* 2nd op: 3x4 matrix */
>> +	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
>> +	if (!op) {
>> +		DRM_ERROR("KMS: Failed to allocate colorop\n");
>> +		return -ENOMEM;
>> +	}
> 
> Same as before, don't we leak memory/properties here?
> 

Thanks. Fix for this will be in v6 (for both vkms and amdgpu).

>> +	ret = drm_colorop_ctm_3x4_init(dev, op, plane);
>> +	if (ret)
>> +		return ret;
>> +
>> +	drm_colorop_set_next_property(prev_op, op);
>> +
>> +	prev_op = op;
>> +
>> +	/* 3rd op: 3x4 matrix */
>> +	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
>> +	if (!op) {
>> +		DRM_ERROR("KMS: Failed to allocate colorop\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	ret = drm_colorop_ctm_3x4_init(dev, op, plane);
>> +	if (ret)
>> +		return ret;
>> +
>> +	drm_colorop_set_next_property(prev_op, op);
>> +
>> +	prev_op = op;
>> +
>> +	/* 4th op: 1d curve */
>>  	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
>>  	if (!op) {
>>  		DRM_ERROR("KMS: Failed to allocate colorop\n");
>> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
>> index 6e939d3a6d5c..bd1df06ced85 100644
>> --- a/drivers/gpu/drm/vkms/vkms_composer.c
>> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
>> @@ -164,6 +164,30 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff
>>  	}
>>  }
>>  
>> +static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct drm_color_ctm_3x4 *matrix)
>> +{
>> +	s64 rf, gf, bf;
>> +
>> +	rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), drm_int2fixp(pixel->r)) +
>> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), drm_int2fixp(pixel->g)) +
>> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), drm_int2fixp(pixel->b)) +
>> +	     drm_sm2fixp(matrix->matrix[3]);
>> +
>> +	gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), drm_int2fixp(pixel->r)) +
>> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), drm_int2fixp(pixel->g)) +
>> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), drm_int2fixp(pixel->b)) +
>> +	     drm_sm2fixp(matrix->matrix[7]);
>> +
>> +	bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), drm_int2fixp(pixel->r)) +
>> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), drm_int2fixp(pixel->g)) +
>> +	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), drm_int2fixp(pixel->b)) +
>> +	     drm_sm2fixp(matrix->matrix[11]);
>> +
>> +	pixel->r = drm_fixp2int(rf);
>> +	pixel->g = drm_fixp2int(gf);
>> +	pixel->b = drm_fixp2int(bf);
> 
> There is no need to round here, like done in [1]?
> 

Good call. This makes the CTM test in IGT pass with a [0, 0] bracket,
i.e., it means the CTM is applied with zero error. :)

> I don't know if the performance improvment is huge, bug maybe you can 
> precompute drm_int2fixp(pixel->r/g/b)?
> 

Good call. Doesn't hurt to precompute.

Harry

> [1]: https://lore.kernel.org/all/20240802-yuv-v9-12-08a706669e16@bootlin.com/
> 
>> +}
>> +
>>  static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colorop)
>>  {
>>  	struct drm_colorop_state *colorop_state = colorop->state;
>> @@ -184,6 +208,9 @@ static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colo
>>  				DRM_DEBUG_DRIVER("unkown colorop 1D curve type %d\n", colorop_state->curve_1d_type);
>>  				break;
>>  		}
>> +	} else if (colorop->type == DRM_COLOROP_CTM_3X4) {
>> +		if (colorop_state->data)
>> +			apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) colorop_state->data->data);
>>  	}
>>  
>>  }
>> -- 
>> 2.46.0
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c b/drivers/gpu/drm/vkms/vkms_colorop.c
index f61dfde47156..adcb08153a09 100644
--- a/drivers/gpu/drm/vkms/vkms_colorop.c
+++ b/drivers/gpu/drm/vkms/vkms_colorop.c
@@ -37,7 +37,37 @@  static int vkms_initialize_color_pipeline(struct drm_plane *plane, struct drm_pr
 
 	prev_op = op;
 
-	/* 2nd op: 1d curve */
+	/* 2nd op: 3x4 matrix */
+	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+	if (!op) {
+		DRM_ERROR("KMS: Failed to allocate colorop\n");
+		return -ENOMEM;
+	}
+
+	ret = drm_colorop_ctm_3x4_init(dev, op, plane);
+	if (ret)
+		return ret;
+
+	drm_colorop_set_next_property(prev_op, op);
+
+	prev_op = op;
+
+	/* 3rd op: 3x4 matrix */
+	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
+	if (!op) {
+		DRM_ERROR("KMS: Failed to allocate colorop\n");
+		return -ENOMEM;
+	}
+
+	ret = drm_colorop_ctm_3x4_init(dev, op, plane);
+	if (ret)
+		return ret;
+
+	drm_colorop_set_next_property(prev_op, op);
+
+	prev_op = op;
+
+	/* 4th op: 1d curve */
 	op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
 	if (!op) {
 		DRM_ERROR("KMS: Failed to allocate colorop\n");
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
index 6e939d3a6d5c..bd1df06ced85 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -164,6 +164,30 @@  static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff
 	}
 }
 
+static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct drm_color_ctm_3x4 *matrix)
+{
+	s64 rf, gf, bf;
+
+	rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), drm_int2fixp(pixel->r)) +
+	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), drm_int2fixp(pixel->g)) +
+	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), drm_int2fixp(pixel->b)) +
+	     drm_sm2fixp(matrix->matrix[3]);
+
+	gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), drm_int2fixp(pixel->r)) +
+	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), drm_int2fixp(pixel->g)) +
+	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), drm_int2fixp(pixel->b)) +
+	     drm_sm2fixp(matrix->matrix[7]);
+
+	bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), drm_int2fixp(pixel->r)) +
+	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), drm_int2fixp(pixel->g)) +
+	     drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), drm_int2fixp(pixel->b)) +
+	     drm_sm2fixp(matrix->matrix[11]);
+
+	pixel->r = drm_fixp2int(rf);
+	pixel->g = drm_fixp2int(gf);
+	pixel->b = drm_fixp2int(bf);
+}
+
 static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colorop)
 {
 	struct drm_colorop_state *colorop_state = colorop->state;
@@ -184,6 +208,9 @@  static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colo
 				DRM_DEBUG_DRIVER("unkown colorop 1D curve type %d\n", colorop_state->curve_1d_type);
 				break;
 		}
+	} else if (colorop->type == DRM_COLOROP_CTM_3X4) {
+		if (colorop_state->data)
+			apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) colorop_state->data->data);
 	}
 
 }