diff mbox series

[v5,08/16] drm/vkms: Avoid computing blending limits inside pre_mul_alpha_blend

Message ID 20240313-yuv-v5-8-e610cbd03f52@bootlin.com (mailing list archive)
State New, archived
Headers show
Series drm/vkms: Reimplement line-per-line pixel conversion for plane reading | expand

Commit Message

Louis Chauvet March 13, 2024, 5:45 p.m. UTC
The pre_mul_alpha_blend is dedicated to blending, so to avoid mixing
different concepts (coordinate calculation and color management), extract
the x_limit and x_dst computation outside of this helper.
It also increases the maintainability by grouping the computation related
to coordinates in the same place: the loop in `blend`.

Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com>
---
 drivers/gpu/drm/vkms/vkms_composer.c | 40 +++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

Comments

Pekka Paalanen March 25, 2024, 12:41 p.m. UTC | #1
On Wed, 13 Mar 2024 18:45:02 +0100
Louis Chauvet <louis.chauvet@bootlin.com> wrote:

> The pre_mul_alpha_blend is dedicated to blending, so to avoid mixing
> different concepts (coordinate calculation and color management), extract
> the x_limit and x_dst computation outside of this helper.
> It also increases the maintainability by grouping the computation related
> to coordinates in the same place: the loop in `blend`.
> 
> Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com>
> ---
>  drivers/gpu/drm/vkms/vkms_composer.c | 40 +++++++++++++++++-------------------
>  1 file changed, 19 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
> index da0651a94c9b..9254086f23ff 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -24,34 +24,30 @@ static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
>  
>  /**
>   * pre_mul_alpha_blend - alpha blending equation
> - * @frame_info: Source framebuffer's metadata
>   * @stage_buffer: The line with the pixels from src_plane
>   * @output_buffer: A line buffer that receives all the blends output
> + * @x_start: The start offset to avoid useless copy

I'd say just:

+ * @x_start: The start offset

It describes the parameter, and the paragraph below explains the why.

That paragraph should explain that x_start applies to output_buffer, but
the input buffer (stage_buffer) is always read starting from 0.

> + * @count: The number of byte to copy

You named it pixel_count, and it counts pixels, not bytes. It's not a
copy but a blend into output_buffer.

>   *
> - * Using the information from the `frame_info`, this blends only the
> - * necessary pixels from the `stage_buffer` to the `output_buffer`
> - * using premultiplied blend formula.
> + * Using @x_start and @count information, only few pixel can be blended instead of the whole line
> + * each time.
>   *
>   * The current DRM assumption is that pixel color values have been already
>   * pre-multiplied with the alpha channel values. See more
>   * drm_plane_create_blend_mode_property(). Also, this formula assumes a
>   * completely opaque background.
>   */
> -static void pre_mul_alpha_blend(struct vkms_frame_info *frame_info,
> -				struct line_buffer *stage_buffer,
> -				struct line_buffer *output_buffer)
> +static void pre_mul_alpha_blend(const struct line_buffer *stage_buffer,
> +				struct line_buffer *output_buffer, int x_start, int pixel_count)
>  {
> -	int x_dst = frame_info->dst.x1;
> -	struct pixel_argb_u16 *out = output_buffer->pixels + x_dst;
> -	struct pixel_argb_u16 *in = stage_buffer->pixels;
> -	int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
> -			    stage_buffer->n_pixels);
> -
> -	for (int x = 0; x < x_limit; x++) {
> -		out[x].a = (u16)0xffff;
> -		out[x].r = pre_mul_blend_channel(in[x].r, out[x].r, in[x].a);
> -		out[x].g = pre_mul_blend_channel(in[x].g, out[x].g, in[x].a);
> -		out[x].b = pre_mul_blend_channel(in[x].b, out[x].b, in[x].a);
> +	struct pixel_argb_u16 *out = &output_buffer->pixels[x_start];
> +	const struct pixel_argb_u16 *in = stage_buffer->pixels;
> +
> +	for (int i = 0; i < pixel_count; i++) {
> +		out[i].a = (u16)0xffff;
> +		out[i].r = pre_mul_blend_channel(in[i].r, out[i].r, in[i].a);
> +		out[i].g = pre_mul_blend_channel(in[i].g, out[i].g, in[i].a);
> +		out[i].b = pre_mul_blend_channel(in[i].b, out[i].b, in[i].a);
>  	}
>  }
>  
> @@ -183,7 +179,7 @@ static void blend(struct vkms_writeback_job *wb,
>  {
>  	struct vkms_plane_state **plane = crtc_state->active_planes;
>  	u32 n_active_planes = crtc_state->num_active_planes;
> -	int y_pos;
> +	int y_pos, x_dst, x_limit;
>  
>  	const struct pixel_argb_u16 background_color = { .a = 0xffff };
>  
> @@ -201,14 +197,16 @@ static void blend(struct vkms_writeback_job *wb,
>  
>  		/* The active planes are composed associatively in z-order. */
>  		for (size_t i = 0; i < n_active_planes; i++) {
> +			x_dst = plane[i]->frame_info->dst.x1;
> +			x_limit = min_t(size_t, drm_rect_width(&plane[i]->frame_info->dst),
> +					stage_buffer->n_pixels);

Are those input values to min_t() really of type size_t? Or why is
size_t here?

>  			y_pos = get_y_pos(plane[i]->frame_info, y);
>  
>  			if (!check_limit(plane[i]->frame_info, y_pos))
>  				continue;
>  
>  			vkms_compose_row(stage_buffer, plane[i], y_pos);
> -			pre_mul_alpha_blend(plane[i]->frame_info, stage_buffer,
> -					    output_buffer);
> +			pre_mul_alpha_blend(stage_buffer, output_buffer, x_dst, x_limit);

I thought it was a count, not a limit?

"Limit" sounds to me like "end", and end - start = count.

>  		}
>  
>  		apply_lut(crtc_state, output_buffer);
> 

The details aside, this is a good move.


Thanks,
pq
Louis Chauvet March 26, 2024, 3:57 p.m. UTC | #2
Le 25/03/24 - 14:41, Pekka Paalanen a écrit :
> On Wed, 13 Mar 2024 18:45:02 +0100
> Louis Chauvet <louis.chauvet@bootlin.com> wrote:
> 
> > The pre_mul_alpha_blend is dedicated to blending, so to avoid mixing
> > different concepts (coordinate calculation and color management), extract
> > the x_limit and x_dst computation outside of this helper.
> > It also increases the maintainability by grouping the computation related
> > to coordinates in the same place: the loop in `blend`.
> > 
> > Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com>
> > ---
> >  drivers/gpu/drm/vkms/vkms_composer.c | 40 +++++++++++++++++-------------------
> >  1 file changed, 19 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
> > index da0651a94c9b..9254086f23ff 100644
> > --- a/drivers/gpu/drm/vkms/vkms_composer.c
> > +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> > @@ -24,34 +24,30 @@ static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
> >  
> >  /**
> >   * pre_mul_alpha_blend - alpha blending equation
> > - * @frame_info: Source framebuffer's metadata
> >   * @stage_buffer: The line with the pixels from src_plane
> >   * @output_buffer: A line buffer that receives all the blends output
> > + * @x_start: The start offset to avoid useless copy
> 
> I'd say just:
> 
> + * @x_start: The start offset
> 
> It describes the parameter, and the paragraph below explains the why.
> 
> It would be explaining, that x_start applies to output_buffer, but
> input_buffer is always read starting from 0.

I will change it to:

 * Using @x_start and @pixel_count, only a few pixels are blended instead of the whole line
 * each time. @x_start is only used for the output buffer. The staging buffer is always read from
 * the start (0..@pixel_count in stage_buffer is blended at @x_start..@x_start+@pixel_count in
 * output_buffer).

> > + * @count: The number of byte to copy
> 
> You named it pixel_count, and it counts pixels, not bytes. It's not a
> copy but a blend into output_buffer.

Oops, fixed in v6.
 
> >   *
> > - * Using the information from the `frame_info`, this blends only the
> > - * necessary pixels from the `stage_buffer` to the `output_buffer`
> > - * using premultiplied blend formula.
> > + * Using @x_start and @count information, only few pixel can be blended instead of the whole line
> > + * each time.
> >   *
> >   * The current DRM assumption is that pixel color values have been already
> >   * pre-multiplied with the alpha channel values. See more
> >   * drm_plane_create_blend_mode_property(). Also, this formula assumes a
> >   * completely opaque background.
> >   */
> > -static void pre_mul_alpha_blend(struct vkms_frame_info *frame_info,
> > -				struct line_buffer *stage_buffer,
> > -				struct line_buffer *output_buffer)
> > +static void pre_mul_alpha_blend(const struct line_buffer *stage_buffer,
> > +				struct line_buffer *output_buffer, int x_start, int pixel_count)
> >  {
> > -	int x_dst = frame_info->dst.x1;
> > -	struct pixel_argb_u16 *out = output_buffer->pixels + x_dst;
> > -	struct pixel_argb_u16 *in = stage_buffer->pixels;
> > -	int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
> > -			    stage_buffer->n_pixels);
> > -
> > -	for (int x = 0; x < x_limit; x++) {
> > -		out[x].a = (u16)0xffff;
> > -		out[x].r = pre_mul_blend_channel(in[x].r, out[x].r, in[x].a);
> > -		out[x].g = pre_mul_blend_channel(in[x].g, out[x].g, in[x].a);
> > -		out[x].b = pre_mul_blend_channel(in[x].b, out[x].b, in[x].a);
> > +	struct pixel_argb_u16 *out = &output_buffer->pixels[x_start];
> > +	const struct pixel_argb_u16 *in = stage_buffer->pixels;
> > +
> > +	for (int i = 0; i < pixel_count; i++) {
> > +		out[i].a = (u16)0xffff;
> > +		out[i].r = pre_mul_blend_channel(in[i].r, out[i].r, in[i].a);
> > +		out[i].g = pre_mul_blend_channel(in[i].g, out[i].g, in[i].a);
> > +		out[i].b = pre_mul_blend_channel(in[i].b, out[i].b, in[i].a);
> >  	}
> >  }
> >  
> > @@ -183,7 +179,7 @@ static void blend(struct vkms_writeback_job *wb,
> >  {
> >  	struct vkms_plane_state **plane = crtc_state->active_planes;
> >  	u32 n_active_planes = crtc_state->num_active_planes;
> > -	int y_pos;
> > +	int y_pos, x_dst, x_limit;
> >  
> >  	const struct pixel_argb_u16 background_color = { .a = 0xffff };
> >  
> > @@ -201,14 +197,16 @@ static void blend(struct vkms_writeback_job *wb,
> >  
> >  		/* The active planes are composed associatively in z-order. */
> >  		for (size_t i = 0; i < n_active_planes; i++) {
> > +			x_dst = plane[i]->frame_info->dst.x1;
> > +			x_limit = min_t(size_t, drm_rect_width(&plane[i]->frame_info->dst),
> > +					stage_buffer->n_pixels);
> 
> Are those input values to min_t() really of type size_t? Or why is
> size_t here?

n_pixels is size_t, drm_rect_width() returns int. I will change everything to int.
Is there a way to ask the compiler "please don't do implicit conversions,
and report them as warnings/errors"?

> >  			y_pos = get_y_pos(plane[i]->frame_info, y);
> >  
> >  			if (!check_limit(plane[i]->frame_info, y_pos))
> >  				continue;
> >  
> >  			vkms_compose_row(stage_buffer, plane[i], y_pos);
> > -			pre_mul_alpha_blend(plane[i]->frame_info, stage_buffer,
> > -					    output_buffer);
> > +			pre_mul_alpha_blend(stage_buffer, output_buffer, x_dst, x_limit);
> 
> I thought it was a count, not a limit?
> 
> "Limit" sounds to me like "end", and end - start = count.

It is effectively a pixel count. I just took that naming from the
original pre_mul_alpha_blend. I will change it to pixel_count.

Thanks,
Louis Chauvet

> >  		}
> >  
> >  		apply_lut(crtc_state, output_buffer);
> > 
> 
> The details aside, this is a good move.
> 
> 
> Thanks,
> pq
Pekka Paalanen March 27, 2024, 11:48 a.m. UTC | #3
On Tue, 26 Mar 2024 16:57:00 +0100
Louis Chauvet <louis.chauvet@bootlin.com> wrote:

> Le 25/03/24 - 14:41, Pekka Paalanen a écrit :
> > On Wed, 13 Mar 2024 18:45:02 +0100
> > Louis Chauvet <louis.chauvet@bootlin.com> wrote:
> >   
> > > The pre_mul_alpha_blend is dedicated to blending, so to avoid mixing
> > > different concepts (coordinate calculation and color management), extract
> > > the x_limit and x_dst computation outside of this helper.
> > > It also increases the maintainability by grouping the computation related
> > > to coordinates in the same place: the loop in `blend`.
> > > 
> > > Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com>
> > > ---
> > >  drivers/gpu/drm/vkms/vkms_composer.c | 40 +++++++++++++++++-------------------
> > >  1 file changed, 19 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
> > > index da0651a94c9b..9254086f23ff 100644
> > > --- a/drivers/gpu/drm/vkms/vkms_composer.c
> > > +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> > > @@ -24,34 +24,30 @@ static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
> > >  
> > >  /**
> > >   * pre_mul_alpha_blend - alpha blending equation
> > > - * @frame_info: Source framebuffer's metadata
> > >   * @stage_buffer: The line with the pixels from src_plane
> > >   * @output_buffer: A line buffer that receives all the blends output
> > > + * @x_start: The start offset to avoid useless copy  
> > 
> > I'd say just:
> > 
> > + * @x_start: The start offset
> > 
> > It describes the parameter, and the paragraph below explains the why.
> > 
> > It would be explaining, that x_start applies to output_buffer, but
> > input_buffer is always read starting from 0.  
> 
> I will change it to:
> 
>  * Using @x_start and @count information, only few pixel can be blended instead of the whole line
>  * each time. @x_start is only used for the output buffer. The staging buffer is always read from
>  * the start (0..@count in stage_buffer is blended at @x_start..@x_start+@count in output_buffer).

The important part is

0..@count in stage_buffer is blended at @x_start..@x_start+@count in output_buffer

and everything else from that paragraph is not really adding much.

Remember to update the doc in "drm/vkms: Re-introduce line-per-line
composition  algorithm" to follow the changes.


> > > + * @count: The number of byte to copy  
> > 
> > You named it pixel_count, and it counts pixels, not bytes. It's not a
> > copy but a blend into output_buffer.  
> 
> Oops, fixed in v6.
>  
> > >   *
> > > - * Using the information from the `frame_info`, this blends only the
> > > - * necessary pixels from the `stage_buffer` to the `output_buffer`
> > > - * using premultiplied blend formula.
> > > + * Using @x_start and @count information, only few pixel can be blended instead of the whole line
> > > + * each time.
> > >   *
> > >   * The current DRM assumption is that pixel color values have been already
> > >   * pre-multiplied with the alpha channel values. See more
> > >   * drm_plane_create_blend_mode_property(). Also, this formula assumes a
> > >   * completely opaque background.
> > >   */
> > > -static void pre_mul_alpha_blend(struct vkms_frame_info *frame_info,
> > > -				struct line_buffer *stage_buffer,
> > > -				struct line_buffer *output_buffer)
> > > +static void pre_mul_alpha_blend(const struct line_buffer *stage_buffer,
> > > +				struct line_buffer *output_buffer, int x_start, int pixel_count)
> > >  {
> > > -	int x_dst = frame_info->dst.x1;
> > > -	struct pixel_argb_u16 *out = output_buffer->pixels + x_dst;
> > > -	struct pixel_argb_u16 *in = stage_buffer->pixels;
> > > -	int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
> > > -			    stage_buffer->n_pixels);
> > > -
> > > -	for (int x = 0; x < x_limit; x++) {
> > > -		out[x].a = (u16)0xffff;
> > > -		out[x].r = pre_mul_blend_channel(in[x].r, out[x].r, in[x].a);
> > > -		out[x].g = pre_mul_blend_channel(in[x].g, out[x].g, in[x].a);
> > > -		out[x].b = pre_mul_blend_channel(in[x].b, out[x].b, in[x].a);
> > > +	struct pixel_argb_u16 *out = &output_buffer->pixels[x_start];
> > > +	const struct pixel_argb_u16 *in = stage_buffer->pixels;
> > > +
> > > +	for (int i = 0; i < pixel_count; i++) {
> > > +		out[i].a = (u16)0xffff;
> > > +		out[i].r = pre_mul_blend_channel(in[i].r, out[i].r, in[i].a);
> > > +		out[i].g = pre_mul_blend_channel(in[i].g, out[i].g, in[i].a);
> > > +		out[i].b = pre_mul_blend_channel(in[i].b, out[i].b, in[i].a);
> > >  	}
> > >  }
> > >  
> > > @@ -183,7 +179,7 @@ static void blend(struct vkms_writeback_job *wb,
> > >  {
> > >  	struct vkms_plane_state **plane = crtc_state->active_planes;
> > >  	u32 n_active_planes = crtc_state->num_active_planes;
> > > -	int y_pos;
> > > +	int y_pos, x_dst, x_limit;
> > >  
> > >  	const struct pixel_argb_u16 background_color = { .a = 0xffff };
> > >  
> > > @@ -201,14 +197,16 @@ static void blend(struct vkms_writeback_job *wb,
> > >  
> > >  		/* The active planes are composed associatively in z-order. */
> > >  		for (size_t i = 0; i < n_active_planes; i++) {
> > > +			x_dst = plane[i]->frame_info->dst.x1;
> > > +			x_limit = min_t(size_t, drm_rect_width(&plane[i]->frame_info->dst),
> > > +					stage_buffer->n_pixels);  
> > 
> > Are those input values to min_t() really of type size_t? Or why is
> > size_t here?  
> 
> n_pixel is size_t, drm_rect_width is int. I will change everything to int. 
> Is there a way to ask the compiler "please don't do implicit conversion 
> and report them as warn/errors"?

There probably is, you can find it in the gcc manual. However, I suspect
you would drown in warnings for cases where the implicit conversion is
wanted and an explicit cast is unwanted.


Thanks,
pq

> > >  			y_pos = get_y_pos(plane[i]->frame_info, y);
> > >  
> > >  			if (!check_limit(plane[i]->frame_info, y_pos))
> > >  				continue;
> > >  
> > >  			vkms_compose_row(stage_buffer, plane[i], y_pos);
> > > -			pre_mul_alpha_blend(plane[i]->frame_info, stage_buffer,
> > > -					    output_buffer);
> > > +			pre_mul_alpha_blend(stage_buffer, output_buffer, x_dst, x_limit);  
> > 
> > I thought it was a count, not a limit?
> > 
> > "Limit" sounds to me like "end", and end - start = count.  
> 
> It is effectively a pixel count. I just took those naming from the 
> original pre_mul_alpha_blend. I will change it to pixel_count.
> 
> Thanks,
> Louis Chauvet
> 
> > >  		}
> > >  
> > >  		apply_lut(crtc_state, output_buffer);
> > >   
> > 
> > The details aside, this is a good move.
> > 
> > 
> > Thanks,
> > pq  
> 
> 
>
Louis Chauvet April 8, 2024, 7:50 a.m. UTC | #4
Le 27/03/24 - 13:48, Pekka Paalanen a écrit :
> On Tue, 26 Mar 2024 16:57:00 +0100
> Louis Chauvet <louis.chauvet@bootlin.com> wrote:
> 
> > Le 25/03/24 - 14:41, Pekka Paalanen a écrit :
> > > On Wed, 13 Mar 2024 18:45:02 +0100
> > > Louis Chauvet <louis.chauvet@bootlin.com> wrote:
> > >   
> > > > The pre_mul_alpha_blend is dedicated to blending, so to avoid mixing
> > > > different concepts (coordinate calculation and color management), extract
> > > > the x_limit and x_dst computation outside of this helper.
> > > > It also increases the maintainability by grouping the computation related
> > > > to coordinates in the same place: the loop in `blend`.
> > > > 
> > > > Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com>
> > > > ---
> > > >  drivers/gpu/drm/vkms/vkms_composer.c | 40 +++++++++++++++++-------------------
> > > >  1 file changed, 19 insertions(+), 21 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
> > > > index da0651a94c9b..9254086f23ff 100644
> > > > --- a/drivers/gpu/drm/vkms/vkms_composer.c
> > > > +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> > > > @@ -24,34 +24,30 @@ static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
> > > >  
> > > >  /**
> > > >   * pre_mul_alpha_blend - alpha blending equation
> > > > - * @frame_info: Source framebuffer's metadata
> > > >   * @stage_buffer: The line with the pixels from src_plane
> > > >   * @output_buffer: A line buffer that receives all the blends output
> > > > + * @x_start: The start offset to avoid useless copy  
> > > 
> > > I'd say just:
> > > 
> > > + * @x_start: The start offset
> > > 
> > > It describes the parameter, and the paragraph below explains the why.
> > > 
> > > It would be explaining, that x_start applies to output_buffer, but
> > > input_buffer is always read starting from 0.  
> > 
> > I will change it to:
> > 
> >  * Using @x_start and @count information, only few pixel can be blended instead of the whole line
> >  * each time. @x_start is only used for the output buffer. The staging buffer is always read from
> >  * the start (0..@count in stage_buffer is blended at @x_start..@x_start+@count in output_buffer).
> 
> The important part is
> 
> 0..@count in stage_buffer is blended at @x_start..@x_start+@count in output_buffer
> 
> and everything else from that paragraph is not really adding much.

Ok, I will only keep this sentence.
 
> Remember to update the doc in "drm/vkms: Re-introduce line-per-line
> composition  algorithm" to follow the changes.

Thanks for the reminder, I will check!

> 
> > > > + * @count: The number of byte to copy  
> > > 
> > > You named it pixel_count, and it counts pixels, not bytes. It's not a
> > > copy but a blend into output_buffer.  
> > 
> > Oops, fixed in v6.
> >  
> > > >   *
> > > > - * Using the information from the `frame_info`, this blends only the
> > > > - * necessary pixels from the `stage_buffer` to the `output_buffer`
> > > > - * using premultiplied blend formula.
> > > > + * Using @x_start and @count information, only few pixel can be blended instead of the whole line
> > > > + * each time.
> > > >   *
> > > >   * The current DRM assumption is that pixel color values have been already
> > > >   * pre-multiplied with the alpha channel values. See more
> > > >   * drm_plane_create_blend_mode_property(). Also, this formula assumes a
> > > >   * completely opaque background.
> > > >   */
> > > > -static void pre_mul_alpha_blend(struct vkms_frame_info *frame_info,
> > > > -				struct line_buffer *stage_buffer,
> > > > -				struct line_buffer *output_buffer)
> > > > +static void pre_mul_alpha_blend(const struct line_buffer *stage_buffer,
> > > > +				struct line_buffer *output_buffer, int x_start, int pixel_count)
> > > >  {
> > > > -	int x_dst = frame_info->dst.x1;
> > > > -	struct pixel_argb_u16 *out = output_buffer->pixels + x_dst;
> > > > -	struct pixel_argb_u16 *in = stage_buffer->pixels;
> > > > -	int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
> > > > -			    stage_buffer->n_pixels);
> > > > -
> > > > -	for (int x = 0; x < x_limit; x++) {
> > > > -		out[x].a = (u16)0xffff;
> > > > -		out[x].r = pre_mul_blend_channel(in[x].r, out[x].r, in[x].a);
> > > > -		out[x].g = pre_mul_blend_channel(in[x].g, out[x].g, in[x].a);
> > > > -		out[x].b = pre_mul_blend_channel(in[x].b, out[x].b, in[x].a);
> > > > +	struct pixel_argb_u16 *out = &output_buffer->pixels[x_start];
> > > > +	const struct pixel_argb_u16 *in = stage_buffer->pixels;
> > > > +
> > > > +	for (int i = 0; i < pixel_count; i++) {
> > > > +		out[i].a = (u16)0xffff;
> > > > +		out[i].r = pre_mul_blend_channel(in[i].r, out[i].r, in[i].a);
> > > > +		out[i].g = pre_mul_blend_channel(in[i].g, out[i].g, in[i].a);
> > > > +		out[i].b = pre_mul_blend_channel(in[i].b, out[i].b, in[i].a);
> > > >  	}
> > > >  }
> > > >  
> > > > @@ -183,7 +179,7 @@ static void blend(struct vkms_writeback_job *wb,
> > > >  {
> > > >  	struct vkms_plane_state **plane = crtc_state->active_planes;
> > > >  	u32 n_active_planes = crtc_state->num_active_planes;
> > > > -	int y_pos;
> > > > +	int y_pos, x_dst, x_limit;
> > > >  
> > > >  	const struct pixel_argb_u16 background_color = { .a = 0xffff };
> > > >  
> > > > @@ -201,14 +197,16 @@ static void blend(struct vkms_writeback_job *wb,
> > > >  
> > > >  		/* The active planes are composed associatively in z-order. */
> > > >  		for (size_t i = 0; i < n_active_planes; i++) {
> > > > +			x_dst = plane[i]->frame_info->dst.x1;
> > > > +			x_limit = min_t(size_t, drm_rect_width(&plane[i]->frame_info->dst),
> > > > +					stage_buffer->n_pixels);  
> > > 
> > > Are those input values to min_t() really of type size_t? Or why is
> > > size_t here?  
> > 
> > n_pixel is size_t, drm_rect_width is int. I will change everything to int. 
> > Is there a way to ask the compiler "please don't do implicit conversion 
> > and report them as warn/errors"?
> 
> There probably is, you can find it in the gcc manual. However, I suspect
> you would drown in warnings for cases where the implicit conversion is
> wanted and an explicit cast is unwanted.

That's true. I found it (-Wconversion), but it is very noisy...

Thanks,
Louis Chauvet

> 
> Thanks,
> pq
> 
> > > >  			y_pos = get_y_pos(plane[i]->frame_info, y);
> > > >  
> > > >  			if (!check_limit(plane[i]->frame_info, y_pos))
> > > >  				continue;
> > > >  
> > > >  			vkms_compose_row(stage_buffer, plane[i], y_pos);
> > > > -			pre_mul_alpha_blend(plane[i]->frame_info, stage_buffer,
> > > > -					    output_buffer);
> > > > +			pre_mul_alpha_blend(stage_buffer, output_buffer, x_dst, x_limit);  
> > > 
> > > I thought it was a count, not a limit?
> > > 
> > > "Limit" sounds to me like "end", and end - start = count.  
> > 
> > It is effectively a pixel count. I just took those naming from the 
> > original pre_mul_alpha_blend. I will change it to pixel_count.
> > 
> > Thanks,
> > Louis Chauvet
> > 
> > > >  		}
> > > >  
> > > >  		apply_lut(crtc_state, output_buffer);
> > > >   
> > > 
> > > The details aside, this is a good move.
> > > 
> > > 
> > > Thanks,
> > > pq  
> > 
> > 
> > 
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
index da0651a94c9b..9254086f23ff 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -24,34 +24,30 @@  static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
 
 /**
  * pre_mul_alpha_blend - alpha blending equation
- * @frame_info: Source framebuffer's metadata
  * @stage_buffer: The line with the pixels from src_plane
  * @output_buffer: A line buffer that receives all the blends output
+ * @x_start: The start offset in @output_buffer
+ * @pixel_count: The number of pixels to blend
  *
- * Using the information from the `frame_info`, this blends only the
- * necessary pixels from the `stage_buffer` to the `output_buffer`
- * using premultiplied blend formula.
+ * Using @x_start and @pixel_count, only a few pixels can be blended instead of the whole line
+ * each time.
  *
  * The current DRM assumption is that pixel color values have been already
  * pre-multiplied with the alpha channel values. See more
  * drm_plane_create_blend_mode_property(). Also, this formula assumes a
  * completely opaque background.
  */
-static void pre_mul_alpha_blend(struct vkms_frame_info *frame_info,
-				struct line_buffer *stage_buffer,
-				struct line_buffer *output_buffer)
+static void pre_mul_alpha_blend(const struct line_buffer *stage_buffer,
+				struct line_buffer *output_buffer, int x_start, int pixel_count)
 {
-	int x_dst = frame_info->dst.x1;
-	struct pixel_argb_u16 *out = output_buffer->pixels + x_dst;
-	struct pixel_argb_u16 *in = stage_buffer->pixels;
-	int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
-			    stage_buffer->n_pixels);
-
-	for (int x = 0; x < x_limit; x++) {
-		out[x].a = (u16)0xffff;
-		out[x].r = pre_mul_blend_channel(in[x].r, out[x].r, in[x].a);
-		out[x].g = pre_mul_blend_channel(in[x].g, out[x].g, in[x].a);
-		out[x].b = pre_mul_blend_channel(in[x].b, out[x].b, in[x].a);
+	struct pixel_argb_u16 *out = &output_buffer->pixels[x_start];
+	const struct pixel_argb_u16 *in = stage_buffer->pixels;
+
+	for (int i = 0; i < pixel_count; i++) {
+		out[i].a = (u16)0xffff;
+		out[i].r = pre_mul_blend_channel(in[i].r, out[i].r, in[i].a);
+		out[i].g = pre_mul_blend_channel(in[i].g, out[i].g, in[i].a);
+		out[i].b = pre_mul_blend_channel(in[i].b, out[i].b, in[i].a);
 	}
 }
 
@@ -183,7 +179,7 @@  static void blend(struct vkms_writeback_job *wb,
 {
 	struct vkms_plane_state **plane = crtc_state->active_planes;
 	u32 n_active_planes = crtc_state->num_active_planes;
-	int y_pos;
+	int y_pos, x_dst, x_limit;
 
 	const struct pixel_argb_u16 background_color = { .a = 0xffff };
 
@@ -201,14 +197,16 @@  static void blend(struct vkms_writeback_job *wb,
 
 		/* The active planes are composed associatively in z-order. */
 		for (size_t i = 0; i < n_active_planes; i++) {
+			x_dst = plane[i]->frame_info->dst.x1;
+			x_limit = min_t(size_t, drm_rect_width(&plane[i]->frame_info->dst),
+					stage_buffer->n_pixels);
 			y_pos = get_y_pos(plane[i]->frame_info, y);
 
 			if (!check_limit(plane[i]->frame_info, y_pos))
 				continue;
 
 			vkms_compose_row(stage_buffer, plane[i], y_pos);
-			pre_mul_alpha_blend(plane[i]->frame_info, stage_buffer,
-					    output_buffer);
+			pre_mul_alpha_blend(stage_buffer, output_buffer, x_dst, x_limit);
 		}
 
 		apply_lut(crtc_state, output_buffer);