Message ID | 20240226-yuv-v3-3-ff662f0994db@bootlin.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/vkms: Reimplement line-per-line pixel conversion for plane reading | expand |
On 26/02/24 05:46, Louis Chauvet wrote: > Add some documentation on pixel conversion functions. > Update of outdated comments for pixel_write functions. > > Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com> > --- > drivers/gpu/drm/vkms/vkms_composer.c | 4 +++ > drivers/gpu/drm/vkms/vkms_drv.h | 13 ++++++++ > drivers/gpu/drm/vkms/vkms_formats.c | 58 ++++++++++++++++++++++++++++++------ > 3 files changed, 66 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c > index c6d9b4a65809..5b341222d239 100644 > --- a/drivers/gpu/drm/vkms/vkms_composer.c > +++ b/drivers/gpu/drm/vkms/vkms_composer.c > @@ -189,6 +189,10 @@ static void blend(struct vkms_writeback_job *wb, > > size_t crtc_y_limit = crtc_state->base.crtc->mode.vdisplay; > > + /* > + * The planes are composed line-by-line. It is a necessary complexity to avoid poor > + * blending performance. At this moment in the series, you have not yet reintroduced the line-by-line algorithm. Maybe it's better to add this comment when you do. Also, I think it's good to give more context, like: "The planes are composed line-by-line, instead of pixel-by-pixel" Best Regards, ~Arthur Grillo > + */ > for (size_t y = 0; y < crtc_y_limit; y++) { > fill_background(&background_color, output_buffer); > > diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h > index b4b357447292..18086423a3a7 100644 > --- a/drivers/gpu/drm/vkms/vkms_drv.h > +++ b/drivers/gpu/drm/vkms/vkms_drv.h > @@ -25,6 +25,17 @@ > > #define VKMS_LUT_SIZE 256 > > +/** > + * struct vkms_frame_info - structure to store the state of a frame > + * > + * @fb: backing drm framebuffer > + * @src: source rectangle of this frame in the source framebuffer > + * @dst: destination rectangle in the crtc buffer > + * @map: see drm_shadow_plane_state@data > + * @rotation: rotation applied to the source. > + * > + * @src and @dst should have the same size modulo the rotation. 
> + */ > struct vkms_frame_info { > struct drm_framebuffer *fb; > struct drm_rect src, dst; > @@ -52,6 +63,8 @@ struct vkms_writeback_job { > * vkms_plane_state - Driver specific plane state > * @base: base plane state > * @frame_info: data required for composing computation > + * @pixel_read: function to read a pixel in this plane. The creator of a vkms_plane_state must > + * ensure that this pointer is valid > */ > struct vkms_plane_state { > struct drm_shadow_plane_state base; > diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c > index 172830a3936a..cb7a49b7c8e7 100644 > --- a/drivers/gpu/drm/vkms/vkms_formats.c > +++ b/drivers/gpu/drm/vkms/vkms_formats.c > @@ -9,6 +9,17 @@ > > #include "vkms_formats.h" > > +/** > + * packed_pixels_offset() - Get the offset of the block containing the pixel at coordinates x/y > + * in the first plane > + * > + * @frame_info: Buffer metadata > + * @x: The x coordinate of the wanted pixel in the buffer > + * @y: The y coordinate of the wanted pixel in the buffer > + * > + * The caller must be aware that this offset is not always a pointer to a pixel. If individual > + * pixel values are needed, they have to be extracted from the resulting block. 
> + */ > static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int y) > { > struct drm_framebuffer *fb = frame_info->fb; > @@ -17,12 +28,13 @@ static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int > + (x * fb->format->cpp[0]); > } > > -/* > - * packed_pixels_addr - Get the pointer to pixel of a given pair of coordinates > +/** > + * packed_pixels_addr() - Get the pointer to the block containing the pixel at the given > + * coordinates > * > * @frame_info: Buffer metadata > - * @x: The x(width) coordinate of the 2D buffer > - * @y: The y(Heigth) coordinate of the 2D buffer > + * @x: The x(width) coordinate inside the plane > + * @y: The y(height) coordinate inside the plane > * > * Takes the information stored in the frame_info, a pair of coordinates, and > * returns the address of the first color channel. > @@ -53,6 +65,13 @@ static int get_x_position(const struct vkms_frame_info *frame_info, int limit, i > return x; > } > > +/* > + * The following functions take pixel data from the buffer and convert them to the format > + * ARGB16161616 in out_pixel. > + * > + * They are used in the `vkms_compose_row` function to handle multiple formats. > + */ > + > static void ARGB8888_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) > { > /* > @@ -145,12 +164,11 @@ void vkms_compose_row(struct line_buffer *stage_buffer, struct vkms_plane_state > } > > /* > - * The following functions take an line of argb_u16 pixels from the > - * src_buffer, convert them to a specific format, and store them in the > - * destination. > + * The following functions take one argb_u16 pixel and convert it to a specific format. The > + * result is stored in @dst_pixels. > * > - * They are used in the `compose_active_planes` to convert and store a line > - * from the src_buffer to the writeback buffer. > + * They are used in the `vkms_writeback_row` to convert and store a pixel from the src_buffer to > + * the writeback buffer. 
> */ > static void argb_u16_to_ARGB8888(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) > { > @@ -216,6 +234,14 @@ static void argb_u16_to_RGB565(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) > *pixels = cpu_to_le16(r << 11 | g << 5 | b); > } > > +/** > + * Generic loop for all supported writeback format. It is executed just after the blending to > + * write a line in the writeback buffer. > + * > + * @wb: Job where to insert the final image > + * @src_buffer: Line to write > + * @y: Row to write in the writeback buffer > + */ > void vkms_writeback_row(struct vkms_writeback_job *wb, > const struct line_buffer *src_buffer, int y) > { > @@ -229,6 +255,13 @@ void vkms_writeback_row(struct vkms_writeback_job *wb, > wb->pixel_write(dst_pixels, &in_pixels[x]); > } > > +/** > + * Retrieve the correct read_pixel function for a specific format. > + * The returned pointer is NULL for unsupported pixel formats. The caller must ensure that the > + * pointer is valid before using it in a vkms_plane_state. > + * > + * @format: 4cc of the format > + */ > void *get_pixel_conversion_function(u32 format) > { > switch (format) { > @@ -247,6 +280,13 @@ void *get_pixel_conversion_function(u32 format) > } > } > > +/** > + * Retrieve the correct write_pixel function for a specific format. > + * The returned pointer is NULL for unsupported pixel formats. The caller must ensure that the > + * pointer is valid before using it in a vkms_writeback_job. > + * > + * @format: 4cc of the format > + */ > void *get_pixel_write_function(u32 format) > { > switch (format) { >
Le 26/02/24 - 10:07, Arthur Grillo a écrit : > > > On 26/02/24 05:46, Louis Chauvet wrote: > > Add some documentation on pixel conversion functions. > > Update of outdated comments for pixel_write functions. > > > > Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com> > > --- > > drivers/gpu/drm/vkms/vkms_composer.c | 4 +++ > > drivers/gpu/drm/vkms/vkms_drv.h | 13 ++++++++ > > drivers/gpu/drm/vkms/vkms_formats.c | 58 ++++++++++++++++++++++++++++++------ > > 3 files changed, 66 insertions(+), 9 deletions(-) > > > > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c > > index c6d9b4a65809..5b341222d239 100644 > > --- a/drivers/gpu/drm/vkms/vkms_composer.c > > +++ b/drivers/gpu/drm/vkms/vkms_composer.c > > @@ -189,6 +189,10 @@ static void blend(struct vkms_writeback_job *wb, > > > > size_t crtc_y_limit = crtc_state->base.crtc->mode.vdisplay; > > > > + /* > > + * The planes are composed line-by-line. It is a necessary complexity to avoid poor > > + * blending performance. > > At this moment in the series, you have not yet reintroduced the > line-by-line algorithm yet. Maybe it's better to add this comment when > you do. Is it better with this: /* * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary * complexity to avoid poor blending performance. * * The function vkms_compose_row is used to read a line, pixel-by-pixel, into the staging * buffer. */ > Also, I think it's good to give more context, like: > "The planes are composed line-by-line, instead of pixel-by-pixel" And after PATCHv3 5/9: /* * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary * complexity to avoid poor blending performance. * * The function pixel_read_line callback is used to read a line, using an efficient * algorithm for a specific format, into the staging buffer. */ Kind regards, Louis Chauvet > Best Regards, > ~Arthur Grillo [...]
On 27/02/24 12:02, Louis Chauvet wrote: > Le 26/02/24 - 10:07, Arthur Grillo a écrit : >> >> >> On 26/02/24 05:46, Louis Chauvet wrote: >>> Add some documentation on pixel conversion functions. >>> Update of outdated comments for pixel_write functions. >>> >>> Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com> >>> --- >>> drivers/gpu/drm/vkms/vkms_composer.c | 4 +++ >>> drivers/gpu/drm/vkms/vkms_drv.h | 13 ++++++++ >>> drivers/gpu/drm/vkms/vkms_formats.c | 58 ++++++++++++++++++++++++++++++------ >>> 3 files changed, 66 insertions(+), 9 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c >>> index c6d9b4a65809..5b341222d239 100644 >>> --- a/drivers/gpu/drm/vkms/vkms_composer.c >>> +++ b/drivers/gpu/drm/vkms/vkms_composer.c >>> @@ -189,6 +189,10 @@ static void blend(struct vkms_writeback_job *wb, >>> >>> size_t crtc_y_limit = crtc_state->base.crtc->mode.vdisplay; >>> >>> + /* >>> + * The planes are composed line-by-line. It is a necessary complexity to avoid poor >>> + * blending performance. >> >> At this moment in the series, you have not yet reintroduced the >> line-by-line algorithm yet. Maybe it's better to add this comment when >> you do. > > Is it better with this: > > /* > * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary > * complexity to avoid poor blending performance. > * > * The function vkms_compose_row is used to read a line, pixel-by-pixel, into the staging > * buffer. > */ > >> Also, I think it's good to give more context, like: >> "The planes are composed line-by-line, instead of pixel-by-pixel" > > And after PATCHv3 5/9: > > /* > * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary > * complexity to avoid poor blending performance. > * > * The function pixel_read_line callback is used to read a line, using an efficient > * algorithm for a specific format, into the staging buffer. > */ > Hi, This looks good to me. 
Best Regards, ~Arthur Grillo > Kind regards, > Louis Chauvet > >> Best Regards, >> ~Arthur Grillo > > [...] >
On Tue, 27 Feb 2024 15:47:08 -0300 Arthur Grillo <arthurgrillo@riseup.net> wrote: > On 27/02/24 12:02, Louis Chauvet wrote: > > Le 26/02/24 - 10:07, Arthur Grillo a écrit : > >> > >> > >> On 26/02/24 05:46, Louis Chauvet wrote: > >>> Add some documentation on pixel conversion functions. > >>> Update of outdated comments for pixel_write functions. > >>> > >>> Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com> > >>> --- > >>> drivers/gpu/drm/vkms/vkms_composer.c | 4 +++ > >>> drivers/gpu/drm/vkms/vkms_drv.h | 13 ++++++++ > >>> drivers/gpu/drm/vkms/vkms_formats.c | 58 ++++++++++++++++++++++++++++++------ > >>> 3 files changed, 66 insertions(+), 9 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c > >>> index c6d9b4a65809..5b341222d239 100644 > >>> --- a/drivers/gpu/drm/vkms/vkms_composer.c > >>> +++ b/drivers/gpu/drm/vkms/vkms_composer.c > >>> @@ -189,6 +189,10 @@ static void blend(struct vkms_writeback_job *wb, > >>> > >>> size_t crtc_y_limit = crtc_state->base.crtc->mode.vdisplay; > >>> > >>> + /* > >>> + * The planes are composed line-by-line. It is a necessary complexity to avoid poor > >>> + * blending performance. > >> > >> At this moment in the series, you have not yet reintroduced the > >> line-by-line algorithm yet. Maybe it's better to add this comment when > >> you do. > > > > Is it better with this: > > > > /* > > * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary > > * complexity to avoid poor blending performance. > > * > > * The function vkms_compose_row is used to read a line, pixel-by-pixel, into the staging > > * buffer. > > */ > > > >> Also, I think it's good to give more context, like: > >> "The planes are composed line-by-line, instead of pixel-by-pixel" > > > > And after PATCHv3 5/9: > > > > /* > > * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary > > * complexity to avoid poor blending performance. 
> > * > > * The function pixel_read_line callback is used to read a line, using an efficient > > * algorithm for a specific format, into the staging buffer. > > */ > > Hi, there are a few reasons for the line-by-line algorithm, and the optimizations at large: VKMS uses temporary stage and output buffers so that blending functions can operate on just one high-precision pixel format, struct pixel_argb_u16. We can make pixel-format-specific read and write functions completely orthogonal to the blending operations and FB format combinations. This avoids a combinatorial explosion of needed functions for { input pixel formats × blending operations × output pixel formats }. We can use a temporary stage and output buffer whose size is one line and not the whole FB or CRTC framebuffer. This is where the memory savings come from. Using a temporary output buffer also avoids repeated read-decode-blend-encode-write cycles into the final destination buffer, as we don't need to decode/encode the pixel format. Finally, doing elementary operations (read, blend, write) line-by-line is much more efficient than pixel-by-pixel, because it allows making the inner-most loop very tight. It avoids repeatedly computing a result that does not change, like which function to call for a specific pixel format or blending equation. Thanks, pq
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index c6d9b4a65809..5b341222d239 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -189,6 +189,10 @@ static void blend(struct vkms_writeback_job *wb, size_t crtc_y_limit = crtc_state->base.crtc->mode.vdisplay; + /* + * The planes are composed line-by-line. It is a necessary complexity to avoid poor + * blending performance. + */ for (size_t y = 0; y < crtc_y_limit; y++) { fill_background(&background_color, output_buffer); diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index b4b357447292..18086423a3a7 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -25,6 +25,17 @@ #define VKMS_LUT_SIZE 256 +/** + * struct vkms_frame_info - structure to store the state of a frame + * + * @fb: backing drm framebuffer + * @src: source rectangle of this frame in the source framebuffer + * @dst: destination rectangle in the crtc buffer + * @map: see drm_shadow_plane_state@data + * @rotation: rotation applied to the source. + * + * @src and @dst should have the same size modulo the rotation. + */ struct vkms_frame_info { struct drm_framebuffer *fb; struct drm_rect src, dst; @@ -52,6 +63,8 @@ struct vkms_writeback_job { * vkms_plane_state - Driver specific plane state * @base: base plane state * @frame_info: data required for composing computation + * @pixel_read: function to read a pixel in this plane. 
The creator of a vkms_plane_state must + * ensure that this pointer is valid */ struct vkms_plane_state { struct drm_shadow_plane_state base; diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index 172830a3936a..cb7a49b7c8e7 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -9,6 +9,17 @@ #include "vkms_formats.h" +/** + * packed_pixels_offset() - Get the offset of the block containing the pixel at coordinates x/y + * in the first plane + * + * @frame_info: Buffer metadata + * @x: The x coordinate of the wanted pixel in the buffer + * @y: The y coordinate of the wanted pixel in the buffer + * + * The caller must be aware that this offset is not always a pointer to a pixel. If individual + * pixel values are needed, they have to be extracted from the resulting block. + */ static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int y) { struct drm_framebuffer *fb = frame_info->fb; @@ -17,12 +28,13 @@ static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int + (x * fb->format->cpp[0]); } -/* - * packed_pixels_addr - Get the pointer to pixel of a given pair of coordinates +/** + * packed_pixels_addr() - Get the pointer to the block containing the pixel at the given + * coordinates * * @frame_info: Buffer metadata - * @x: The x(width) coordinate of the 2D buffer - * @y: The y(Heigth) coordinate of the 2D buffer + * @x: The x(width) coordinate inside the plane + * @y: The y(height) coordinate inside the plane * * Takes the information stored in the frame_info, a pair of coordinates, and * returns the address of the first color channel. @@ -53,6 +65,13 @@ static int get_x_position(const struct vkms_frame_info *frame_info, int limit, i return x; } +/* + * The following functions take pixel data from the buffer and convert them to the format + * ARGB16161616 in out_pixel. 
+ * + * They are used in the `vkms_compose_row` function to handle multiple formats. + */ + static void ARGB8888_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) { /* @@ -145,12 +164,11 @@ void vkms_compose_row(struct line_buffer *stage_buffer, struct vkms_plane_state } /* - * The following functions take an line of argb_u16 pixels from the - * src_buffer, convert them to a specific format, and store them in the - * destination. + * The following functions take one argb_u16 pixel and convert it to a specific format. The + * result is stored in @dst_pixels. * - * They are used in the `compose_active_planes` to convert and store a line - * from the src_buffer to the writeback buffer. + * They are used in the `vkms_writeback_row` to convert and store a pixel from the src_buffer to + * the writeback buffer. */ static void argb_u16_to_ARGB8888(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) { @@ -216,6 +234,14 @@ static void argb_u16_to_RGB565(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) *pixels = cpu_to_le16(r << 11 | g << 5 | b); } +/** + * Generic loop for all supported writeback format. It is executed just after the blending to + * write a line in the writeback buffer. + * + * @wb: Job where to insert the final image + * @src_buffer: Line to write + * @y: Row to write in the writeback buffer + */ void vkms_writeback_row(struct vkms_writeback_job *wb, const struct line_buffer *src_buffer, int y) { @@ -229,6 +255,13 @@ void vkms_writeback_row(struct vkms_writeback_job *wb, wb->pixel_write(dst_pixels, &in_pixels[x]); } +/** + * Retrieve the correct read_pixel function for a specific format. + * The returned pointer is NULL for unsupported pixel formats. The caller must ensure that the + * pointer is valid before using it in a vkms_plane_state. 
+ * + * @format: 4cc of the format + */ void *get_pixel_conversion_function(u32 format) { switch (format) { @@ -247,6 +280,13 @@ void *get_pixel_conversion_function(u32 format) } } +/** + * Retrieve the correct write_pixel function for a specific format. + * The returned pointer is NULL for unsupported pixel formats. The caller must ensure that the + * pointer is valid before using it in a vkms_writeback_job. + * + * @format: 4cc of the format + */ void *get_pixel_write_function(u32 format) { switch (format) {
Add some documentation on pixel conversion functions. Update of outdated comments for pixel_write functions. Signed-off-by: Louis Chauvet <louis.chauvet@bootlin.com> --- drivers/gpu/drm/vkms/vkms_composer.c | 4 +++ drivers/gpu/drm/vkms/vkms_drv.h | 13 ++++++++ drivers/gpu/drm/vkms/vkms_formats.c | 58 ++++++++++++++++++++++++++++++------ 3 files changed, 66 insertions(+), 9 deletions(-)