Message ID | 20250325110407.81107-7-tzimmermann@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/format-helper: Add helpers for line conversion | expand |
On 25/03/2025 11:31, Thomas Zimmermann wrote: > For ease of implementation, existing line-conversion functions > for 24-bit formats write each byte individually. Optimize the > performance by writing 4 pixels in 3 32-bit stores. > Thanks, it looks good to me. Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com> > Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> > --- > drivers/gpu/drm/drm_format_helper.c | 36 ++++++++++++++++++++++++++++- > 1 file changed, 35 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c > index a926aa6671fc..b9c9c712aa9c 100644 > --- a/drivers/gpu/drm/drm_format_helper.c > +++ b/drivers/gpu/drm/drm_format_helper.c > @@ -274,10 +274,44 @@ static __always_inline void drm_fb_xfrm_line_32to24(void *dbuf, const void *sbuf > unsigned int pixels, > u32 (*xfrm_pixel)(u32)) > { > - u8 *dbuf8 = dbuf; > + __le32 *dbuf32 = dbuf; > + u8 *dbuf8; > const __le32 *sbuf32 = sbuf; > const __le32 *send32 = sbuf32 + pixels; > > + /* write pixels in chunks of 4 */ > + send32 -= pixels & GENMASK(1, 0); > + while (sbuf32 < send32) { > + u32 val24[4] = { > + xfrm_pixel(le32_to_cpup(sbuf32++)), > + xfrm_pixel(le32_to_cpup(sbuf32++)), > + xfrm_pixel(le32_to_cpup(sbuf32++)), > + xfrm_pixel(le32_to_cpup(sbuf32++)), > + }; > + u32 out32[3] = { > + /* write output bytes in reverse order for little endianness */ > + ((val24[0] & 0x000000ff)) | > + ((val24[0] & 0x0000ff00)) | > + ((val24[0] & 0x00ff0000)) | > + ((val24[1] & 0x000000ff) << 24), > + ((val24[1] & 0x0000ff00) >> 8) | > + ((val24[1] & 0x00ff0000) >> 8) | > + ((val24[2] & 0x000000ff) << 16) | > + ((val24[2] & 0x0000ff00) << 16), > + ((val24[2] & 0x00ff0000) >> 16) | > + ((val24[3] & 0x000000ff) << 8) | > + ((val24[3] & 0x0000ff00) << 8) | > + ((val24[3] & 0x00ff0000) << 8), > + }; > + > + *dbuf32++ = cpu_to_le32(out32[0]); > + *dbuf32++ = cpu_to_le32(out32[1]); > + *dbuf32++ = cpu_to_le32(out32[2]); > + } > + send32 += pixels & GENMASK(1, 0); 
> + > + /* write trailing pixel */ > + dbuf8 = (u8 __force *)dbuf32; > while (sbuf32 < send32) { > u32 val24 = xfrm_pixel(le32_to_cpup(sbuf32++)); > /* write output in reverse order for little endianness */
diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index a926aa6671fc..b9c9c712aa9c 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -274,10 +274,44 @@ static __always_inline void drm_fb_xfrm_line_32to24(void *dbuf, const void *sbuf unsigned int pixels, u32 (*xfrm_pixel)(u32)) { - u8 *dbuf8 = dbuf; + __le32 *dbuf32 = dbuf; + u8 *dbuf8; const __le32 *sbuf32 = sbuf; const __le32 *send32 = sbuf32 + pixels; + /* write pixels in chunks of 4 */ + send32 -= pixels & GENMASK(1, 0); + while (sbuf32 < send32) { + u32 val24[4] = { + xfrm_pixel(le32_to_cpup(sbuf32++)), + xfrm_pixel(le32_to_cpup(sbuf32++)), + xfrm_pixel(le32_to_cpup(sbuf32++)), + xfrm_pixel(le32_to_cpup(sbuf32++)), + }; + u32 out32[3] = { + /* write output bytes in reverse order for little endianness */ + ((val24[0] & 0x000000ff)) | + ((val24[0] & 0x0000ff00)) | + ((val24[0] & 0x00ff0000)) | + ((val24[1] & 0x000000ff) << 24), + ((val24[1] & 0x0000ff00) >> 8) | + ((val24[1] & 0x00ff0000) >> 8) | + ((val24[2] & 0x000000ff) << 16) | + ((val24[2] & 0x0000ff00) << 16), + ((val24[2] & 0x00ff0000) >> 16) | + ((val24[3] & 0x000000ff) << 8) | + ((val24[3] & 0x0000ff00) << 8) | + ((val24[3] & 0x00ff0000) << 8), + }; + + *dbuf32++ = cpu_to_le32(out32[0]); + *dbuf32++ = cpu_to_le32(out32[1]); + *dbuf32++ = cpu_to_le32(out32[2]); + } + send32 += pixels & GENMASK(1, 0); + + /* write trailing pixel */ + dbuf8 = (u8 __force *)dbuf32; while (sbuf32 < send32) { u32 val24 = xfrm_pixel(le32_to_cpup(sbuf32++)); /* write output in reverse order for little endianness */
For ease of implementation, the existing line-conversion functions for 24-bit formats write each output byte individually. Improve performance by packing each group of 4 pixels (12 bytes) into three 32-bit stores; any remaining pixels at the end of the line (up to 3) are still written byte by byte. Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> --- drivers/gpu/drm/drm_format_helper.c | 36 ++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-)