Message ID | 20220313192952.12058-3-tzimmermann@suse.de (mailing list archive) |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | fbdev: Fix image blitting for arbitrary image widths | expand |
On 13.03.2022 20:29, Thomas Zimmermann wrote: > Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > broke cfb_imageblit() for image widths that are not aligned to 8-bit > boundaries. Fix this by handling the trailing pixels on each line > separately. The performance improvements in the original commit do not > regress by this change. > > Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> > Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> > Cc: Thomas Zimmermann <tzimmermann@suse.de> > Cc: Javier Martinez Canillas <javierm@redhat.com> > Cc: Sam Ravnborg <sam@ravnborg.org> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > --- > drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- > 1 file changed, 24 insertions(+), 4 deletions(-) > > diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c > index 7361cfabdd85..9ebda4e0dc7a 100644 > --- a/drivers/video/fbdev/core/cfbimgblt.c > +++ b/drivers/video/fbdev/core/cfbimgblt.c > @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > { > u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; > u32 ppw = 32/bpp, spitch = (image->width + 7)/8; > - u32 bit_mask, eorx; > + u32 bit_mask, eorx, shift; > const char *s = image->data, *src; > u32 __iomem *dst; > const u32 *tab = NULL; > @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > > for (i = image->height; i--; ) { > dst = (u32 __iomem *)dst1; > + shift = 8; > src = s; > > + /* > + * Manually unroll the per-line copying loop for better > + * performance. This works until we processed the last > + * completely filled source byte (inclusive). 
> + */ > switch (ppw) { > case 4: /* 8 bpp */ > - for (j = k; j; j -= 2, ++src) { > + for (j = k; j >= 2; j -= 2, ++src) { > FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); > } > break; > case 2: /* 16 bpp */ > - for (j = k; j; j -= 4, ++src) { > + for (j = k; j >= 4; j -= 4, ++src) { > FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++); > @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > } > break; > case 1: /* 32 bpp */ > - for (j = k; j; j -= 8, ++src) { > + for (j = k; j >= 8; j -= 8, ++src) { > FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++); > @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > break; > } > > + /* > + * For image widths that are not a multiple of 8, there > + * are trailing pixels left on the current line. Print > + * them as well. > + */ > + for (; j--; ) { > + shift -= ppw; > + FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++); > + if (!shift) { > + shift = 8; > + ++src; > + } > + } > + > dst1 += p->fix.line_length; > s += spitch; > } Best regards
On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote: > Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > broke cfb_imageblit() for image widths that are not aligned to 8-bit > boundaries. Fix this by handling the trailing pixels on each line > separately. The performance improvements in the original commit do not > regress by this change. > > Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> > Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> > Cc: Thomas Zimmermann <tzimmermann@suse.de> > Cc: Javier Martinez Canillas <javierm@redhat.com> > Cc: Sam Ravnborg <sam@ravnborg.org> On both patches: Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> > --- > drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- > 1 file changed, 24 insertions(+), 4 deletions(-) > > diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c > index 7361cfabdd85..9ebda4e0dc7a 100644 > --- a/drivers/video/fbdev/core/cfbimgblt.c > +++ b/drivers/video/fbdev/core/cfbimgblt.c > @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > { > u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; > u32 ppw = 32/bpp, spitch = (image->width + 7)/8; > - u32 bit_mask, eorx; > + u32 bit_mask, eorx, shift; > const char *s = image->data, *src; > u32 __iomem *dst; > const u32 *tab = NULL; > @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > > for (i = image->height; i--; ) { > dst = (u32 __iomem *)dst1; > + shift = 8; > src = s; > > + /* > + * Manually unroll the per-line copying loop for better > + * performance. This works until we processed the last > + * completely filled source byte (inclusive). 
> + */ > switch (ppw) { > case 4: /* 8 bpp */ > - for (j = k; j; j -= 2, ++src) { > + for (j = k; j >= 2; j -= 2, ++src) { > FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); > } > break; > case 2: /* 16 bpp */ > - for (j = k; j; j -= 4, ++src) { > + for (j = k; j >= 4; j -= 4, ++src) { > FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++); > @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > } > break; > case 1: /* 32 bpp */ > - for (j = k; j; j -= 8, ++src) { > + for (j = k; j >= 8; j -= 8, ++src) { > FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++); > @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > break; > } > > + /* > + * For image widths that are not a multiple of 8, there > + * are trailing pixels left on the current line. Print > + * them as well. > + */ > + for (; j--; ) { > + shift -= ppw; > + FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++); > + if (!shift) { > + shift = 8; > + ++src; > + } > + } > + > dst1 += p->fix.line_length; > s += spitch; > } > -- > 2.35.1 >
On 3/13/22 20:29, Thomas Zimmermann wrote: > Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > broke cfb_imageblit() for image widths that are not aligned to 8-bit > boundaries. Fix this by handling the trailing pixels on each line > separately. The performance improvements in the original commit do not > regress by this change. > > Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> > Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> > Cc: Thomas Zimmermann <tzimmermann@suse.de> > Cc: Javier Martinez Canillas <javierm@redhat.com> > Cc: Sam Ravnborg <sam@ravnborg.org> > --- Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote: > Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > broke cfb_imageblit() for image widths that are not aligned to 8-bit > boundaries. Fix this by handling the trailing pixels on each line > separately. The performance improvements in the original commit do not > regress by this change. > > Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> > Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") > Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> > Cc: Thomas Zimmermann <tzimmermann@suse.de> > Cc: Javier Martinez Canillas <javierm@redhat.com> > Cc: Sam Ravnborg <sam@ravnborg.org> > Tested-by: Marek Szyprowski <m.szyprowski@samsung.com> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> > Reviewed-by: Javier Martinez Canillas <javierm@redhat.com> Tested-by: Guenter Roeck <linux@roeck-us.net> > --- > drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- > 1 file changed, 24 insertions(+), 4 deletions(-) > > diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c > index 7361cfabdd85..9ebda4e0dc7a 100644 > --- a/drivers/video/fbdev/core/cfbimgblt.c > +++ b/drivers/video/fbdev/core/cfbimgblt.c > @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > { > u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; > u32 ppw = 32/bpp, spitch = (image->width + 7)/8; > - u32 bit_mask, eorx; > + u32 bit_mask, eorx, shift; > const char *s = image->data, *src; > u32 __iomem *dst; > const u32 *tab = NULL; > @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > > for (i = image->height; i--; ) { > dst = (u32 __iomem *)dst1; > + shift = 8; > src = s; > > + /* > + * Manually unroll the per-line copying loop for better > + * performance. 
This works until we processed the last > + * completely filled source byte (inclusive). > + */ > switch (ppw) { > case 4: /* 8 bpp */ > - for (j = k; j; j -= 2, ++src) { > + for (j = k; j >= 2; j -= 2, ++src) { > FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); > } > break; > case 2: /* 16 bpp */ > - for (j = k; j; j -= 4, ++src) { > + for (j = k; j >= 4; j -= 4, ++src) { > FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++); > @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > } > break; > case 1: /* 32 bpp */ > - for (j = k; j; j -= 8, ++src) { > + for (j = k; j >= 8; j -= 8, ++src) { > FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); > FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++); > @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * > break; > } > > + /* > + * For image widths that are not a multiple of 8, there > + * are trailing pixels left on the current line. Print > + * them as well. > + */ > + for (; j--; ) { > + shift -= ppw; > + FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++); > + if (!shift) { > + shift = 8; > + ++src; > + } > + } > + > dst1 += p->fix.line_length; > s += spitch; > }
diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c index 7361cfabdd85..9ebda4e0dc7a 100644 --- a/drivers/video/fbdev/core/cfbimgblt.c +++ b/drivers/video/fbdev/core/cfbimgblt.c @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8; - u32 bit_mask, eorx; + u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 __iomem *dst; const u32 *tab = NULL; @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * for (i = image->height; i--; ) { dst = (u32 __iomem *)dst1; + shift = 8; src = s; + /* + * Manually unroll the per-line copying loop for better + * performance. This works until we processed the last + * completely filled source byte (inclusive). + */ switch (ppw) { case 4: /* 8 bpp */ - for (j = k; j; j -= 2, ++src) { + for (j = k; j >= 2; j -= 2, ++src) { FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); } break; case 2: /* 16 bpp */ - for (j = k; j; j -= 4, ++src) { + for (j = k; j >= 4; j -= 4, ++src) { FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++); @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * } break; case 1: /* 32 bpp */ - for (j = k; j; j -= 8, ++src) { + for (j = k; j >= 8; j -= 8, ++src) { FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++); @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * break; } + /* + * For image widths that are not a multiple of 8, there + * are trailing pixels left on the current line. Print + * them as well. 
+ */ + for (; j--; ) { + shift -= ppw; + FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++); + if (!shift) { + shift = 8; + ++src; + } + } + dst1 += p->fix.line_length; s += spitch; }
Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") broke cfb_imageblit() for image widths that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements of the original commit are not regressed by this change. Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de> Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Thomas Zimmermann <tzimmermann@suse.de> Cc: Javier Martinez Canillas <javierm@redhat.com> Cc: Sam Ravnborg <sam@ravnborg.org> --- drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-)