Recent optimization of the fbdev image-bitting helpers broke the code for image width that do not align with multiples of 8. Both, sys and cfb, are affected. Fix this problem by handling the trailing pixels on each line separately.
Tested with simpledrm and the 7x14 font.
Thomas Zimmermann (2): fbdev: Fix sys_imageblit() for arbitrary image widths fbdev: Fix cfb_imageblit() for arbitrary image widths
drivers/video/fbdev/core/cfbimgblt.c | 28 +++++++++++++++++++++++---- drivers/video/fbdev/core/sysimgblt.c | 29 ++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 8 deletions(-)
Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()") broke sys_imageblit() for image width that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()") Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Javier Martinez Canillas javierm@redhat.com Cc: Sam Ravnborg sam@ravnborg.org --- drivers/video/fbdev/core/sysimgblt.c | 29 ++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-)
diff --git a/drivers/video/fbdev/core/sysimgblt.c b/drivers/video/fbdev/core/sysimgblt.c index 722c327a381b..335e92b813fc 100644 --- a/drivers/video/fbdev/core/sysimgblt.c +++ b/drivers/video/fbdev/core/sysimgblt.c @@ -188,7 +188,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8; - u32 bit_mask, eorx; + u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 *dst; const u32 *tab; @@ -229,17 +229,23 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
for (i = image->height; i--; ) { dst = dst1; + shift = 8; src = s;
+ /* + * Manually unroll the per-line copying loop for better + * performance. This works until we processed the last + * completely filled source byte (inclusive). + */ switch (ppw) { case 4: /* 8 bpp */ - for (j = k; j; j -= 2, ++src) { + for (j = k; j >= 2; j -= 2, ++src) { *dst++ = colortab[(*src >> 4) & bit_mask]; *dst++ = colortab[(*src >> 0) & bit_mask]; } break; case 2: /* 16 bpp */ - for (j = k; j; j -= 4, ++src) { + for (j = k; j >= 4; j -= 4, ++src) { *dst++ = colortab[(*src >> 6) & bit_mask]; *dst++ = colortab[(*src >> 4) & bit_mask]; *dst++ = colortab[(*src >> 2) & bit_mask]; @@ -247,7 +253,7 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, } break; case 1: /* 32 bpp */ - for (j = k; j; j -= 8, ++src) { + for (j = k; j >= 8; j -= 8, ++src) { *dst++ = colortab[(*src >> 7) & bit_mask]; *dst++ = colortab[(*src >> 6) & bit_mask]; *dst++ = colortab[(*src >> 5) & bit_mask]; @@ -259,6 +265,21 @@ static void fast_imageblit(const struct fb_image *image, struct fb_info *p, } break; } + + /* + * For image widths that are not a multiple of 8, there + * are trailing pixels left on the current line. Print + * them as well. + */ + for (; j--; ) { + shift -= ppw; + *dst++ = colortab[(*src >> shift) & bit_mask]; + if (!shift) { + shift = 8; + ++src; + } + } + dst1 += p->fix.line_length; s += spitch; }
Hi Thomas,
On Sun, Mar 13, 2022 at 8:29 PM Thomas Zimmermann tzimmermann@suse.de wrote:
Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()") broke sys_imageblit() for image width that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de
Thanks for fixing! This was very valuable for hammering the bugs out of ssd130xdrm and the xrgb888-to-mono conversion...
Tested-by: Geert Uytterhoeven geert@linux-m68k.org
Gr{oetje,eeting}s,
Geert
-- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Hello Thomas,
On 3/13/22 20:29, Thomas Zimmermann wrote:
Commit 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()") broke sys_imageblit() for image width that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 6f29e04938bf ("fbdev: Improve performance of sys_imageblit()") Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Javier Martinez Canillas javierm@redhat.com Cc: Sam Ravnborg sam@ravnborg.org
Looks good to me. Also Marek and Geert mentioned that fixes the issue they were seeing.
Reviewed-by: Javier Martinez Canillas javierm@redhat.com
Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") broke cfb_imageblit() for image widths that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") Reported-by: Marek Szyprowski m.szyprowski@samsung.com Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Javier Martinez Canillas javierm@redhat.com Cc: Sam Ravnborg sam@ravnborg.org --- drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c index 7361cfabdd85..9ebda4e0dc7a 100644 --- a/drivers/video/fbdev/core/cfbimgblt.c +++ b/drivers/video/fbdev/core/cfbimgblt.c @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8; - u32 bit_mask, eorx; + u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 __iomem *dst; const u32 *tab = NULL; @@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
for (i = image->height; i--; ) { dst = (u32 __iomem *)dst1; + shift = 8; src = s;
+ /* + * Manually unroll the per-line copying loop for better + * performance. This works until we processed the last + * completely filled source byte (inclusive). + */ switch (ppw) { case 4: /* 8 bpp */ - for (j = k; j; j -= 2, ++src) { + for (j = k; j >= 2; j -= 2, ++src) { FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); } break; case 2: /* 16 bpp */ - for (j = k; j; j -= 4, ++src) { + for (j = k; j >= 4; j -= 4, ++src) { FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++); @@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * } break; case 1: /* 32 bpp */ - for (j = k; j; j -= 8, ++src) { + for (j = k; j >= 8; j -= 8, ++src) { FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++); @@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * break; }
+ /* + * For image widths that are not a multiple of 8, there + * are trailing pixels left on the current line. Print + * them as well. + */ + for (; j--; ) { + shift -= ppw; + FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++); + if (!shift) { + shift = 8; + ++src; + } + } + dst1 += p->fix.line_length; s += spitch; }
On 13.03.2022 20:29, Thomas Zimmermann wrote:
Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") broke cfb_imageblit() for image widths that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") Reported-by: Marek Szyprowski m.szyprowski@samsung.com Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Javier Martinez Canillas javierm@redhat.com Cc: Sam Ravnborg sam@ravnborg.org
Tested-by: Marek Szyprowski m.szyprowski@samsung.com
drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c index 7361cfabdd85..9ebda4e0dc7a 100644 --- a/drivers/video/fbdev/core/cfbimgblt.c +++ b/drivers/video/fbdev/core/cfbimgblt.c @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
- u32 bit_mask, eorx;
- u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 __iomem *dst; const u32 *tab = NULL;
@@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
for (i = image->height; i--; ) { dst = (u32 __iomem *)dst1;
shift = 8;
src = s;
/*
* Manually unroll the per-line copying loop for better
* performance. This works until we processed the last
* completely filled source byte (inclusive).
*/
switch (ppw) { case 4: /* 8 bpp */
for (j = k; j; j -= 2, ++src) {
case 2: /* 16 bpp */for (j = k; j >= 2; j -= 2, ++src) { FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); } break;
for (j = k; j; j -= 4, ++src) {
for (j = k; j >= 4; j -= 4, ++src) { FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
@@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * } break; case 1: /* 32 bpp */
for (j = k; j; j -= 8, ++src) {
for (j = k; j >= 8; j -= 8, ++src) { FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
@@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * break; }
/*
* For image widths that are not a multiple of 8, there
* are trailing pixels left on the current line. Print
* them as well.
*/
for (; j--; ) {
shift -= ppw;
FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
if (!shift) {
shift = 8;
++src;
}
}
- dst1 += p->fix.line_length; s += spitch; }
Best regards
On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") broke cfb_imageblit() for image widths that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") Reported-by: Marek Szyprowski m.szyprowski@samsung.com Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Javier Martinez Canillas javierm@redhat.com Cc: Sam Ravnborg sam@ravnborg.org
On both patches:
Acked-by: Daniel Vetter daniel.vetter@ffwll.ch
drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c index 7361cfabdd85..9ebda4e0dc7a 100644 --- a/drivers/video/fbdev/core/cfbimgblt.c +++ b/drivers/video/fbdev/core/cfbimgblt.c @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
- u32 bit_mask, eorx;
- u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 __iomem *dst; const u32 *tab = NULL;
@@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
for (i = image->height; i--; ) { dst = (u32 __iomem *)dst1;
shift = 8;
src = s;
/*
* Manually unroll the per-line copying loop for better
* performance. This works until we processed the last
* completely filled source byte (inclusive).
*/
switch (ppw) { case 4: /* 8 bpp */
for (j = k; j; j -= 2, ++src) {
case 2: /* 16 bpp */for (j = k; j >= 2; j -= 2, ++src) { FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); } break;
for (j = k; j; j -= 4, ++src) {
for (j = k; j >= 4; j -= 4, ++src) { FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
@@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * } break; case 1: /* 32 bpp */
for (j = k; j; j -= 8, ++src) {
for (j = k; j >= 8; j -= 8, ++src) { FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
@@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * break; }
/*
* For image widths that are not a multiple of 8, there
* are trailing pixels left on the current line. Print
* them as well.
*/
for (; j--; ) {
shift -= ppw;
FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
if (!shift) {
shift = 8;
++src;
}
}
- dst1 += p->fix.line_length; s += spitch; }
-- 2.35.1
On 3/13/22 20:29, Thomas Zimmermann wrote:
Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") broke cfb_imageblit() for image widths that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") Reported-by: Marek Szyprowski m.szyprowski@samsung.com Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Javier Martinez Canillas javierm@redhat.com Cc: Sam Ravnborg sam@ravnborg.org
Reviewed-by: Javier Martinez Canillas javierm@redhat.com
On Sun, Mar 13, 2022 at 08:29:52PM +0100, Thomas Zimmermann wrote:
Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") broke cfb_imageblit() for image widths that are not aligned to 8-bit boundaries. Fix this by handling the trailing pixels on each line separately. The performance improvements in the original commit do not regress by this change.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()") Reported-by: Marek Szyprowski m.szyprowski@samsung.com Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Javier Martinez Canillas javierm@redhat.com Cc: Sam Ravnborg sam@ravnborg.org Tested-by: Marek Szyprowski m.szyprowski@samsung.com Acked-by: Daniel Vetter daniel.vetter@ffwll.ch Reviewed-by: Javier Martinez Canillas javierm@redhat.com
Tested-by: Guenter Roeck linux@roeck-us.net
drivers/video/fbdev/core/cfbimgblt.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/drivers/video/fbdev/core/cfbimgblt.c b/drivers/video/fbdev/core/cfbimgblt.c index 7361cfabdd85..9ebda4e0dc7a 100644 --- a/drivers/video/fbdev/core/cfbimgblt.c +++ b/drivers/video/fbdev/core/cfbimgblt.c @@ -218,7 +218,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * { u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel; u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
- u32 bit_mask, eorx;
- u32 bit_mask, eorx, shift; const char *s = image->data, *src; u32 __iomem *dst; const u32 *tab = NULL;
@@ -259,17 +259,23 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
for (i = image->height; i--; ) { dst = (u32 __iomem *)dst1;
shift = 8;
src = s;
/*
* Manually unroll the per-line copying loop for better
* performance. This works until we processed the last
* completely filled source byte (inclusive).
*/
switch (ppw) { case 4: /* 8 bpp */
for (j = k; j; j -= 2, ++src) {
case 2: /* 16 bpp */for (j = k; j >= 2; j -= 2, ++src) { FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++); } break;
for (j = k; j; j -= 4, ++src) {
for (j = k; j >= 4; j -= 4, ++src) { FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
@@ -277,7 +283,7 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * } break; case 1: /* 32 bpp */
for (j = k; j; j -= 8, ++src) {
for (j = k; j >= 8; j -= 8, ++src) { FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++); FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
@@ -290,6 +296,20 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info * break; }
/*
* For image widths that are not a multiple of 8, there
* are trailing pixels left on the current line. Print
* them as well.
*/
for (; j--; ) {
shift -= ppw;
FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
if (!shift) {
shift = 8;
++src;
}
}
- dst1 += p->fix.line_length; s += spitch; }
dri-devel@lists.freedesktop.org