render invalid utf8 sequences as U+FFFD

previously drw_text would do the width calculations as if
invalid utf8 sequences were replaced with U+FFFD but would pass
the invalid utf8 sequence to xft to render where xft would just
cut it off at the first invalid byte.

this change makes invalid utf8 render as U+FFFD and avoids
sending invalid sequences to xft. the following can be used to
check the behavior before and after the patch:

	$ printf "0\xef1234567\ntest" | dmenu

Ref: https://lists.suckless.org/dev/2407/35646.html

Ref.
https://git.suckless.org/dmenu/commit/59936c7d972587a47d61161279bb8e8abc0b02f3.html
This commit is contained in:
bakkeby 2024-07-18 09:36:48 +02:00
parent d66c96ba0b
commit b980b0a359

19
drw.c
View File

@ -443,7 +443,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
XftResult result; XftResult result;
int charexists = 0, overflow = 0; int charexists = 0, overflow = 0;
/* keep track of a couple codepoints for which we have no match. */ /* keep track of a couple codepoints for which we have no match. */
static unsigned int nomatches[128], ellipsis_width; static unsigned int nomatches[128], ellipsis_width, invalid_width;
static const char invalid[] = "<EFBFBD>";
const char *ellipsis = "..."; const char *ellipsis = "...";
if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts) if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
@ -468,6 +469,10 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
usedfont = drw->fonts; usedfont = drw->fonts;
if (!ellipsis_width && render) if (!ellipsis_width && render)
ellipsis_width = drw_fontset_getwidth(drw, ellipsis); ellipsis_width = drw_fontset_getwidth(drw, ellipsis);
if (!invalid_width) {
invalid_width = -1; /* stop infinite recursion */
invalid_width = drw_fontset_getwidth(drw, invalid);
}
while (1) { while (1) {
ew = ellipsis_len = utf8err = utf8strlen = 0; ew = ellipsis_len = utf8err = utf8strlen = 0;
utf8str = text; utf8str = text;
@ -495,9 +500,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
else else
utf8strlen = ellipsis_len; utf8strlen = ellipsis_len;
} else if (curfont == usedfont) { } else if (curfont == usedfont) {
utf8strlen += utf8charlen;
text += utf8charlen; text += utf8charlen;
ew += tmpw; utf8strlen += utf8err ? 0 : utf8charlen;
ew += utf8err ? 0 : tmpw;
} else { } else {
nextfont = curfont; nextfont = curfont;
} }
@ -505,7 +510,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
} }
} }
if (overflow || !charexists || nextfont) if (overflow || !charexists || nextfont || utf8err)
break; break;
else else
charexists = 0; charexists = 0;
@ -520,6 +525,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
x += ew; x += ew;
w -= ew; w -= ew;
} }
if (utf8err && (!render || invalid_width < w)) {
if (render)
drw_text(drw, x, y, w, h, 0, invalid, invert);
x += invalid_width;
w -= invalid_width;
}
if (render && overflow && ellipsis_w) if (render && overflow && ellipsis_w)
drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, ellipsis, invert); drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, ellipsis, invert);