mirror of
https://github.com/mintycube/dmenu.git
synced 2024-10-22 14:05:48 +02:00
overhaul utf8decode()
This changes the utf8decode() function to:

* report when an error occurs
* report how many bytes to advance on error

These will be useful in the next commit to render invalid UTF-8 sequences.

The new implementation is also shorter and more direct.

Ref. https://git.suckless.org/dmenu/commit/51e32d49b56c86cd288c64fccf6cd765547781b9.html
This commit is contained in:
parent
34b991503c
commit
d66c96ba0b
81
drw.c
81
drw.c
@ -11,63 +11,50 @@
|
|||||||
|
|
||||||
#if !PANGO_PATCH || HIGHLIGHT_PATCH
|
#if !PANGO_PATCH || HIGHLIGHT_PATCH
|
||||||
#define UTF_INVALID 0xFFFD
|
#define UTF_INVALID 0xFFFD
|
||||||
#define UTF_SIZ 4
|
|
||||||
|
|
||||||
static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0};
|
static int
|
||||||
static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
|
utf8decode(const char *s_in, long *u, int *err)
|
||||||
static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x10000};
|
|
||||||
static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
|
|
||||||
|
|
||||||
static long
|
|
||||||
utf8decodebyte(const char c, size_t *i)
|
|
||||||
{
|
{
|
||||||
for (*i = 0; *i < (UTF_SIZ + 1); ++(*i))
|
static const unsigned char lens[] = {
|
||||||
if (((unsigned char)c & utfmask[*i]) == utfbyte[*i])
|
/* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
return (unsigned char)c & ~utfmask[*i];
|
/* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */
|
||||||
return 0;
|
/* 110XX */ 2, 2, 2, 2,
|
||||||
}
|
/* 1110X */ 3, 3,
|
||||||
|
/* 11110 */ 4,
|
||||||
|
/* 11111 */ 0, /* invalid */
|
||||||
|
};
|
||||||
|
static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
|
||||||
|
static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 };
|
||||||
|
|
||||||
static size_t
|
const unsigned char *s = (const unsigned char *)s_in;
|
||||||
utf8validate(long *u, size_t i)
|
int len = lens[*s >> 3];
|
||||||
{
|
|
||||||
if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
|
|
||||||
*u = UTF_INVALID;
|
*u = UTF_INVALID;
|
||||||
for (i = 1; *u > utfmax[i]; ++i)
|
*err = 1;
|
||||||
;
|
if (len == 0)
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
utf8decode(const char *c, long *u, size_t clen)
|
|
||||||
{
|
|
||||||
size_t i, j, len, type;
|
|
||||||
long udecoded;
|
|
||||||
|
|
||||||
*u = UTF_INVALID;
|
|
||||||
if (!clen)
|
|
||||||
return 0;
|
|
||||||
udecoded = utf8decodebyte(c[0], &len);
|
|
||||||
if (!BETWEEN(len, 1, UTF_SIZ))
|
|
||||||
return 1;
|
return 1;
|
||||||
for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
|
|
||||||
udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
|
|
||||||
if (type)
|
|
||||||
return j;
|
|
||||||
}
|
|
||||||
if (j < len)
|
|
||||||
return 0;
|
|
||||||
*u = udecoded;
|
|
||||||
utf8validate(u, len);
|
|
||||||
|
|
||||||
|
long cp = s[0] & leading_mask[len - 1];
|
||||||
|
for (int i = 1; i < len; ++i) {
|
||||||
|
if (s[i] == '\0' || (s[i] & 0xC0) != 0x80)
|
||||||
|
return i;
|
||||||
|
cp = (cp << 6) | (s[i] & 0x3F);
|
||||||
|
}
|
||||||
|
/* out of range, surrogate, overlong encoding */
|
||||||
|
if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1])
|
||||||
|
return len;
|
||||||
|
|
||||||
|
*err = 0;
|
||||||
|
*u = cp;
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HIGHLIGHT_PATCH
|
#if HIGHLIGHT_PATCH
|
||||||
size_t
|
int
|
||||||
utf8len(const char *c)
|
utf8len(const char *c)
|
||||||
{
|
{
|
||||||
long utf8codepoint = 0;
|
long utf8codepoint = 0;
|
||||||
return utf8decode(c, &utf8codepoint, UTF_SIZ);
|
int utf8err = 0;
|
||||||
|
return utf8decode(c, &utf8codepoint, &utf8err);
|
||||||
}
|
}
|
||||||
#endif // HIGHLIGHT_PATCH
|
#endif // HIGHLIGHT_PATCH
|
||||||
#endif // PANGO_PATCH
|
#endif // PANGO_PATCH
|
||||||
@ -447,7 +434,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
|
|||||||
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
|
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
|
||||||
XftDraw *d = NULL;
|
XftDraw *d = NULL;
|
||||||
Fnt *usedfont, *curfont, *nextfont;
|
Fnt *usedfont, *curfont, *nextfont;
|
||||||
int utf8strlen, utf8charlen, render = x || y || w || h;
|
int utf8strlen, utf8charlen, utf8err, render = x || y || w || h;
|
||||||
long utf8codepoint = 0;
|
long utf8codepoint = 0;
|
||||||
const char *utf8str;
|
const char *utf8str;
|
||||||
FcCharSet *fccharset;
|
FcCharSet *fccharset;
|
||||||
@ -482,11 +469,11 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
|
|||||||
if (!ellipsis_width && render)
|
if (!ellipsis_width && render)
|
||||||
ellipsis_width = drw_fontset_getwidth(drw, ellipsis);
|
ellipsis_width = drw_fontset_getwidth(drw, ellipsis);
|
||||||
while (1) {
|
while (1) {
|
||||||
ew = ellipsis_len = utf8strlen = 0;
|
ew = ellipsis_len = utf8err = utf8strlen = 0;
|
||||||
utf8str = text;
|
utf8str = text;
|
||||||
nextfont = NULL;
|
nextfont = NULL;
|
||||||
while (*text) {
|
while (*text) {
|
||||||
utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ);
|
utf8charlen = utf8decode(text, &utf8codepoint, &utf8err);
|
||||||
for (curfont = drw->fonts; curfont; curfont = curfont->next) {
|
for (curfont = drw->fonts; curfont; curfont = curfont->next) {
|
||||||
charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
|
charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
|
||||||
if (charexists) {
|
if (charexists) {
|
||||||
|
Loading…
Reference in New Issue
Block a user