diff options
author | B. Watson <urchlay@slackware.uk> | 2024-12-17 22:47:36 -0500 |
---|---|---|
committer | B. Watson <urchlay@slackware.uk> | 2024-12-17 22:47:57 -0500 |
commit | f0e0a74cbf43d771075ad2d801197b8072d5b15c (patch) | |
tree | 71d2f41619aa4cc39487c850a59e97f90895669b /uxd.c | |
parent | 548e7d04b4b2fa60b71615ed590be54016dac52d (diff) | |
download | uxd-f0e0a74cbf43d771075ad2d801197b8072d5b15c.tar.gz |
uxd.c: add overlong sequence detection; ver.rst: regenerate
Diffstat (limited to 'uxd.c')
-rw-r--r-- | uxd.c | 44 |
1 files changed, 39 insertions, 5 deletions
/* Detect overlong encodings, without doing a full decode.

   cont_count is the number of continuation bytes in the sequence
   (0 for a 1-byte sequence, 3 for a 4-byte sequence). b points at the
   sequence; b[0] is the lead byte and b[1] is the byte that follows it.
   b[1] is only read when cont_count is 2 or 3.

   Returns 1 if the sequence is an overlong encoding, 0 otherwise.

   An overlong form can only begin with the lowest lead byte for its
   length (0xc0/0xc1, 0xe0, or 0xf0). Any higher lead byte already puts
   the codepoint above the range of the next-shorter encoding, so the
   lead byte has to be checked along with the 2nd byte: e2 80 98
   (U+2018) and f1 80 80 80 (U+40000) are perfectly valid. */
int is_overlong(int cont_count, unsigned char *b) {
	/* 1 byte seqs are never overlong. */
	if(!cont_count)
		return 0;

	/* 2 byte seqs, if the first byte is 0xc0 or 0xc1, it's overlong. */
	if(cont_count == 1 && b[0] <= 0xc1)
		return 1;

	/* 3 byte seqs: e0 80..9f xx would decode to less than U+0800,
	   which fits in 2 bytes. e1 and up are never overlong. */
	if(cont_count == 2 && b[0] == 0xe0 && b[1] <= 0x9f)
		return 1;

	/* 4 byte seqs: f0 80..8f xx xx would decode to less than U+10000,
	   which fits in 3 bytes. f1 and up are never overlong. */
	if(cont_count == 3 && b[0] == 0xf0 && b[1] <= 0x8f)
		return 1;

	return 0;
}
*/ @@ -560,7 +586,7 @@ int dump_utf8_char(void) { unsigned char bytes[] = { 0, 0, 0, 0, 0 }; unsigned char *cont_bytes = bytes + 1; char *printable; - int bad = 0, special = 0, hl_type; + int bad = 0, special = 0, overlong = 0, hl_type; int c, cont_count, i; static int byte0; @@ -625,7 +651,10 @@ int dump_utf8_char(void) { if(is_out_of_range(cont_count, bytes)) bad = 1; - if(bad) { + if(is_overlong(cont_count, bytes)) + overlong = 1; + + if(bad || overlong) { bad_count++; } else { char_count++; @@ -644,8 +673,13 @@ int dump_utf8_char(void) { hl_type = HL_SPECIAL; printable = PRINT_BOM; } else { - hl_type = HL_NORMAL; - printable = (char *)bytes; + if(overlong) { + hl_type = HL_OVERLONG; + printable = PRINT_BAD; + } else { + hl_type = HL_NORMAL; + printable = (char *)bytes; + } } /* human-readable (right) column: */ |