diff options
author | B. Watson <urchlay@slackware.uk> | 2024-12-18 05:47:07 -0500 |
---|---|---|
committer | B. Watson <urchlay@slackware.uk> | 2024-12-18 05:47:07 -0500 |
commit | c205a7ea2a7171b61dae4ac51a3a251cceb1dde1 (patch) | |
tree | 58447b4934f93eb8cb48909fc1efc3b15c72c5ed /uxd.c | |
parent | f467fec27bc25d51020ce482750361c102417efb (diff) | |
download | uxd-c205a7ea2a7171b61dae4ac51a3a251cceb1dde1.tar.gz |
detect UTF-16 surrogates as bad, use red for overlong
Diffstat (limited to 'uxd.c')
-rw-r--r-- | uxd.c | 17 |
1 files changed, 14 insertions, 3 deletions
```diff
@@ -425,9 +425,11 @@ void append_color(char *buf, int hl_type) {
 			bgcolor = 0;
 			break;
 		case HL_OVERLONG:
+			/* don't use a separate color for this any more
 			fgcolor = 0;
 			bgcolor = special_color;
 			break;
+			*/
 		case HL_BAD:
 		default:
 			fgcolor = 0;
@@ -563,6 +565,12 @@ int is_out_of_range(int cont_count, unsigned char *b) {
 	return 1;
 }
 
+/* surrogates for UTF-16 are not valid Unicode (therefore not UTF-8) */
+int is_surrogate(int cont_count, unsigned char *b) {
+	if(cont_count != 2) return 0;
+	return b[0] == 0xed && b[1] > 0x9f;
+}
+
 int get_next_byte(void) {
 	int c;
 
@@ -655,9 +663,12 @@ int dump_utf8_char(void) {
 		}
 	}
 
-	/* don't check bad sequences for out-of-range */
-	if(!bad && is_out_of_range(cont_count, bytes))
-		bad = 1;
+	/* don't check bad sequences for out-of-range or surrogate */
+	if(!bad) {
+		if(is_out_of_range(cont_count, bytes) || is_surrogate(cont_count, bytes))
+			bad = 1;
+	}
+
 	if(is_overlong(cont_count, bytes))
 		overlong = 1;
 
```