about | summary | refs | log | tree | commit | diff
path: root/uxd.c
diff options
context:
space:
mode:
author B. Watson <urchlay@slackware.uk> 2024-12-18 05:47:07 -0500
committer B. Watson <urchlay@slackware.uk> 2024-12-18 05:47:07 -0500
commit c205a7ea2a7171b61dae4ac51a3a251cceb1dde1 (patch)
tree 58447b4934f93eb8cb48909fc1efc3b15c72c5ed /uxd.c
parent f467fec27bc25d51020ce482750361c102417efb (diff)
download uxd-c205a7ea2a7171b61dae4ac51a3a251cceb1dde1.tar.gz
detect UTF-16 surrogates as bad, use red for overlong
Diffstat (limited to 'uxd.c')
-rw-r--r-- uxd.c 17
1 files changed, 14 insertions, 3 deletions
diff --git a/uxd.c b/uxd.c
index e32356b..d141c2e 100644
--- a/uxd.c
+++ b/uxd.c
@@ -425,9 +425,11 @@ void append_color(char *buf, int hl_type) {
bgcolor = 0;
break;
case HL_OVERLONG:
+ /* don't use a separate color for this any more
fgcolor = 0;
bgcolor = special_color;
break;
+ */
case HL_BAD:
default:
fgcolor = 0;
@@ -563,6 +565,12 @@ int is_out_of_range(int cont_count, unsigned char *b) {
return 1;
}
+/* surrogates for UTF-16 are not valid Unicode (therefore not UTF-8)
+ *
+ * Returns 1 when b[0] is 0xed and b[1] is greater than 0x9f, 0 otherwise.
+ * Per the Unicode standard, the surrogate code points U+D800..U+DFFF would
+ * be encoded as the 3-byte sequences ED A0..BF 80..BF, which is the byte
+ * pattern this matches.
+ *
+ * cont_count: presumably the number of continuation bytes that follow the
+ *             lead byte b[0] -- confirm against dump_utf8_char(). Any value
+ *             other than 2 makes this return 0 without looking at b.
+ * b:          bytes of the sequence; only b[0] and b[1] are read.
+ *
+ * No upper bound is applied to b[1] here. The visible caller only invokes
+ * this for sequences it has not already marked bad. */
+int is_surrogate(int cont_count, unsigned char *b) {
+ if(cont_count != 2) return 0; /* only the cont_count == 2 form is examined */
+ return b[0] == 0xed && b[1] > 0x9f; /* ED followed by A0 or higher */
+}
+
int get_next_byte(void) {
int c;
@@ -655,9 +663,12 @@ int dump_utf8_char(void) {
}
}
- /* don't check bad sequences for out-of-range */
- if(!bad && is_out_of_range(cont_count, bytes))
- bad = 1;
+ /* don't check bad sequences for out-of-range or surrogate */
+ if(!bad) {
+ if(is_out_of_range(cont_count, bytes) || is_surrogate(cont_count, bytes))
+ bad = 1;
+ }
+
if(is_overlong(cont_count, bytes))
overlong = 1;