aboutsummaryrefslogtreecommitdiff
path: root/uxd.c
diff options
context:
space:
mode:
Diffstat (limited to 'uxd.c')
-rw-r--r--uxd.c44
1 files changed, 39 insertions, 5 deletions
diff --git a/uxd.c b/uxd.c
index dbea5f0..38c5862 100644
--- a/uxd.c
+++ b/uxd.c
@@ -77,6 +77,7 @@ int cur_normal_hilite = 0;
#define HL_NORM_INV 1
#define HL_SPECIAL 2
#define HL_BAD 3
+#define HL_OVERLONG 4 /* highlight type used when is_overlong() flags a sequence */
/* terminal codes for mono highlighting. */
#define MONO_NORMAL 0
@@ -417,8 +418,12 @@ void append_color(char *buf, int hl_type) {
fgcolor = special_color;
bgcolor = 0;
break;
- default:
+ case HL_OVERLONG:
+ fgcolor = 0;
+ bgcolor = special_color;
+ break;
case HL_BAD:
+ default:
fgcolor = 0;
bgcolor = bad_color;
break;
@@ -447,6 +452,7 @@ void append_mono(char *buf, int hl_type) {
code = MONO_BOLD;
break;
default:
+ case HL_OVERLONG: /* maybe change this later */
case HL_BAD:
code = MONO_REVERSE;
break;
@@ -521,6 +527,26 @@ int is_bom(unsigned char *b) {
return (b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}
+/* Detect overlong encodings, without doing a full decode. cont_count is
+   the number of continuation bytes; b[0] is the lead byte. Returns 1 if
+   the sequence is overlong, 0 otherwise. No byte past b[1] is read. */
+int is_overlong(int cont_count, unsigned char *b) {
+	switch(cont_count) {
+		case 1:
+			/* 2 byte seqs: leads c0 and c1 can only encode U+0000..U+007F. */
+			return b[0] <= 0xc1;
+		case 2:
+			/* 3 byte seqs: only lead e0 with 2nd byte < a0 is below U+0800. */
+			return b[0] == 0xe0 && b[1] <= 0x9f;
+		case 3:
+			/* 4 byte seqs: only lead f0 with 2nd byte < 90 is below U+10000. */
+			return b[0] == 0xf0 && b[1] <= 0x8f;
+		default:
+			/* 1 byte seqs are never overlong. */
+			return 0;
+	}
+}
+
/* U+10FFFF is the last valid codepoint. It encodes to f4 8f bf bf.
'count' is the count of continuation bytes only (so, 3 for a 4-byte
sqeuence). */
@@ -560,7 +586,7 @@ int dump_utf8_char(void) {
unsigned char bytes[] = { 0, 0, 0, 0, 0 };
unsigned char *cont_bytes = bytes + 1;
char *printable;
- int bad = 0, special = 0, hl_type;
+ int bad = 0, special = 0, overlong = 0, hl_type;
int c, cont_count, i;
static int byte0;
@@ -625,7 +651,10 @@ int dump_utf8_char(void) {
if(is_out_of_range(cont_count, bytes))
bad = 1;
- if(bad) {
+ if(is_overlong(cont_count, bytes))
+ overlong = 1;
+
+ if(bad || overlong) {
bad_count++;
} else {
char_count++;
@@ -644,8 +673,13 @@ int dump_utf8_char(void) {
hl_type = HL_SPECIAL;
printable = PRINT_BOM;
} else {
- hl_type = HL_NORMAL;
- printable = (char *)bytes;
+ if(overlong) {
+ hl_type = HL_OVERLONG;
+ printable = PRINT_BAD;
+ } else {
+ hl_type = HL_NORMAL;
+ printable = (char *)bytes;
+ }
}
/* human-readable (right) column: */