From b757a7b42d6c54f431c31642cfbe0127e77cf822 Mon Sep 17 00:00:00 2001
From: "B. Watson"
Date: Fri, 20 Dec 2024 16:32:14 -0500
Subject: better printable forms of invalid unicode

---
 uxd.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/uxd.c b/uxd.c
index 7724669..3b899be 100644
--- a/uxd.c
+++ b/uxd.c
@@ -84,6 +84,9 @@ extern int optind;
 /* replacement character � is U+FFFD */
 #define PRINT_BAD "�"
 #define PRINT_BOM "B"
+#define PRINT_OLONG "O"
+#define PRINT_OORANGE "+"
+#define PRINT_SURR "S"
 
 /* sprintf() formats for hex data */
 #define LC_BYTE_FMT "%02x"
@@ -643,25 +646,25 @@ char *classify_char(int *hl, unsigned char *bytes, int cont_count) {
 		} else {
 			*hl = HL_BAD;
 		}
-		return PRINT_BAD;
+		return PRINT_OLONG;
 	}
 
 	if(is_surrogate(cont_count, bytes)) {
 		if(wtf8_mode || permissive) {
-			*hl = HL_NORMAL;
+			*hl = HL_SPECIAL;
 		} else {
 			*hl = HL_BAD;
 		}
-		return PRINT_BAD;
+		return PRINT_SURR;
 	}
 
 	if(is_out_of_range(cont_count, bytes)) {
 		if(permissive) {
-			*hl = HL_NORMAL;
+			*hl = HL_SPECIAL;
 		} else {
 			*hl = HL_BAD;
 		}
-		return PRINT_BAD;
+		return PRINT_OORANGE;
 	}
 
 	*hl = HL_NORMAL;
-- 
cgit v1.2.3