diff options
Diffstat (limited to 'uxd.c')
-rw-r--r-- | uxd.c | 34 |
1 files changed, 25 insertions, 9 deletions
@@ -150,6 +150,9 @@ char *dump_data_arg = NULL; /* -d */ long dump_data_idx = 0; /* -d */ int term_utf8 = 0; /* -t, -T */ int restore_term = 0; /* -T only */ +int java_mode = 0; /* -j */ +int wtf8_mode = 0; /* -w */ +int permissive = 0; /* -p */ /* stats for -i option */ long byte_count = 0; @@ -265,8 +268,14 @@ void parse_args(int argc, char **argv) { version(); } - while((opt = my_getopt(argc, argv, "tTd:1ic:nbl:rmo:S:s:uhv")) != -1) { + while((opt = my_getopt(argc, argv, "jwptTd:1ic:nbl:rmo:S:s:uhv")) != -1) { switch(opt) { + case 'j': + java_mode = 1; break; + case 'w': + wtf8_mode = 1; break; + case 'p': + permissive = 1; break; case 't': term_utf8 = restore_term = 1; break; case 'T': @@ -551,6 +560,10 @@ int is_overlong(int cont_count, unsigned char *b) { if(!cont_count) return 0; + /* java mode (MUTF-8) allows exactly one overlong: */ + if(java_mode && cont_count == 1 && b[0] == 0xc0 && b[1] == 0x80) + return 0; + /* 2 byte seqs, if the first byte is 0xc0 or 0xc1, it's overlong. */ if(cont_count == 1 && b[0] <= 0xc1) return 1; @@ -673,15 +686,18 @@ int dump_utf8_char(void) { } } - /* don't check bad sequences for out-of-range or surrogate */ - if(!bad) { - if(is_out_of_range(cont_count, bytes) || is_surrogate(cont_count, bytes)) - bad = 1; - } - + if(!permissive) { + /* don't check bad sequences for out-of-range or surrogate */ + if(!bad) { + if(is_out_of_range(cont_count, bytes)) + bad = 1; + else if((!wtf8_mode) && is_surrogate(cont_count, bytes)) + bad = 1; + } - if(is_overlong(cont_count, bytes)) - overlong = 1; + if(is_overlong(cont_count, bytes)) + overlong = 1; + } if(bad || overlong) { bad_count++; |