aboutsummaryrefslogtreecommitdiff
path: root/uxd.c
diff options
context:
space:
mode:
Diffstat (limited to 'uxd.c')
-rw-r--r--uxd.c34
1 files changed, 25 insertions, 9 deletions
diff --git a/uxd.c b/uxd.c
index e839330..b30b22b 100644
--- a/uxd.c
+++ b/uxd.c
@@ -150,6 +150,9 @@ char *dump_data_arg = NULL; /* -d */
long dump_data_idx = 0; /* -d */
int term_utf8 = 0; /* -t, -T */
int restore_term = 0; /* -T only */
+int java_mode = 0; /* -j */
+int wtf8_mode = 0; /* -w */
+int permissive = 0; /* -p */
/* stats for -i option */
long byte_count = 0;
@@ -265,8 +268,14 @@ void parse_args(int argc, char **argv) {
version();
}
- while((opt = my_getopt(argc, argv, "tTd:1ic:nbl:rmo:S:s:uhv")) != -1) {
+ while((opt = my_getopt(argc, argv, "jwptTd:1ic:nbl:rmo:S:s:uhv")) != -1) {
switch(opt) {
+ case 'j':
+ java_mode = 1; break;
+ case 'w':
+ wtf8_mode = 1; break;
+ case 'p':
+ permissive = 1; break;
case 't':
term_utf8 = restore_term = 1; break;
case 'T':
@@ -551,6 +560,10 @@ int is_overlong(int cont_count, unsigned char *b) {
if(!cont_count)
return 0;
+ /* java mode (MUTF-8) allows exactly one overlong sequence: the two bytes 0xc0 0x80 */
+ if(java_mode && cont_count == 1 && b[0] == 0xc0 && b[1] == 0x80)
+ return 0;
+
/* 2 byte seqs, if the first byte is 0xc0 or 0xc1, it's overlong. */
if(cont_count == 1 && b[0] <= 0xc1)
return 1;
@@ -673,15 +686,18 @@ int dump_utf8_char(void) {
}
}
- /* don't check bad sequences for out-of-range or surrogate */
- if(!bad) {
- if(is_out_of_range(cont_count, bytes) || is_surrogate(cont_count, bytes))
- bad = 1;
- }
-
+ if(!permissive) {
+ /* don't check bad sequences for out-of-range or surrogate */
+ if(!bad) {
+ if(is_out_of_range(cont_count, bytes))
+ bad = 1;
+ else if((!wtf8_mode) && is_surrogate(cont_count, bytes))
+ bad = 1;
+ }
- if(is_overlong(cont_count, bytes))
- overlong = 1;
+ if(is_overlong(cont_count, bytes))
+ overlong = 1;
+ }
if(bad || overlong) {
bad_count++;