# Patch for convmv 1.10: restrict NFC/NFD normalization to a subset of
# code points.
#
# What the hunks below do:
#   * Define $RE_NORMALIZE, a negated character class that matches any
#     character outside U+2000-U+2FFF, U+F900-U+FAFF and U+2F800-U+2FAFF.
#   * Add modified_nfd() and modified_nfc(). Each takes one string, passes
#     only the runs of characters matching $RE_NORMALIZE through NFD() or
#     NFC() respectively, and returns the result; characters inside the
#     three excluded ranges are left as they are.
#   * Point $norm at modified_nfc / modified_nfd (instead of NFC / NFD) when
#     $opt_nfc / $opt_nfd is set.
#   * Replace the two direct NFC(decode_utf8(...)) calls with
#     modified_nfc(decode_utf8(...)).
#
# NOTE(review): the excluded ranges look like the ones that Apple's HFS+
# filename decomposition skips -- confirm against Apple's documentation.
# NOTE(review): both new subs assign to $string without "my", so it is a
# package variable shared with the rest of the script -- confirm nothing else
# relies on or clobbers it.
# NOTE(review): "use bytes" is a lexically scoped pragma, so the "use bytes;"
# placed just before "return" presumably affects only that return statement
# rather than switching byte semantics back on at run time -- confirm this
# matches the author's intent.
--- convmv-1.10/convmv.orig 2007-09-17 10:54:18.000000000 +0900 +++ convmv-1.10/convmv 2007-09-23 11:09:22.000000000 +0900 @@ -267,6 +267,8 @@ $maxfilenamelength=255; # $maxpathlength=4096; # this might be used somehow, somewhere? +$RE_NORMALIZE='[^\x{2000}-\x{2fff}\x{f900}-\x{faff}\x{2f800}-\x{2faff}]'; + &listvalidencodings and exit 0 if ($opt_list); &printusage and exit 1 if (!@ARGV or $opt_help); @@ -301,10 +303,10 @@ } if ($opt_nfc) { - $norm=\&NFC; + $norm=\&modified_nfc; die "NFC requires UTF-8 as target charset\n" unless ($to_is_utf8); } elsif ($opt_nfd) { - $norm=\&NFD; + $norm=\&modified_nfd; die "NFD requires UTF-8 as target charset\n" unless ($to_is_utf8); } else { $norm=\&dummy; @@ -352,6 +354,22 @@ ## subs ### +sub modified_nfd { + $string=shift; + no bytes; + $string =~ s/(${RE_NORMALIZE}+)/NFD($1)/geo; + use bytes; + return $string; +} + +sub modified_nfc { + $string=shift; + no bytes; + $string =~ s/(${RE_NORMALIZE}+)/NFC($1)/geo; + use bytes; + return $string; +} + # scan for real files and check charset first: sub scan { $arg=$_; @@ -434,7 +452,7 @@ } else { if ($from_is_utf8 and ! $to_is_utf8) { # from_to can't convert from NFD to non-UTF-8! - $newname=encode_utf8(NFC(decode_utf8($oldfile))); + $newname=encode_utf8(modified_nfc(decode_utf8($oldfile))); } else { $newname=$oldfile; } @@ -517,7 +535,7 @@ $new = &$norm($new); $filenamelength=length($new); } else { - $new=encode_utf8(NFC(decode_utf8($new))); + $new=encode_utf8(modified_nfc(decode_utf8($new))); $filenamelength=from_to($new, "utf8", $opt_t, Encode::FB_QUIET); } ## print "$oldfile|$utf8oldfile|$new|$filenamelength\n";