четверг, 5 ноября 2009 г.

Processing UTF-8 Files with Perl

use Encode;

# Read the whole input file (named by the first command-line argument) into
# $file, then turn it into a character string if it declares itself as UTF-8.
defined $ARGV[0] or die "usage: $0 FILE\n";

# Three-argument open with a lexical handle: the filename can never be
# misread as an open mode, and the handle does not leak as a global bareword.
# No encoding layer is applied, so the data arrives as undecoded bytes.
open(my $in, '<', $ARGV[0]) or die "Cannot open '$ARGV[0]': $!\n";
my $file = do { local $/; <$in> }; # $/ undefined => slurp everything at once
close($in);

# Decode only when the document itself announces UTF-8, either through an
# XML declaration (<?xml ... encoding="utf-8"?>) or through an HTML meta tag
# (content="text/html; charset=utf-8" or charset="utf-8"). Input without such
# a declaration is left as raw bytes.
if (   $file =~ /<\?xml[^>]+encoding\s*=\s*['"]utf-?8/i
    || $file =~ /<meta[^>]+charset\s*=\s*['"]?utf-?8/i)
{
    $file = decode('utf8', $file);
}

# Visit the text one character at a time and swap each character for the
# value asciiize() returns; "." does not match newlines, so they stay as-is.
$file =~ s{(.)}{asciiize($1)}ge;

# Convert a single character to an ASCII-only representation.
#   Argument: one character (first element of @_).
#   Returns:  the character itself when its code point is below 128,
#             otherwise an HTML hexadecimal character reference of the
#             form &#xHHHH; (upper-case hex, zero-padded to four digits).
sub asciiize {
    my ($char) = @_;
    my $code_point = ord($char);

    # Plain ASCII passes through unchanged.
    if ($code_point < 128) {
        return $char;
    }

    # Anything else is spelled out as a numeric character reference.
    return sprintf('&#x%04X;', $code_point);
}


# Write the converted text to the currently selected output handle
# (STDOUT unless something has changed it with select()).
print $file;


source: http://ripary.com/utf8.html

Комментариев нет: