# Convert an XML/HTML file to pure ASCII: every non-ASCII character is
# replaced by a hexadecimal numeric character reference (&#xHHHH;).
#
# The file named by the first command-line argument is read in one piece.
# If it declares itself as UTF-8 (XML declaration or HTML <meta> charset) it
# is decoded into characters first; otherwise each byte >= 0x80 is escaped
# using its own byte value.
use strict;
use warnings;
use Encode qw(decode);

my $path = $ARGV[0];
defined $path or die "usage: $0 FILE\n";

# Three-argument open with a lexical handle: the file name can no longer be
# misread as an open mode, and the handle is not a package global.
open(my $in, '<', $path) or die "$path: $!";    # Input as default encoding
my $file = do { local $/; <$in> };              # $/ undefined => slurp whole file
close($in);
$file = '' unless defined $file;

# Decode only when the document itself announces UTF-8. The second pattern
# accepts an optional quote so that HTML5's <meta charset="utf-8"> matches as
# well as the older content="text/html; charset=utf-8" form.
if (   $file =~ /<\?xml[^>]+encoding\s*=\s*['"]utf-?8/i
    || $file =~ /<meta[^>]+charset\s*=\s*['"]?utf-?8/i)
{
    # Strict 'UTF-8' turns malformed sequences into U+FFFD rather than
    # letting them through as invalid code points.
    $file = decode('UTF-8', $file);
}

# ASCII characters pass through asciiize() unchanged, so only the non-ASCII
# ones need to be handed to it.
$file =~ s/([^\x00-\x7F])/asciiize($1)/eg;
# Make a string pure ASCII by replacing every character whose code point is
# 128 or above with a hexadecimal numeric character reference, zero-padded
# to at least four digits (U+00E9 becomes "&#x00E9;"). ASCII characters are
# returned untouched.
#
# Parameters:
#   $_[0] - the text to convert. Usually a single character, but a string of
#           any length is converted character by character.
# Returns:
#   The converted string; the argument itself (even undef) when it contains
#   nothing that needs escaping.
sub asciiize {
    my ($text) = @_;

    # Fast path: nothing outside the ASCII range, hand the input straight back.
    return $text if !defined $text || $text !~ /[^\x00-\x7F]/;

    # Work on the private copy so the caller's variable is never modified.
    $text =~ s/([^\x00-\x7F])/sprintf('&#x%04X;', ord($1))/ge;
    return $text;
}
# Write the converted text to the currently selected output handle.
print($file);
# Source: http://ripary.com/utf8.html