Apache-MimeXML
view release on metacpan or search on metacpan
0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F);
sub handler {
my $r = shift;
return DECLINED unless -e $r->finfo;
return DECLINED if -d $r->finfo;
my $encoding = check_for_xml($r->filename);
if ($encoding) {
my $type = $r->dir_config('XMLMimeType') || 'application/xml';
if ($encoding eq 'utf-16-be') {
$encoding = $r->dir_config('XMLUtf16EncodingBE') || 'utf-16';
$type =~ s/^text\/xml$/application\/xml/;
}
elsif ($encoding eq 'utf-16-le') {
$encoding = $r->dir_config('XMLUtf16EncodingLE') || 'utf-16-le';
$type =~ s/^text\/xml$/application\/xml/;
}
$r->notes('is_xml', 1);
$r->push_handlers('PerlFixupHandler',
sub {
my $r = shift;
$r->content_type("$type; charset=$encoding");
return OK;
});
}
return DECLINED;
}
sub check_for_xml {
my $filename = shift;
my $firstline;
if (ref($filename) && UNIVERSAL::isa($filename, 'IO::Handler')) {
my $fh = $filename;
binmode $fh;
sysread($fh, $firstline, 200); # Read 200 bytes. This is a guestimate...
}
else {
eval {
my $fh = *{$filename}{IO};
binmode $fh;
sysread($fh, $firstline, 200); # Read 200 bytes. This is a guestimate...
};
if ($@) {
eval {
open(FH, $filename) or die "Open failed: $!";
binmode FH;
sysread(FH, $firstline, 200); # Read 200 bytes. This is a guestimate...
close FH;
};
if ($@) {
warn "failed? $@\n";
return;
}
}
}
if (substr($firstline, 0, 2) eq $feff) {
# Probably utf-16
if ($firstline =~ m/^$feff\x00<\x00\?\x00x\x00m\x00l/) {
return 'utf-16-be';
}
}
elsif (substr($firstline, 0, 2) eq $fffe) {
# Probably utf-16-little-endian...
if ($firstline =~ m/^$fffe<\x00\?\x00x\x00m\x00l\x00/) {
return 'utf-16-le';
}
}
elsif (substr($firstline, 0, 1) eq chr(0x4C)) {
# Possibly ebdic...
if ($firstline =~ m/^\x4C\x6F\xA7\x94\x93(.*?)\x6F\x6E/s) {
my $attribs = $1;
# EBCDIC things we need to know...
# encoding = 85 95 83 96 84 89 95 87
# whitespace = [ 40 05 0D 25 ]
# quote/apos = [ 7F 7D ]
# '=' = 7E
my $ws = '\x40\x05\x0d\x25';
if ($attribs =~ m/\x85\x95\x83\x96\x84\x89\x95\x87[$ws]*\x7e[$ws]*(\x7f|\x7d)(.*?)\1/s) {
my $encoding = $2;
$encoding =~ s/(.)/chr($ebasci[ord($1)])/eg;
return $encoding;
}
}
}
else {
if ($firstline =~ m/^<\?xml(.*?)\?>/s) {
my $attribs = $1;
if ($attribs =~ m/encoding[\s]*=[\s]*(["'])(.*?)\1/s) {
return $2;
}
else {
# Assume utf-8
return 'utf-8';
}
}
}
return;
}
1;
__END__
( run in 0.781 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )