Apache-MimeXML

 view release on metacpan or  search on metacpan

MimeXML.pm  view on Meta::CPAN

0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F);

sub handler {
	my $r = shift;
	
	return DECLINED unless -e $r->finfo;
	return DECLINED if -d $r->finfo;
		
	my $encoding = check_for_xml($r->filename);
	
	if ($encoding) {
		my $type = $r->dir_config('XMLMimeType') || 'application/xml';

		if ($encoding eq 'utf-16-be') {
			$encoding = $r->dir_config('XMLUtf16EncodingBE') || 'utf-16';
			$type =~ s/^text\/xml$/application\/xml/;
		}
		elsif ($encoding eq 'utf-16-le') {
			$encoding = $r->dir_config('XMLUtf16EncodingLE') || 'utf-16-le';
			$type =~ s/^text\/xml$/application\/xml/;
		}
		
		$r->notes('is_xml', 1);
		$r->push_handlers('PerlFixupHandler', 
				sub { 
					my $r = shift;
					$r->content_type("$type; charset=$encoding");
					return OK;
				});
	}

	return DECLINED;
}

sub check_for_xml {
	my $filename = shift;
	
	my $firstline;
	
	if (ref($filename) && UNIVERSAL::isa($filename, 'IO::Handler')) {
		my $fh = $filename;
		binmode $fh;
		sysread($fh, $firstline, 200); # Read 200 bytes. This is a guestimate...
	}
	else {
		eval {
			my $fh = *{$filename}{IO};
			binmode $fh;
			sysread($fh, $firstline, 200); # Read 200 bytes. This is a guestimate...
		};
		if ($@) {
			eval {
				open(FH, $filename) or die "Open failed: $!";
				binmode FH;
				sysread(FH, $firstline, 200); # Read 200 bytes. This is a guestimate...
				close FH;
			};
			if ($@) {
				warn "failed? $@\n";
				return;
			}
		}
	}
	
	if (substr($firstline, 0, 2) eq $feff) {
		# Probably utf-16
		if ($firstline =~ m/^$feff\x00<\x00\?\x00x\x00m\x00l/) {
			return 'utf-16-be';
		}
	}
	elsif (substr($firstline, 0, 2) eq $fffe) {
		# Probably utf-16-little-endian...
		if ($firstline =~ m/^$fffe<\x00\?\x00x\x00m\x00l\x00/) {
			return 'utf-16-le';
		}
	}
	elsif (substr($firstline, 0, 1) eq chr(0x4C)) {
		# Possibly ebdic...
		if ($firstline =~ m/^\x4C\x6F\xA7\x94\x93(.*?)\x6F\x6E/s) {
			my $attribs = $1;
			
			# EBCDIC things we need to know...
			# encoding = 85 95 83 96 84 89 95 87
			# whitespace = [ 40 05 0D 25 ]
			# quote/apos = [ 7F 7D ]
			# '=' = 7E

			my $ws = '\x40\x05\x0d\x25';

			if ($attribs =~ m/\x85\x95\x83\x96\x84\x89\x95\x87[$ws]*\x7e[$ws]*(\x7f|\x7d)(.*?)\1/s) {
				my $encoding = $2;
				$encoding =~ s/(.)/chr($ebasci[ord($1)])/eg;
				return $encoding;
			}
		}
	}
	else {
		if ($firstline =~ m/^<\?xml(.*?)\?>/s) {
			my $attribs = $1;
			if ($attribs =~ m/encoding[\s]*=[\s]*(["'])(.*?)\1/s) {
				return $2;
			}
			else {
				# Assume utf-8
				return 'utf-8';
			}
		}
	}

	return;
}

1;
__END__



( run in 0.781 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )