BOM results from the CPAN

Parse-AFP
view release on metacpan or search on metacpan
	if ($key eq 'Data') {
	    print "<tr><td colspan='2' class='item'>$x</td></tr>\n";
	}
	else {
	    print "<tr><td class='label'>$key</td><td class='item'>$x</td></tr>\n";
	}
    }

    print "</table>";
    if ($obj->has_members) {
	print "<ol>";
	dump_members($obj);
	print "</ol>";
    }
}

sub dump_members {
    my $obj = shift;
    while (my $rec = $obj->next_member) {
	my $type = substr(ref($rec), 12);
	print "<li><div><strong>$type</strong>";
	print " &ndash; $desc{$type}" if exists $desc{$type};
	print "</div>";
	dump_afp($rec);
	print "</li>";
    }
}

use constant Header => << '.';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html><head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'><style type='text/css'><!--
body { background: #e0e0e0; font-family: times new roman, times; margin-left: 20px }
h1 { font-family: times }
span.hex { font-family: andale mono, courier }
ol { border-left: 1px dotted black }
ol.top { border-left: none }
table { font-size: small; border-left: 1px dotted black; padding-left: 6pt; width: 100% }
td.label { background: #d0d0d0; font-family: arial unicode ms, helvetica }
td.item { background: white; width: 100%; font-family: arial unicode ms, helvetica }
div { text-decoration: underline; background: #e0e0ff; font-family: arial unicode ms, helvetica }
--></style><title>AFP Dump</title></head><body>
.

1;

no warnings 'redefine';
package Encode::Guess;
sub guess {
    my $class = shift;
    my $obj   = ref($class) ? $class : $Encode::Encoding{$Canon};
    my $octet = shift;

    # sanity check
    return unless defined $octet and length $octet;

    # cheat 0: utf8 flag;
    if ( Encode::is_utf8($octet) ) {
	return find_encoding('utf8') unless $NoUTFAutoGuess;
	Encode::_utf8_off($octet);
    }
    # cheat 1: BOM
    use Encode::Unicode;
    unless ($NoUTFAutoGuess) {
	my $BOM = unpack('n', $octet);
	return find_encoding('UTF-16')
	    if (defined $BOM and ($BOM == 0xFeFF or $BOM == 0xFFFe));
	$BOM = unpack('N', $octet);
	return find_encoding('UTF-32')
	    if (defined $BOM and ($BOM == 0xFeFF or $BOM == 0xFFFe0000));
	if ($octet =~ /\x00/o){ # if \x00 found, we assume UTF-(16|32)(BE|LE)
	    my $utf;
	    my ($be, $le) = (0, 0);
	    if ($octet =~ /\x00\x00/o){ # UTF-32(BE|LE) assumed
		$utf = "UTF-32";
		for my $char (unpack('N*', $octet)){
		    $char & 0x0000ffff and $be++;
		    $char & 0xffff0000 and $le++;
		}
	    }else{ # UTF-16(BE|LE) assumed
		$utf = "UTF-16";
		for my $char (unpack('n*', $octet)){
		    $char & 0x00ff and $be++;
		    $char & 0xff00 and $le++;
		}
	    }
	    DEBUG and warn "$utf, be == $be, le == $le";
	    $be == $le 
		and return
		    "Encodings ambiguous between $utf BE and LE ($be, $le)";
	    $utf .= ($be > $le) ? 'BE' : 'LE';
	    return find_encoding($utf);
	}
    }
    my %try =  %{$obj->{Suspects}};
    for my $c (@_){
	my $e = find_encoding($c) or die "Unknown encoding: $c";
	$try{$e->name} = $e;
	DEBUG and warn "Added: ", $e->name;
    }
    my $nline = 1;
    for my $line (split /\r\n?|\n/, $octet){
	# cheat 2 -- \e in the string
	if ($line =~ /\e/o){
	    my @keys = keys %try;
	    delete @try{qw/utf8 ascii/};
	    for my $k (@keys){
		ref($try{$k}) eq 'Encode::XS' and delete $try{$k};
	    }
	}
	my %ok = %try;
	# warn join(",", keys %try);
	for my $k (keys %try){
	    my $scratch = $line;
	    $try{$k}->decode($scratch, FB_QUIET);
	    if ($scratch eq ''){
		DEBUG and warn sprintf("%4d:%-24s ok\n", $nline, $k);
	    }else{
		use bytes ();
		DEBUG and 
		    warn sprintf("%4d:%-24s not ok; %d bytes left\n", 
				 $nline, $k, bytes::length($scratch));
		delete $ok{$k};
	    }
	}
	%ok or return "No appropriate encodings found!";
	if (scalar(keys(%ok)) >= 1){
	    my ($retval) = sort values(%ok);
	    return $retval;
	}
( run in 1.370 second using v1.01-cache-2.11-cpan-df04353d9ac )