Apache-GuessCharset
view release on metacpan or search on metacpan
Revision history for Perl extension Apache::GuessCharset
0.03 Sat May 11 13:31:31 JST 2002
* handles any text/* requests without charset attribute
* charset attribute is now lowercased
- returns DECLINED if not in main request
- added mock test code
0.02 Fri May 3 22:27:43 JST 2002
- uses $r->finfo and fixed some style issue
(Thanks to Geoffrey Young <geoff@modperlcookbook.org>)
- CPAN release
0.01 Tue Apr 30 14:33:20 2002
- original version
NAME
Apache::GuessCharset - adds HTTP charset by guessing file's encoding
SYNOPSIS
PerlModule Apache::GuessCharset
SetHandler perl-script
PerlFixupHandler Apache::GuessCharset
# how many bytes to read for guessing (default 512)
PerlSetVar GuessCharsetBufferSize 1024
# list of encoding suspects
PerlSetVar GuessCharsetSuspects euc-jp
PerlAddVar GuessCharsetSuspects shiftjis
PerlAddVar GuessCharsetSuspects 7bit-jis
DESCRIPTION
Apache::GuessCharset is an Apache handler which adds HTTP charset
attribute by automaticaly guessing file' encodings via Encode::Guess.
CONFIGURATION
This module uses following configuration variables.
GuessCharsetSuspects
a list of encodings for "Encode::Guess" to check. See the
Encode::Guess manpage for details.
GuessCharsetBufferSize
lib/Apache/GuessCharset.pm view on Meta::CPAN
"ISO_8859-5:1988" => "ISO-8859-5",
"ISO_8859-9:1989" => "ISO-8859-9",
"Extended_UNIX_Code_Packed_Format_for_Japanese" => "EUC-JP",
);
sub handler {
my $r = shift;
return DECLINED if
! $r->is_main or
$r->content_type !~ m@^text/@ or
$r->content_type =~ /charset=/ or
! -e $r->finfo or
-d _ or
!(my $chunk = read_chunk($r));
my @suspects = $r->dir_config->get('GuessCharsetSuspects');
my $enc = guess_encoding($chunk, @suspects);
unless (ref $enc) {
warn "Couldn't guess encoding: $enc" if $DEBUG;
return DECLINED;
}
my $iana = iana_charset_name($enc->name);
my $charset = lc($Prefered_MIME{$iana} || $iana); # lowercased
warn "Guessed: $charset" if $DEBUG;
$r->content_type($r->content_type . "; charset=$charset");
return OK;
}
sub read_chunk {
my $r = shift;
my $fh = Apache::File->new($r->filename) or return;
my $buffer_size = $r->dir_config('GuessCharsetBufferSize') || 512;
read $fh, my($chunk), $buffer_size;
return $chunk;
}
1;
__END__
=head1 NAME
Apache::GuessCharset - adds HTTP charset by guessing file's encoding
=head1 SYNOPSIS
SetHandler perl-script
PerlFixupHandler +Apache::GuessCharset
# how many bytes to read for guessing (default 512)
PerlSetVar GuessCharsetBufferSize 1024
# list of encoding suspects
PerlSetVar GuessCharsetSuspects euc-jp
PerlAddVar GuessCharsetSuspects shiftjis
PerlAddVar GuessCharsetSuspects 7bit-jis
=head1 DESCRIPTION
Apache::GuessCharset is an Apache fix-up handler which adds HTTP
charset attribute by automaticaly guessing text files' encodings via
Encode::Guess.
=head1 CONFIGURATION
This module uses following configuration variables.
=over 4
=item GuessCharsetSuspects
t/01_guess.t view on Meta::CPAN
is_main => 1,
filename => "t/sjis.html",
content_type => 'text/html',
dir_config => {
GuessCharsetSuspects => [ qw(euc-jp shiftjis 7bit-jis) ],
},
);
my $code = Apache::GuessCharset::handler($r);
is $code, Apache::Constants::OK, 'status code is OK';
is $r->content_type, 'text/html; charset=shift_jis', 'encoding is shift_jis';
}
{
my $r = Apache::FakeRequest->new(
is_main => 1,
filename => "t/sjis.html",
content_type => 'text/plain',
dir_config => {
GuessCharsetSuspects => [ qw(shiftjis) ],
},
);
my $code = Apache::GuessCharset::handler($r);
is $code, Apache::Constants::OK, 'status code is OK: should work with text/plain';
is $r->content_type, 'text/plain; charset=shift_jis', 'encoding is shift_jis';
}
{
my $r = Apache::FakeRequest->new(
is_main => 1,
filename => "t",
content_type => 'text/plain',
dir_config => {
GuessCharsetSuspects => [ qw(shiftjis) ],
},
( run in 0.283 second using v1.01-cache-2.11-cpan-4d50c553e7e )