Apache-GuessCharset

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

Revision history for Perl extension Apache::GuessCharset

0.03  Sat May 11 13:31:31 JST 2002
	* handles any text/* requests without charset attribute
	* charset attribute is now lowercased
	- returns DECLINED if not in main request
	- added mock test code

0.02  Fri May  3 22:27:43 JST 2002
	- uses $r->finfo and fixed some style issue
	  (Thanks to Geoffrey Young <geoff@modperlcookbook.org>)
	- CPAN release

0.01  Tue Apr 30 14:33:20 2002
	- original version

README  view on Meta::CPAN

NAME
    Apache::GuessCharset - adds HTTP charset by guessing file's encoding

SYNOPSIS
      PerlModule Apache::GuessCharset
      SetHandler perl-script
      PerlFixupHandler Apache::GuessCharset

      # how many bytes to read for guessing (default 512)
      PerlSetVar GuessCharsetBufferSize 1024

      # list of encoding suspects
      PerlSetVar GuessCharsetSuspects euc-jp
      PerlAddVar GuessCharsetSuspects shiftjis
      PerlAddVar GuessCharsetSuspects 7bit-jis

DESCRIPTION
    Apache::GuessCharset is an Apache handler which adds HTTP charset
    attribute by automaticaly guessing file' encodings via Encode::Guess.

CONFIGURATION
    This module uses following configuration variables.

    GuessCharsetSuspects
        a list of encodings for "Encode::Guess" to check. See the
        Encode::Guess manpage for details.

    GuessCharsetBufferSize

lib/Apache/GuessCharset.pm  view on Meta::CPAN

    "ISO_8859-5:1988" => "ISO-8859-5",
    "ISO_8859-9:1989" => "ISO-8859-9",
    "Extended_UNIX_Code_Packed_Format_for_Japanese" => "EUC-JP",
);

sub handler {
    my $r = shift;
    return DECLINED if
	! $r->is_main                  or
	$r->content_type !~ m@^text/@  or
	$r->content_type =~ /charset=/ or
	! -e $r->finfo                 or
	-d _                           or
	!(my $chunk = read_chunk($r));

    my @suspects = $r->dir_config->get('GuessCharsetSuspects');
    my $enc  = guess_encoding($chunk, @suspects);
    unless (ref $enc) {
	warn "Couldn't guess encoding: $enc" if $DEBUG;
	return DECLINED;
    }

    my $iana    = iana_charset_name($enc->name);
    my $charset = lc($Prefered_MIME{$iana} || $iana); # lowercased
    warn "Guessed: $charset" if $DEBUG;
    $r->content_type($r->content_type . "; charset=$charset");
    return OK;
}

sub read_chunk {
    my $r  = shift;
    my $fh = Apache::File->new($r->filename) or return;
    my $buffer_size = $r->dir_config('GuessCharsetBufferSize') || 512;
    read $fh, my($chunk), $buffer_size;
    return $chunk;
}

1;
__END__

=head1 NAME

Apache::GuessCharset - adds HTTP charset by guessing file's encoding

=head1 SYNOPSIS

  SetHandler perl-script
  PerlFixupHandler +Apache::GuessCharset

  # how many bytes to read for guessing (default 512)
  PerlSetVar GuessCharsetBufferSize 1024

  # list of encoding suspects
  PerlSetVar GuessCharsetSuspects euc-jp
  PerlAddVar GuessCharsetSuspects shiftjis
  PerlAddVar GuessCharsetSuspects 7bit-jis

=head1 DESCRIPTION

Apache::GuessCharset is an Apache fix-up handler which adds HTTP
charset attribute by automaticaly guessing text files' encodings via
Encode::Guess.

=head1 CONFIGURATION

This module uses following configuration variables.

=over 4

=item GuessCharsetSuspects

t/01_guess.t  view on Meta::CPAN

	is_main => 1,
	filename => "t/sjis.html",
	content_type => 'text/html',
	dir_config => {
	    GuessCharsetSuspects => [ qw(euc-jp shiftjis 7bit-jis) ],
	},
    );

    my $code = Apache::GuessCharset::handler($r);
    is $code, Apache::Constants::OK, 'status code is OK';
    is $r->content_type, 'text/html; charset=shift_jis', 'encoding is shift_jis';
}

{
    my $r = Apache::FakeRequest->new(
	is_main => 1,
	filename => "t/sjis.html",
	content_type => 'text/plain',
	dir_config => {
	    GuessCharsetSuspects => [ qw(shiftjis) ],
	},
    );

    my $code = Apache::GuessCharset::handler($r);
    is $code, Apache::Constants::OK, 'status code is OK: should work with text/plain';
    is $r->content_type, 'text/plain; charset=shift_jis', 'encoding is shift_jis';
}

{
    my $r = Apache::FakeRequest->new(
	is_main => 1,
	filename => "t",
	content_type => 'text/plain',
	dir_config => {
	    GuessCharsetSuspects => [ qw(shiftjis) ],
	},



( run in 0.275 second using v1.01-cache-2.11-cpan-4d50c553e7e )