Apache-Request-I18N

 view release on metacpan or  search on metacpan

I18N.pm  view on Meta::CPAN

package Apache::Request::I18N;

use 5.008;
use strict;
use warnings;

use Apache::Request 0.32;
use Carp;
use Encode qw(decode_utf8 encode_utf8);

our @ISA = 'Apache::Request';

our $VERSION = '0.08';


=head1 NAME

Apache::Request::I18N - Internationalization extension to Apache::Request


=head1 SYNOPSIS

  use Apache::Request::I18N;
  my $apr = Apache::Request::I18N->new($r, DECODE_PARMS => 'utf-8');

Or, add something like this to your Apache F<httpd.conf>:

  PerlModule Apache::Request::I18N;

  <Location ...>
  SetHandler  perl-script
  PerlHandler Apache::Request::I18N <your other handlers ...>
  PerlSetVar  DecodeParms  utf-8
  </Location>


=head1 DESCRIPTION

I<Apache::Request::I18N> adds transparent support over I<Apache::Request> for
internationalized GET/POST parameters.  Form field names and values are
automatically decoded and converted either to Perl's internal UTF-8 format, or
to another character encoding.

Since this module inherits from I<Apache::Request>, it can be used as a
drop-in replacement.  (It is not a B<perfect> replacement, though; see
L<"COMPATIBILITY ISSUES"> below.)  It can also be used in a I<PerlHandler>
directive, in which case all subsequent handlers will -- if they play nicely
-- automatically see the converted names and values.


=head1 CONSTRUCTORS

=over 2

=item new( REQ [, OPTIONS ] )

Creates and returns a new I<Apache::Request::I18N> object.  REQ is the
I<Apache> or I<Apache::Request> associated with the current request.

OPTIONS is an optional list of name/value pairs.  Each option also has a
corresponding I<mod_perl> variable (listed in parentheses) that can be set via
I<PerlSetVar> in F<httpd.conf>.  Values in OPTIONS take precedence.  The
available options are:

=over 4

=item DECODE_PARMS (I<DecodeParms>)

I<Required>.  Declares the character encoding that will be used by default

I18N.pm  view on Meta::CPAN


Equivalent to the I<instance>() method in I<Apache::Request>, except that this
method will return a I<Apache::Request::I18N> object.  Subsequent calls to
I<< Apache::Request->instance >>() will also return the same object.  It is
allowed to call I<< Apache::Request->instance >>() beforehand.

=cut

sub instance {
	my ($class, $r, @args) = @_;

	return unless defined $r;

	my $apreq = $r->pnotes('apreq');

	# Instanciate ourself if necessary; we don't check isa($class) because
	# that only requires reblessing, handled below.
	unless ($apreq && $apreq->isa(__PACKAGE__)) {
		$apreq = $class->new($apreq || $r, @args);
		$r->pnotes('apreq', $apreq);
	}

	# Rebless if we've been called from a subclass
	if ($apreq && ! $apreq->isa($class)) {
		bless $apreq, $class;
	}

	return $apreq;
}

=back

=head1 METHODS

Almost all I<Apache::Request> methods are supported (see L<"COMPATIBILITY
ISSUES"> below for a list of exceptions), and will properly return values
according to ENCODE_PARMS.  (I<Apache> methods, like I<args>(), are not
affected by this module.)

All arguments passed to a method must be encoded to ENCODE_PARMS beforehand,
unless ENCODE_PARMS is empty.  This also applies to each key/value of any
I<Apache::Table> passed to I<parms>().

=cut

sub param {
	my $self = shift;

	# If the parameters are already encoded (ie. EncodeParms is not blank)
	# then our job is done.  Otherwise, we have to decode from UTF-8.
	#
	# TODO: Should we bother to re-encode?
	return $self->SUPER::param(@_) if $self->encode_parms;

	# param() is identical to parms() in scalar context
	return $self->parms if !wantarray && !@_;

	# Encode everything back to UTF-8.  (The second argument may be an
	# array reference.)
	my @args = map ref($_)
				? [ map encode_utf8($_), @$_ ]
				: encode_utf8($_),
			@_;

	# param() is context-sensitive
	if (wantarray) {
		return map decode_utf8($_), $self->SUPER::param(@args);
	} else {
		return decode_utf8 scalar $self->SUPER::param(@args);
	}
}

sub parms {
	my $self = shift;

	# parms() in list context returns an Apache::Table, which cannot
	# handle wide characters, so we croak if ENCODE_PARMS is empty.
	# (Maybe we could subclass Apache::Table and perform some magic?)

	carp 'Calling parms() with empty ENCODE_PARMS is unsupported'
		unless $self->encode_parms;
	
	return $self->SUPER::parms(@_);
}

sub upload {
	my ($self, $arg) = @_;

	my $upload_class = ref($self);
	$upload_class =~ s/\bRequest\b/Upload/;
	unless ($upload_class->isa('Apache::Upload::I18N')) {
		no strict 'refs';
		carp "\@$upload_class\::ISA should contain Apache::Upload::I18N";
		push @{"$upload_class\::ISA"}, 'Apache::Upload::I18N';
	}
	
	# upload(UPLOAD) is implemented, but undefined, so there's little
	# harm in not supporting it...
	if (UNIVERSAL::isa($arg, 'Apache::Upload')) {
		carp 'Calling upload($upload) is unsupported';
		return $self->SUPER::upload($arg);
	}

	unless ($self->{_uploads}) {
		my @uploads = $self->SUPER::upload;
		my %uploads;
		foreach (@uploads) {
			$upload_class->rebless($_, $self);
			push @{ $uploads{ $_->name } }, $_;
		}
		$self->{_uploads} = \@uploads;
		$self->{_uploads_hash} = \%uploads;
	}

	if (defined $arg) {
		my $uploads = $self->{_uploads_hash}{$arg};
		return unless $uploads;
		return wantarray ? @$uploads : $uploads->[0];
	} else {
		return wantarray
			? @{ $self->{_uploads} }
			: $self->{_uploads}[0];
	}
}

=head2 Additional methods

=over

=item decode_parms()

=item encode_parms()

Returns the current DECODE_PARMS or ENCODE_PARMS value.

=cut

sub decode_parms { $_[0]->{_decode_parms} }
sub encode_parms { $_[0]->{_encode_parms} }

=back

=cut


# Our core decode/encode functions.  If encode_parms is empty, we still need
# to encode to UTF-8, since libapreq won't handle wide characters.
sub _decode { Encode::decode($_[2] || $_[0]->decode_parms,  $_[1]) }
sub _encode { Encode::encode($_[0]->encode_parms || 'utf8', $_[1]) }

# Handling of Content-Disposition parameter values (form field names and
# filenames in multipart/form-data) is a bit tricky.  RFC 2047 clearly states
# (section 5) that parameter values cannot contain any encoded-word; however,
# RFC 1867 actually recommended using encoded-word for such purposes, and
# there are reports of browsers doing just that.  So, we support it anyway.
#
# Many browsers don't even bother encoding parameter values, and send them in
# whatever encoding is used for the contents of each HTTP entity.  So, if we
# can't find any encoded-word, we try the usual decoding method.
#
# Proper encoding of parameter values is defined in RFC 2184; unfortunately,
# libapreq does not recognize this format, so we can't support it.

{{
my $SPACE	 = '\040';
my $CTL		 = '\000-\037\377';
my $especials	 = quotemeta '()<>@,;:\\"/[]?.=';

my $token	 = qr/ [^ $SPACE $CTL $especials ]+ /x;
my $charset	 = $token;
my $language	 = $token;
my $encoding	 = $token;
my $encoded_text = qr/ [ \041-\076 \100-\176 ]+ /x;
my $encoded_word = qr/ =\? $charset (?: \* $language )? \? $encoding \?
							$encoded_text \?= /x;

sub _decode_value {
	my ($self, $value) = @_;

	if ($value =~ /$encoded_word/o) {
		return Encode::decode('MIME-Header', $value);
	} else {
		return $self->_decode($value);
	}
}
}}

# Decode all parameters, and re-encode them in ENCODE_PARMS (or UTF-8 if no
# ENCODE_PARMS has been defined, in which case we'll decode them back when
# they are read).

use Apache::Table;
use HTTP::Headers::Util qw(split_header_words);
sub _mangle_parms {
	my ($self) = @_;

	# Remember which arguments were passed on the query string
	# 
	# This used to call Apache->args, but it doesn't behave so well with
	# ill-formed query strings.  Apache::Request->query_params would be
	# nice, but it was introduced in 1.3, and Debian sarge only has 1.1.
	my %args = map { defined $_ ? $_ : '' }
			map Apache::unescape_url_info(defined $_ ? $_ : ''),
				map /^([^=]*)(?:=(.*))?/,
					split /[&;]+/ => $self->query_string;

	# Extract the Content-Type charset for x-www-form-urlencoded
	my ($is_urlenc, $charset);
	my ($ctype) = split_header_words($self->header_in('Content-Type'));



( run in 1.128 second using v1.01-cache-2.11-cpan-524268b4103 )