HTTP-OAI
view release on metacpan or search on metacpan
lib/HTTP/OAI/UserAgent.pm view on Meta::CPAN
sub request
{
my( $self, @args ) = @_;
my $delay = $self->delay;
if( defined $delay )
{
if( ref($delay) eq "CODE" )
{
$delay = &$delay( $self->last_request_completed );
}
select(undef,undef,undef,$delay) if $delay > 0;
}
my $r = $self->SUPER::request( @args );
$self->last_request_completed( time );
return $r;
}
sub lwp_badchar
{
my $codepoint = sprintf('U+%04x', ord($_[2]));
unless( $SILENT_BAD_CHARS )
{
warn "Bad Unicode character $codepoint at byte offset ".$_[1]->{content_length}." from ".$_[1]->{request}->uri."\n";
}
return $codepoint;
}
sub lwp_endparse
{
my( $self, $parser ) = @_;
my $utf8 = $parser->{content_buffer};
# Replace bad chars with '?'
if( $IGNORE_BAD_CHARS and length($utf8) ) {
$utf8 = Encode::decode('UTF-8', $utf8, sub { $self->lwp_badchar($parser, @_) });
}
if( length($utf8) > 0 )
{
_ccchars($utf8); # Fix control chars
$parser->{content_length} += length($utf8);
$parser->parse_chunk($utf8);
}
delete($parser->{content_buffer});
$parser->parse_chunk('', 1);
}
sub lwp_callback
{
my( $self, $parser ) = @_;
use bytes; # fixing utf-8 will need byte semantics
$parser->{content_buffer} .= $_[2];
do
{
# FB_QUIET won't split multi-byte chars on input
my $utf8 = Encode::decode('UTF-8', $parser->{content_buffer}, Encode::FB_QUIET);
if( length($utf8) > 0 )
{
use utf8;
_ccchars($utf8); # Fix control chars
$parser->{content_length} += length($utf8);
$parser->parse_chunk($utf8);
}
if( length($parser->{content_buffer}) > MAX_UTF8_BYTES )
{
$parser->{content_buffer} =~ s/^([\x80-\xff]{1,4})//s;
my $badbytes = $1;
if( length($badbytes) == 0 )
{
Carp::confess "Internal error - bad bytes but not in 0x80-0xff range???";
}
if( $IGNORE_BAD_CHARS )
{
$badbytes = join('', map {
$self->lwp_badchar($parser, $_)
} split //, $badbytes);
}
$parser->parse_chunk( $badbytes );
}
} while( length($parser->{content_buffer}) > MAX_UTF8_BYTES );
}
sub _ccchars {
$_[0] =~ s/([\x00-\x08\x0b-\x0c\x0e-\x1f])/sprintf("\\%04d",ord($1))/seg;
}
sub _buildurl {
my( $self, %args ) = @_;
Carp::confess "Requires verb parameter" unless $args{'verb'};
my $uri = URI->new( $self->baseURL );
return $uri->as_string if $uri->scheme eq "file";
if( defined($args{resumptionToken}) && !$args{force} ) {
$uri->query_form(verb=>$args{'verb'},resumptionToken=>$args{'resumptionToken'});
} else {
delete $args{force};
# http://www.cshc.ubc.ca/oai/ breaks if verb isn't first, doh
$uri->query_form(verb=>delete($args{'verb'}),%args);
}
return $uri->as_string;
}
sub decompress {
my ($response) = @_;
my $type = $response->headers->header("Content-Encoding");
return $response->{_content_filename} unless defined($type);
if( $type eq 'gzip' ) {
my $filename = File::Temp->new( UNLINK => 1 );
my $gz = Compress::Zlib::gzopen($response->{_content_filename}, "r") or die $!;
my ($buffer,$c);
( run in 2.003 seconds using v1.01-cache-2.11-cpan-71847e10f99 )