HTTP-OAI

 view release on metacpan or  search on metacpan

lib/HTTP/OAI/UserAgent.pm  view on Meta::CPAN

sub request
{
	my( $self, @args ) = @_;

	my $delay = $self->delay;
	if( defined $delay )
	{
		if( ref($delay) eq "CODE" )
		{
			$delay = &$delay( $self->last_request_completed );
		}
		select(undef,undef,undef,$delay) if $delay > 0;
	}

	my $r = $self->SUPER::request( @args );

	$self->last_request_completed( time );

	return $r;
}

sub lwp_badchar
{
	my $codepoint = sprintf('U+%04x', ord($_[2]));
	unless( $SILENT_BAD_CHARS )
	{
		warn "Bad Unicode character $codepoint at byte offset ".$_[1]->{content_length}." from ".$_[1]->{request}->uri."\n";
	}
	return $codepoint;
}

sub lwp_endparse
{
	my( $self, $parser ) = @_;

	my $utf8 = $parser->{content_buffer};
	# Replace bad chars with '?'
	if( $IGNORE_BAD_CHARS and length($utf8) ) {
		$utf8 = Encode::decode('UTF-8', $utf8, sub { $self->lwp_badchar($parser, @_) });
	}
	if( length($utf8) > 0 )
	{
		_ccchars($utf8); # Fix control chars
		$parser->{content_length} += length($utf8);
		$parser->parse_chunk($utf8);
	}
	delete($parser->{content_buffer});
	$parser->parse_chunk('', 1);
}

sub lwp_callback
{
	my( $self, $parser ) = @_;

	use bytes; # fixing utf-8 will need byte semantics

	$parser->{content_buffer} .= $_[2];

	do
	{
		# FB_QUIET won't split multi-byte chars on input
		my $utf8 = Encode::decode('UTF-8', $parser->{content_buffer}, Encode::FB_QUIET);

		if( length($utf8) > 0 )
		{
			use utf8;
			_ccchars($utf8); # Fix control chars
			$parser->{content_length} += length($utf8);
			$parser->parse_chunk($utf8);
		}

		if( length($parser->{content_buffer}) > MAX_UTF8_BYTES )
		{
			$parser->{content_buffer} =~ s/^([\x80-\xff]{1,4})//s;
			my $badbytes = $1;
			if( length($badbytes) == 0 )
			{
				Carp::confess "Internal error - bad bytes but not in 0x80-0xff range???";
			}
			if( $IGNORE_BAD_CHARS )
			{
				$badbytes = join('', map {
					$self->lwp_badchar($parser, $_)
				} split //, $badbytes);
			}
			$parser->parse_chunk( $badbytes );
		}
	} while( length($parser->{content_buffer}) > MAX_UTF8_BYTES );
}

sub _ccchars {
	$_[0] =~ s/([\x00-\x08\x0b-\x0c\x0e-\x1f])/sprintf("\\%04d",ord($1))/seg;
}

sub _buildurl {
	my( $self, %args ) = @_;

	Carp::confess "Requires verb parameter" unless $args{'verb'};

	my $uri = URI->new( $self->baseURL );
	return $uri->as_string if $uri->scheme eq "file";

	if( defined($args{resumptionToken}) && !$args{force} ) {
		$uri->query_form(verb=>$args{'verb'},resumptionToken=>$args{'resumptionToken'});
	} else {
		delete $args{force};
		# http://www.cshc.ubc.ca/oai/ breaks if verb isn't first, doh
		$uri->query_form(verb=>delete($args{'verb'}),%args);
	}

	return $uri->as_string;
}

sub decompress {
	my ($response) = @_;
	my $type = $response->headers->header("Content-Encoding");
	return $response->{_content_filename} unless defined($type);
	if( $type eq 'gzip' ) {
		my $filename = File::Temp->new( UNLINK => 1 );
		my $gz = Compress::Zlib::gzopen($response->{_content_filename}, "r") or die $!;
		my ($buffer,$c);



( run in 2.003 seconds using v1.01-cache-2.11-cpan-71847e10f99 )