HTTP-StreamParser

 view release on metacpan or  search on metacpan

lib/HTTP/StreamParser.pm  view on Meta::CPAN

package HTTP::StreamParser;
# ABSTRACT: streaming HTTP parser
use strict;
use warnings;
use parent qw(Mixin::Event::Dispatch);

our $VERSION = '0.101';

=head1 NAME

HTTP::StreamParser - support for streaming HTTP request/response parsing

=head1 VERSION

version 0.101

=head1 SYNOPSIS

 # For requests...
 my $req_parser = HTTP::StreamParser::Request->new;
 $req_parser->subscribe_to_event(
   http_method => sub { print "Method: $_[1]\n" },
   http_uri    => sub { print "URI:    $_[1]\n" },
   http_header => sub { print "Header: $_[1]: $_[2]\n" },
 );
 $req_parser->parse(<<'EOF');
 ...
 EOF

 # ... and responses:
 my $resp_parser = HTTP::StreamParser::Response->new;
 $resp_parser->subscribe_to_event(
   http_code   => sub { print "Code:   $_[1]\n" },
   http_status => sub { print "Status: $_[1]\n" },
   http_header => sub { print "Header: $_[1]: $_[2]\n" },
 );
 $resp_parser->parse(<<'EOF');
 ...
 EOF

=head1 DESCRIPTION

Parses HTTP requests or responses. Generates events. Should be suitable for streaming.
You may be looking for L<HTTP::Parser::XS> instead - it's at least 20x faster than
this module. If you wanted something without XS, there's L<HTTP::Parser>.

Actual implementation is in L<HTTP::StreamParser::Request> or L<HTTP::StreamParser::Response>.

Typically you'd instantiate one of these for each request you want to parse. You'd then
subscribe to the events you're interested in - for example, header information, request method,
etc. - and then start parsing via L</parse>.

=cut

use List::Util qw(min);

# Upper bound on the length of each piece of body data that http_body passes
# to the http_body_chunk event.
use constant BODY_CHUNK_SIZE => 4096;

# Line terminator (carriage return followed by line feed) that the newline
# parser strips from the front of the buffer.
my $CRLF = "\x0d\x0a";

=head2 new

Constructor. Any arguments after the class name are ignored.

The new object starts with an empty C<text> buffer. The list returned by
C<state_sequence> is split in two: its first entry becomes the current
state, and the remaining entries are stored as the pending states.

Returns the new instance.

=cut

sub new {
	my ($class) = @_;
	my $self = bless { text => '' }, $class;

	# First entry is the state we start in, everything after it is queued up.
	my ($initial, @queued) = $self->state_sequence;
	$self->{state_pending} = \@queued;
	$self->{state} = $initial;

	return $self;
}

=head2 parse

Appends the supplied text to the pending buffer, then runs the state handler
so it can check whether there is now enough data to make progress.

Returns whatever C<handle_state> returns.

=cut

sub parse {
	my ($self, $text) = @_;
	$self->{text} .= $text;
	return $self->handle_state;
}

=head2 parse_state

Makes the given state the current parse state and immediately runs the state
handler.

Returns whatever C<handle_state> returns.

=cut

sub parse_state {
	my ($self, $state) = @_;
	$self->{state} = $state;
	return $self->handle_state;
}

lib/HTTP/StreamParser.pm  view on Meta::CPAN


Returns $self.

=cut

# Strips one space character from the start of the buffer.
#
# The second argument is a reference to the buffer string. When the buffer
# begins with a space, that space is removed and the result of next_state is
# returned; otherwise the buffer is left untouched and $self is returned.
sub single_space {
	my ($self, $buf) = @_;
	if($$buf =~ s{^ }{}) {
		return $self->next_state;
	}
	return $self;
}

=head2 newline

Strips the "newline" (CRLF) sequence from the start of the buffer.

The second argument is a reference to the buffer string. When the buffer
begins with CRLF, those characters are removed and the result of
C<next_state> is returned. Otherwise the buffer is left untouched and
$self is returned.

=cut

sub newline {
	my ($self, $buf) = @_;
	if($$buf =~ s{^$CRLF}{}) {
		return $self->next_state;
	}
	return $self;
}

=head2 http_body

Parse body chunks.

The second argument is a reference to the buffer string. Data is removed from
the front of the buffer in pieces of at most C<BODY_CHUNK_SIZE> characters,
and each piece is reported through an C<http_body_chunk> event together with
the updated C<< $self->{remaining} >> value.

When C<< $self->{remaining} >> is defined it limits how much data is taken:
it is reduced by the length of every chunk, and once it reaches zero no
further data is removed from the buffer, so anything beyond that point is
left in place. An C<http_body_end> event is raised whenever the method
finishes with C<< $self->{remaining} >> equal to zero. When it is undefined,
everything in the buffer is treated as body data and no end event is raised.

Returns $self.

=cut

sub http_body {
	my $self = shift;
	my $buf = shift;
	while(length $$buf) {
		# Once the expected amount has been consumed, whatever is left in the
		# buffer does not belong to this body. Without this guard the chunk
		# size below would be zero, nothing would be removed from the buffer
		# and this loop would never terminate.
		last if defined($self->{remaining}) && $self->{remaining} <= 0;

		# `// ()` drops the limit from the list entirely when it is undefined,
		# so min() only sees the chunk size and the buffer length.
		my $size = min(BODY_CHUNK_SIZE, length($$buf), $self->{remaining} // ());

		# 4-arg substr returns the leading $size characters and removes them
		# from the buffer in one step.
		my $chunk = substr $$buf, 0, $size, '';
		$self->{remaining} -= length $chunk if defined $self->{remaining};
		$self->invoke_event(http_body_chunk => $chunk, $self->{remaining});
	}
	# An undefined limit is mapped to 1 here so it never counts as "finished".
	$self->invoke_event(http_body_end =>) if 0 == ($self->{remaining} // 1);
	return $self
}

1;

__END__

=head1 SEE ALSO

=over 4

=item * L<HTTP::Parser::XS> - used by several other modules, fast implementation, pure-Perl fallback,
but doesn't give access to the data until the headers have been parsed and aside from header count and
per-header size limitation, seems not to have any way to deal with oversized requests

=item * L<HTTP::Parser> - parses into L<HTTP::Request>/L<HTTP::Response> objects. Doesn't seem to guard
against large buffers but does have at least some support for streaming.

=item * L<HTTP::MessageParser> - also parses HTTP content

=item * L<Mojo::Message::Request> - part of L<Mojolicious>

=item * L<Mojo::Message::Response> - part of L<Mojolicious>

=item * L<HTTP::Response::Parser> - parses responses...

=item * L<POE::Filter::HTTP::Parser> - seems to be backed by L<HTTP::Parser::XS> / L<HTTP::Parser>

=item * L<HTTP::HeaderParser::XS> - only parses the headers, albeit with some speed

=back

=head1 AUTHOR

Tom Molesworth <cpan@entitymodel.com>

=head1 LICENSE

Copyright Tom Molesworth 2013. Licensed under the same terms as Perl itself.



( run in 1.652 second using v1.01-cache-2.11-cpan-5b529ec07f3 )