HTML-Robot-Scrapper

 view release on metacpan or  search on metacpan

lib/HTML/Robot/Scrapper/UserAgent/Default.pm  view on Meta::CPAN


=cut

# parse_content( $res )
#
# Records the response metadata on the user agent and hands the body to
# every parser registered for the response's content type.
#
#   $res - response hash reference; only $res->{headers} is read here
#          (notably its 'content-type' entry).
#
# Side effects:
#   * sets the headers, response_headers, content_type and charset accessors;
#   * for every key of $self->robot->parser->content_types that matches the
#     start of the content type (case-insensitively), calls each registered
#     parser's parse_method on $self->robot->parser with $self->content;
#   * prints a diagnostic line when no parser handled the content type.
sub parse_content {
    my ( $self, $res ) = @_;
    my $content_types_avail = $self->robot->parser->content_types;

    # Expose the response headers through both accessors.
    $self->headers( $res->{headers} );
    $self->response_headers( $res->{headers} );

    # Content type, exactly as sent by the server (may be undef).
    my $content_type = $res->{headers}->{'content-type'};
    $self->content_type($content_type);

    # Charset, extracted from the content-type header.
    my $content_charset = $self->charset_from_headers( $res->{headers} );
    $self->charset($content_charset);

    # A response without a content-type header must not trigger
    # "uninitialized value" warnings in the match or the message below.
    my $type_to_match = defined $content_type ? $content_type : '';

    my $content_type_found = 0;
    foreach my $ct ( keys %$content_types_avail ) {

        # The key is interpolated as a pattern (as it always was), so keys
        # may be plain prefixes such as 'text/html' or real regexes.
        # No /g modifier here: in scalar context /g remembers pos() on the
        # matched string, which made every second anchored match fail and
        # silently skipped parsers registered for the same content type.
        next unless $type_to_match =~ m/^$ct/i;

        foreach my $parser ( @{ $content_types_avail->{$ct} } ) {
            my $parse_method = $parser->{parse_method};
            $self->robot->parser->$parse_method( $self->content );
            $content_type_found = 1;
        }
    }
    print "**** Content type not set for: " . $type_to_match . '... please configure it correctly adding a parser for that content type'."\n" if !$content_type_found;
}

# charset_from_headers( $headers )
#
# Extracts the charset parameter from the 'content-type' entry of a
# headers hash reference.
#
#   $headers - hash reference of response headers, keyed by header name.
#
# Returns the charset value (for example 'utf-8'), with surrounding quotes
# removed when the server sent a quoted value such as charset="utf-8".
# Returns undef when the header is missing or carries no charset parameter.
sub charset_from_headers {
    my ( $self, $headers ) = @_;
    my $ct = defined $headers ? $headers->{'content-type'} : undef;
    my $charset;

    # Guard against a missing header so the match never sees undef.
    # The value ends at whitespace, ';' or a closing quote; an optional
    # opening quote is skipped so it does not end up in the result.
    if ( defined $ct && $ct =~ m/charset\s*=\s*["']?([^\s;"']+)/i ) {
        $charset = $1;
    }
    return $charset;
}

sub normalize_url {
    my ( $self, $url ) = @_;
#   if (       ref $self->before_normalize_url eq ref {}



( run in 3.317 seconds using v1.01-cache-2.11-cpan-524268b4103 )