App-RecordStream
view release on metacpan or search on metacpan
lib/App/RecordStream/Operation/fromxml.pm view on Meta::CPAN
package App::RecordStream::Operation::fromxml;
our $VERSION = "4.0.25";
use strict;
use warnings;
use base qw(App::RecordStream::Operation);
use App::RecordStream::Record;
use App::RecordStream::OptionalRequire 'HTTP::Request';
use App::RecordStream::OptionalRequire 'LWP::UserAgent';
use App::RecordStream::OptionalRequire 'List::MoreUtils', qw(uniq);
use App::RecordStream::OptionalRequire 'XML::Twig';
BEGIN { App::RecordStream::OptionalRequire::require_done() }
sub init {
my $this = shift;
my $args = shift;
my @elements;
my $nested = 0;
my $spec = {
'element=s' => sub { push @elements, split(/,/, $_[1]) },
'nested' => \$nested,
};
$this->parse_options($args, $spec);
$this->{'ELEMENTS'} = [ uniq @elements ];
$this->{'NESTED'} = $nested;
my $has_files = scalar @$args;
$this->{'HAS_URIS'} = $has_files;
$this->{'EXTRA_ARGS'} = $args;
$this->{'OPEN_TAGS'} = 0;
}
sub wants_input {
return 0;
}
sub stream_done {
my $this = shift;
my $elements = $this->{'ELEMENTS'};
my $elem_prefix = '/*/';
my $attr_prefix = '/';
if ( $this->{'NESTED'} ) {
$elem_prefix .= '/';
$attr_prefix .= '/';
}
my %start_tag_handlers;
my %twig_roots;
for my $element ( @$elements ) {
my $elem_expr = $elem_prefix . $element;
my $attr_expr = $attr_prefix . '[@' . $element . ']';
my $default_hash = {};
if ( @$elements > 1 ) {
$default_hash->{'element'} = $element;
}
$start_tag_handlers{$elem_expr} = sub { $this->{'OPEN_TAGS'}++ };
$twig_roots{$elem_expr} = sub { $this->handle_element($default_hash, @_) };
lib/App/RecordStream/Operation/fromxml.pm view on Meta::CPAN
sub push_value {
my $this = shift;
my $value = shift;
my $default_hash = shift;
if ( UNIVERSAL::isa($value, 'HASH') ) {
my $record = App::RecordStream::Record->new($value);
foreach my $key ( keys %$default_hash ) {
$record->{$key} = $default_hash->{$key};
}
$this->push_record($record);
}
elsif ( UNIVERSAL::isa($value, 'ARRAY') ) {
foreach my $item (@$value) {
$this->push_value($item, $default_hash);
}
}
else {
my $record = App::RecordStream::Record->new(%$default_hash);
$record->{'value'} = $value;
$this->push_record($record);
}
}
sub get_xml_string {
my $this = shift;
my $uris = $this->{'EXTRA_ARGS'};
my $contents;
if ( $this->{'HAS_URIS'} ) {
return undef unless ( @$uris );
my $uri = shift @$uris;
$this->update_current_filename($uri);
my $ua = $this->make_user_agent();
my $response = $ua->request($this->get_request($uri));
if ( ! $response->is_success() ) {
warn "GET uri: '$uri' failed, skipping!\n";
return $this->get_xml_string();
}
$contents = $response->content();
}
else {
local $/;
$contents = <STDIN>;
}
return $contents;
}
sub get_request {
my $this = shift;
my $uri = shift;
my $request = HTTP::Request->new();
$request->method('GET');
$request->uri($uri);
return $request;
}
sub make_user_agent {
return LWP::UserAgent->new();
}
sub usage {
my $this = shift;
my $options = [
[ 'element <elements>', 'May be comma separated, may be specified multiple times. Sets the elements/attributes to print records for'],
[ 'nested', 'search for elements at all levels of the xml document'],
];
my $args_string = $this->options_string($options);
return <<USAGE;
Usage: recs-fromxml <args> [<URIs>]
__FORMAT_TEXT__
Reads either from STDIN or from the specified URIs. Parses the xml
documents, and creates records for the specified elements.
If multiple element types are specified, will add a {'element' => element name} field to the output record.
__FORMAT_TEXT__
$args_string
Examples:
Create records for the bar element at the top level of myXMLDoc
recs-fromxml --element bar file:myXMLDoc
Create records for all foo and bar elements from the URL
recs-fromxml --element foo,bar --nested http://google.com
USAGE
}
1;
( run in 0.755 second using v1.01-cache-2.11-cpan-39bf76dae61 )