Data-Tubes
view release on metacpan or search on metacpan
lib/Data/Tubes/Plugin/Parser.pm view on Meta::CPAN
package Data::Tubes::Plugin::Parser;
use strict;
use warnings;
use English qw< -no_match_vars >;
use Data::Dumper;
our $VERSION = '0.740';
use Log::Log4perl::Tiny qw< :easy :dead_if_first >;
use Data::Tubes::Util qw<
assert_all_different
generalized_hashy
metadata
normalize_args
shorter_sub_names
test_all_equal
trim
unzip
>;
use Data::Tubes::Plugin::Util qw< identify >;
my %global_defaults = (
input => 'raw',
output => 'structured',
);
sub parse_by_format {
my %args = normalize_args(@_,
[{%global_defaults, name => 'parse by format'}, 'format']);
identify(\%args);
my $format = $args{format};
LOGDIE "parser of type 'format' needs a definition"
unless defined $format;
my @items = split m{(\W+)}, $format;
return parse_single(key => $items[0]) if @items == 1;
my ($keys, $separators) = unzip(\@items);
# all keys MUST be different, otherwise some fields are just trumping
# on each other
eval { assert_all_different($keys); }
or LOGDIE "'format' parser [$format] "
. "has duplicate key $EVAL_ERROR->{message}";
my $value = $args{value} //= ['whatever'];
$value = [$value] unless ref $value;
my $multiple =
(ref($value) ne 'ARRAY')
|| (scalar(@$value) > 1)
|| ($value->[0] ne 'whatever');
return parse_by_separators(
%args,
keys => $keys,
separators => $separators
) if $multiple || !test_all_equal(@$separators);
# a simple split will do if all separators are the same
return parse_by_split(
%args,
keys => $keys,
separator => $separators->[0]
);
} ## end sub parse_by_format
sub parse_by_regex {
my %args =
normalize_args(@_,
[{%global_defaults, name => 'parse by regex'}, 'regex']);
identify(\%args);
my $name = $args{name};
my $regex = $args{regex};
LOGDIE "parse_by_regex needs a regex"
unless defined $regex;
$regex = qr{$regex};
my $input = $args{input};
my $output = $args{output};
return sub {
my $record = shift;
$record->{$input} =~ m{$regex}
or die {
message => "'$name': invalid record, regex is $regex",
input => $input,
record => $record,
};
my $retval = {%+};
$record->{$output} = $retval;
return $record;
};
} ## end sub parse_by_regex
sub _resolve_separator {
( run in 0.726 second using v1.01-cache-2.11-cpan-71847e10f99 )