Data-Tubes

 view release on metacpan or  search on metacpan

lib/Data/Tubes/Plugin/Parser.pm  view on Meta::CPAN

package Data::Tubes::Plugin::Parser;
use strict;
use warnings;
use English qw< -no_match_vars >;
use Data::Dumper;
our $VERSION = '0.740';

use Log::Log4perl::Tiny qw< :easy :dead_if_first >;

use Data::Tubes::Util qw<
  assert_all_different
  generalized_hashy
  metadata
  normalize_args
  shorter_sub_names
  test_all_equal
  trim
  unzip
>;
use Data::Tubes::Plugin::Util qw< identify >;
my %global_defaults = (
   input  => 'raw',
   output => 'structured',
);

sub parse_by_format {
   my %args = normalize_args(@_,
      [{%global_defaults, name => 'parse by format'}, 'format']);
   identify(\%args);

   my $format = $args{format};
   LOGDIE "parser of type 'format' needs a definition"
     unless defined $format;

   my @items = split m{(\W+)}, $format;
   return parse_single(key => $items[0]) if @items == 1;

   my ($keys, $separators) = unzip(\@items);

   # all keys MUST be different, otherwise some fields are just trumping
   # on each other
   eval { assert_all_different($keys); }
     or LOGDIE "'format' parser [$format] "
     . "has duplicate key $EVAL_ERROR->{message}";

   my $value = $args{value} //= ['whatever'];
   $value = [$value] unless ref $value;
   my $multiple =
        (ref($value) ne 'ARRAY')
     || (scalar(@$value) > 1)
     || ($value->[0] ne 'whatever');

   return parse_by_separators(
      %args,
      keys       => $keys,
      separators => $separators
   ) if $multiple || !test_all_equal(@$separators);

   # a simple split will do if all separators are the same
   return parse_by_split(
      %args,
      keys      => $keys,
      separator => $separators->[0]
   );
} ## end sub parse_by_format

sub parse_by_regex {
   my %args =
     normalize_args(@_,
      [{%global_defaults, name => 'parse by regex'}, 'regex']);
   identify(\%args);

   my $name  = $args{name};
   my $regex = $args{regex};
   LOGDIE "parse_by_regex needs a regex"
     unless defined $regex;

   $regex = qr{$regex};
   my $input  = $args{input};
   my $output = $args{output};
   return sub {
      my $record = shift;
      $record->{$input} =~ m{$regex}
        or die {
         message => "'$name': invalid record, regex is $regex",
         input   => $input,
         record  => $record,
        };
      my $retval = {%+};
      $record->{$output} = $retval;
      return $record;
   };
} ## end sub parse_by_regex

sub _resolve_separator {



( run in 0.726 second using v1.01-cache-2.11-cpan-71847e10f99 )