App-PTP

 view release on metacpan or  search on metacpan

lib/App/PTP/Args.pm  view on Meta::CPAN

our $VERSION = '0.01';

# Name of files or directory to be processed. This can also contain a reference
# to the $stdin_marker variable, to indicate that the standard input needs to be
# processed.
my @inputs;

# The list of actions applied to the input. This is a list of array reference.
# Each of these array will contain the name of the command to run, the coderef
# for it, and then its arguments if any.
#
# There are two lists, the set of actions that happens before the --merge flag
# and the set of action that happens after.
my @pre_merge_pipeline;
my @post_merge_pipeline;
my $cur_pipeline = \@pre_merge_pipeline;

# This hash contains options that are used during the pipeline and that can be
# set or un-set for each command.
my %modes;

# This hash contains options that are global for the whole program.
# Note that --merge is an option because it impacts the way the final result is
# handled (and has compatibility limitations with other options), but it also
# impact the way the pipeline is built.
my %options;

my $default_input_field = '\s*,\s*|\t';
my $default_output_field = "\t";

# env(foo => default)
# Returns the given environment variable or the default value.
# Always return the default value if the HARNESS_ACTIVE variable is set (so that
# tests are not affected by environment variables).
sub env {
  my ($var, $default) = @_;
  return $default if $ENV{HARNESS_ACTIVE};
  return $ENV{$var} // $default;
}

sub get_default_modes {
  my %m;
  $m{case_sensitive} = not(env(PTP_DEFAULT_CASE_INSENSITIVE => 0));
  $m{quote_regex} = env(PTP_DEFAULT_QUOTE_REGEX => 0);
  $m{global_match} = not(env(PTP_DEFAULT_LOCAL_MATCH => 0));
  $m{comparator} = \'default';
  $m{regex_engine} = env(PTP_DEFAULT_REGEX_ENGINE => 'perl');
  $m{fatal_error} = env(PTP_DEFAULT_FATAL_ERROR => 0);
  $m{inverse_match} = env(PTP_DEFAULT_INVERSE_MATCH => 0);
  $m{input_field} = $default_input_field;
  $m{output_field} = $default_output_field;
  return %m;
}

sub get_default_options {
  my %o;
  $o{input_encoding} = 'UTF-8';
  $o{output_encoding} = 'UTF-8';
  $o{input_separator} = '\n';  # This will be interpreted in a regex
  $o{output_separator} = "\n";
  $o{preserve_eol} = 0;
  $o{fix_final_separator} = 0;
  $o{recursive} = 0;
  $o{input_filter} = undef;
  $o{debug_mode} = 0;
  $o{merge} = 0;
  $o{in_place} = 0;
  $o{output} = undef;
  $o{append} = 0;
  $o{abort} = 0;
  $o{preserve_perl_env} = 0;
  $o{use_safe} = env(PTP_DEFAULT_SAFE => 0);
  return %o;
}

# Resets all the global variables used for the command line parsing. This is
# really useful only in tests.
sub reset_global {
  @inputs = ();
  @pre_merge_pipeline = ();
  @post_merge_pipeline = ();
  %modes = get_default_modes();
  %options = get_default_options();
}

sub set_output {
  my (undef, $f) = @_;
  if (defined $options{output}) {
    die "Only a single occurrence of --output or --append is allowed.\n";
  }
  $options{output} = $f;
}

sub find_cheat_sheet {
  my $cur_file = (__PACKAGE__ =~ s{::}{/}rg).'.pm';
  die "Failed to find file for the current module (${cur_file})\n" unless exists $INC{$cur_file};
  my $p = catfile(dirname($INC{$cur_file}), 'Cheat_Sheet.pod');
  die "Cannot find Cheat Sheet file: ${p}\n" unless -f $p;
  return $p;
}

sub options_flags {
  (
    'help|helplong|h' => sub { pod2usage(-exitval => 0, -verbose => 2) },
    'helpshort' => sub { pod2usage(-exitval => 0, -verbose => 0) },
    'cheat' => sub { pod2usage(-exitval => 0, -verbose => 2, -input => find_cheat_sheet()) },
    'debug|d+' => \$options{debug_mode},
    # Merge is documented to be a pipeline command, but it is implemented as a
    # very custom option.
    'merge|m' => sub {
      die "The --merge option can only be specified once\n" if $options{merge};
      $options{merge} = 1;
      $cur_pipeline = \@post_merge_pipeline;
    },
    'in-place|i!' => \$options{in_place},
    'output|o=s' => \&set_output,
    'append|a=s' => sub { set_output(@_); $options{append} = 1; },
    'abort!' => \$options{abort},

    'recursive|R|r!' => \$options{recursive},
    'input-filter=s' => \$options{input_filter},
    'input-encoding|in-encoding=s' => \$options{input_encoding},
    'output-encoding|out-encoding=s' => \$options{output_encoding},
    'input-separator|in-separator=s' => \$options{input_separator},
    'output-separator|out-separator=s' => \$options{output_separator},
    'fix-final-separator!' => \$options{fix_final_separator},
    # TODO: add more tests for this mode, it’s unclear that the input separator
    # is really used (also work if it’s set to something else).
    '0' => sub {
      $options{input_separator} = "\N{NULL}";
      $options{output_separator} = '';
    },
    '00' => sub { $options{output_separator} = "\N{NULL}" },
    'preserve-input-separator|eol' =>
        sub { $options{preserve_eol} = 1; $options{output_separator} = '' },
    'preserve-perl-env!' => \$options{preserve_perl_env},
    'safe:2' => sub { $options{use_safe} = $_[1] },)
}

sub modes_flags {
  (
    'case-sensitive|S' => sub { $modes{case_sensitive} = 1 },
    'case-insensitive|I' => sub { $modes{case_sensitive} = 0 },
    'quote-regexp|Q' => sub { $modes{quote_regex} = 1 },
    'end-quote-regexp|E' => sub { $modes{quote_regex} = 0 },
    'global-match|G' => sub { $modes{global_match} = 1 },
    'local-match|L' => sub { $modes{global_match} = 0 },
    'comparator|C=s' => sub { $modes{comparator} = $_[1] },
    'regex-engine|re=s' => sub {
      die "Invalid value for --regex-engine: $_[1]\n" if $_[1] !~ /^\w+$/;
      $modes{regex_engine} = $_[1];
    },
    'fatal-error|X' => sub { $modes{fatal_error} = 1 },
    'ignore-error' => sub { $modes{fatal_error} = 0 },  # Find a short option?
    'inverse-match|V' => sub { $modes{inverse_match} = 1 },
    'normal-match|N' => sub { $modes{inverse_match} = 0 },
    'input-field-separator|F=s' => sub { $modes{input_field} = $_[1] },
    'output-field-separator|P=s' => \$modes{output_field},
    'default' => sub {
      $modes{input_field} = $default_input_field;
      $modes{output_field} = $default_output_field;
    },
    'bytes' => sub { $modes{input_field} = ''; $modes{output_field} = ''; },
    'csv' => sub { $modes{input_field} = '\s*,\s*'; $modes{output_field} = ','; },
    'tsv' => sub { $modes{input_field} = '\t'; $modes{output_field} = "\t"; },
    'none' => sub { $modes{input_field} = '(?!)' },
    'single-quote-replacement|single-quote|sq=s' => sub { $modes{single_quote} = $_[1] },
    'double-quote-replacement|double-quote|dq=s' => sub { $modes{double_quote} = $_[1] },
    'dollar-sigil-replacement|dollar-sigil|ds=s' => sub { $modes{dollar_sigil} = $_[1] },)
}

sub input_flags {
  (
    '<>' => sub { push @inputs, $_[0] },  # Any options not matched otherwise.
    '' => sub { push @inputs, \$App::PTP::Files::stdin_marker },  # a single '-'
  )
}

sub is_int {
  my ($str) = @_;
  return looks_like_number($str) && int($str) == $str;
}

sub validate_cut_spec {
  my ($spec) = @_;
  my @fields = split /\s*,\s*/, $spec;
  for my $f (@fields) {
    die "Fields passed to --cut must all be integers: $f\n" unless is_int($f);
    $f-- if $f > 0;
  }
  return \@fields;
}

# The array associated with each action contains the name of the action, the
# method to call for that action, a copy of the current %modes, and all the



( run in 3.608 seconds using v1.01-cache-2.11-cpan-8f98c5d2c55 )