CohortExplorer

 view release on metacpan or  search on metacpan

lib/CohortExplorer/Command/Query.pm  view on Meta::CPAN

package CohortExplorer::Command::Query;

use strict;
use warnings;

our $VERSION = 0.14;
our ( $COMMAND_HISTORY_FILE, $COMMAND_HISTORY_CONFIG, $COMMAND_HISTORY );
our @EXPORT_OK = qw($COMMAND_HISTORY);

my $arg_max = 500;

#-------
BEGIN {
 use base qw(CLI::Framework::Command Exporter);
 use CLI::Framework::Exceptions qw( :all );
 use CohortExplorer::Datasource;
 use Exception::Class::TryCatch;
 use FileHandle;
 use File::HomeDir;
 use File::Spec;
 use Config::General;

 # Untaint and set command history file
 $COMMAND_HISTORY_FILE = $1
 if File::Spec->catfile( File::HomeDir->my_home, ".CohortExplorer_History" ) =~ /^(.+)$/;
 
 my $fh = FileHandle->new(">> $COMMAND_HISTORY_FILE");

 # Throw exception if command history file does not exist or
 # is not readable and writable
 if ( !$fh ) {
  throw_cmd_run_exception( error =>
"'$COMMAND_HISTORY_FILE' must exist with RW enabled (i.e. chmod 766) for CohortExplorer"
  );
 }
 $fh->close;

 # Read command history file
 eval {
  $COMMAND_HISTORY_CONFIG =
    Config::General->new(
                          -ConfigFile            => $COMMAND_HISTORY_FILE,
                          -MergeDuplicateOptions => "false",
                          -StoreDelimiter        => "=",
                          -SaveSorted            => 1
    );
 };
 if ( catch my $e ) {
  throw_cmd_run_exception( error => $e );
 }
 $COMMAND_HISTORY = { $COMMAND_HISTORY_CONFIG->getall };
}

sub option_spec {
 (
   [],
   [ 'cond|c=s%'      => 'impose conditions' ],
   [ 'out|o=s'        => 'provide output directory' ],
   [ 'save-command|s' => 'save command' ],
   [ 'stats|S'        => 'show summary statistics' ],
   [ 'export|e=s@'    => 'export tables by name' ],
   [ 'export-all|a'   => 'export all tables' ],
   []
 );
}

sub validate {
 my ( $self, $opts, @args ) = @_;
 my ( $ds, $csv, $verbose ) = @{ $self->cache->get('cache') }{qw/datasource csv verbose/};
 
 print STDERR "\nValidating command options/arguments ...\n\n" if $verbose;
 
 ##----- VALIDATE ARG LENGTH, EXPORT AND OUT OPTIONS -----##
 if ( !$opts->{out} || !-d $opts->{out} || !-w $opts->{out} ) {
  throw_cmd_validation_exception( error =>
"Option 'out' is required. The directory specified in 'out' option must exist with RWX enabled (i.e. chmod 777) for CohortExplorer"
  );
 }

 
 if ( $opts->{export} && $opts->{export_all} ) {
  throw_cmd_validation_exception( error =>
      'Mutually exclusive options (export and export-all) specified together' );
 }
 if ( @args == 0 || @args > $arg_max ) {
  throw_cmd_validation_exception(
                      error => "At least 1-$arg_max variable(s) are required" );
 }
 
 # Match table names supplied in the export option to
 # datasource tables and throw exception if they don't match
 if ( $opts->{export} ) {
  my @invalid_tables = grep { !$ds->table_info->{$_} } @{ $opts->export };
  if (@invalid_tables) {
   throw_cmd_validation_exception( error => 'Invalid table(s) ' . join ( ', ', @invalid_tables ) . ' in export' );
  }
 }

 # Set export to all tables
 if ( $opts->{export_all} ) {
  $opts->{export} = [ keys %{$ds->table_info} ];
 }

 # --- VALIDATE CONDITION OPTION AND ARGS ---
 # Get valid variables for validation
 my @vars = @{ $self->get_valid_variables };
 
 for my $v (@args) {
  # Throw exception if entity_id/visit are supplied as an argument
  if ( $v =~ /^(?:entity_id|visit)$/ ) {
   throw_cmd_validation_exception( error =>
"'entity_id' and 'visit' (if applicable) need not be supplied as arguments as they are already part of the query set"
   );
  }

  # Throw exception if some invalid variable is supplied as an argument
  if ( !grep( $_ eq $v, @vars ) ) {
   throw_cmd_validation_exception(
                                error => "Invalid variable '$v' in arguments" );
  }
 }

 # Condition can be imposed on all variables including
 # entity_id and visit (if applicable)
 for my $v ( keys %{ $opts->{cond} } ) {

  # Throw exception if some invalid variable is supplied as argument
  if ( !grep( $_ eq $v, @vars ) ) {
   throw_cmd_validation_exception(
                         error => "Invalid variable '$v' in condition option" );
  }

  # Regexp to validate condition option
  if ( $opts->{cond}{$v} =~
/^\s*(=|\!=|>|<|>=|<=|between|not_between|like|not_like|ilike|in|not_in|regexp|not_regexp)\s*,\s*([^\`]+)\s*$/
    )
  {
   my ( $opr, $val ) = ( $1, $2 );
   $opts->{cond}{$v} =~ s/$opr,\s+/$opr,/;

   # Validating SQL conditions
   if ( $opr && $val && $csv->parse($val) ) {
    my @val = grep ( s/^\s*|\s*$//g, $csv->fields );

    # Operators between and not_between require array but for others it is optional
    if ( $opr =~ /(?:between)/ && scalar @val != 2 ) {
     throw_cmd_validation_exception( error =>
         "Expecting min and max for '$opr' in '$v' (i.e. between, min, max )" );
    }
   }
   else {
    throw_cmd_validation_exception(
            error => "Invalid condition '$opts->{cond}{$v}' on variable '$v'" );
   }
  }
  else {
   throw_cmd_validation_exception(
            error => "Invalid condition '$opts->{cond}{$v}' on variable '$v'" );
  }
 }
}

sub run {

 # Overall running of the command
 my ( $self, $opts, @args ) = @_;
 my $rs = $self->process( $opts, @args );
 if ( $opts->{save_command} ) {
      $self->save_command( $opts, @args );
 }

 # If result-set is not empty
 if (@$rs) {
  my $dir;
  if ( $opts->{out} =~ /^(.+)$/ ) {
   $dir = File::Spec->catdir( $1, 'CohortExplorer-' . time . $$ );
  }

  # Create dir to export data
  eval { mkdir $dir };
  if ( catch my $e ) {
   warn $e . "\n";
   $dir = $1;
  }
  else {
   eval { chmod 0777, $dir };
   if ( catch my $e ) {
    warn $e . "\n";
    $dir = $1;
   }
  }
  $self->export( $opts, $rs, $dir, @args );
  
  return {
           headingText => 'summary statistics',
           rows        => $self->summary_stats( $opts, $rs, $dir )
    }
    if $opts->{stats};
 }
 return;
}

sub process {
 my ( $self, $opts, @args ) = @_;
 my ( $ds, $verbose ) = @{ $self->cache->get('cache') }{qw/datasource verbose/};
 
 ##----- PREPARE QUERY PARAMETERS FROM CONDITION OPTION AND ARGS -----##
 # Query parameters can be static, dynamic or both
 # Static type is applicable to 'standard' datasource but it may also apply to
 # 'longitudinal' datasource provided the datasource contains tables which are
 # independent of visits (i.e. static tables).
 # Dynamic type only applies to longitudinal datasources
 my $param = $self->create_query_params( $opts, @args );
 my $aliase_in_having = 1 if $ds->dialect->name eq 'mysql';
 my $dbh = $ds->dbh;
 my ( $stmt, $vars, $sth, @rows );
 
 # Construct sql query for static/dynamic or both types (if applicable)
 for my $p ( keys %$param ) {
  tie my %c, 'Tie::IxHash', @{ $param->{$p}{-columns} };
  $param->{$p}{-columns} = [
   map {
        $c{$_} . ' AS '
      . $dbh->quote_identifier($_)
     } keys %c
  ];
  eval {
   ( $param->{$p}{stmt}, @{ $param->{$p}{bind} } ) =
     $ds->sqla->select( %{ $param->{$p} } );
  };
  if ( catch my $e ) {
   throw_cmd_run_exception( error => $e );
  }

  # Filter literals from @bind. 'Visit' is not treated as a variable, only to
  # avoid clash between the column and table name, what if some table is named as visit? (I saw one!!)
  # Get all indices in @bind containing literals (i.e. variable/column names or undef)
  my @bind = @{ $param->{$p}{bind} };
  my @placeholders;
  for ( 0 .. $#bind ) {
   if ( ( $c{ $bind[$_] } && $bind[$_] ne 'visit' ) || $bind[$_] eq 'undef' ) {
         push @placeholders, $_;
   }
  }

  # Remove variable names from placeholders as they need to be hard coded



( run in 0.588 second using v1.01-cache-2.11-cpan-39bf76dae61 )