App-Egaz
view release on metacpan or search on metacpan
lib/App/Egaz/Command/multiz.pm view on Meta::CPAN
package App::Egaz::Command::multiz;
use strict;
use warnings;
use autodie;
use MCE;
use MCE::Flow;
use App::Egaz -command;
use App::Egaz::Common;
# One-line summary of this subcommand. description() below capitalises it
# and reuses it as the first sentence of the long help text.
sub abstract {
    my $summary = 'multiz step by step';
    return $summary;
}
# Command-line option table for `egaz multiz`.
#
# Returns a list of option entries, each an array ref of
#   [ option spec, help text, optional settings hash ],
# followed by one trailing hash ref of display settings.
# NOTE(review): the layout looks like Getopt::Long::Descriptive's format as
# consumed by App::Cmd -- confirm against the framework in use.
sub opt_spec {
    my $outdir   = [ "outdir|o=s", "Output directory" ];
    my $tree     = [ "tree=s",     "a rooted newick tree" ];
    my $target   = [ "target=s",   "target name, this command can automatically pick one" ];
    my $keeptmp  = [ "keeptmp",    "keep intermediate files" ];
    my $parallel = [ "parallel|p=i", "number of threads", { default => 2 } ];

    # Trailing settings hash: ask for default values to be shown in help.
    my $display = { show_defaults => 1 };

    # A literal comma list (not an array) is returned on purpose, so a
    # scalar-context caller still receives the last element.
    return ( $outdir, $tree, $target, $keeptmp, $parallel, $display );
}
# Synopsis line for the usage message: the command name, an options
# placeholder, one required maf directory and optional further directories.
sub usage_desc {
    return join q{ }, 'egaz multiz', '[options]', '<maf dir>', '[more dirs]';
}
# Long help text for this subcommand: the abstract with its first letter
# capitalised and a full stop appended, followed by markdown bullet notes
# about the expected inputs and the external `multiz` program.
sub description {
    my $headline = ucfirst( abstract() ) . ".\n";

    # Single-quoted heredoc: `$PATH` below is literal text, not interpolated.
    my $notes = <<'MARKDOWN';
* <maf dirs> are directories containing multiple .maf or .maf.gz files
* `multiz` should be in $PATH
* Use a modified [`multiz`](https://github.com/wang-q/multiz) supports gzipped .maf files
* [Original `multiz`](https://www.bx.psu.edu/miller_lab/)
MARKDOWN

    return $headline . $notes;
}
# Check the positional arguments and the --tree option before the command
# runs. Every problem is reported through $self->usage_error.
# NOTE(review): usage_error presumably aborts the command (App::Cmd
# convention); that is not visible in this file -- confirm.
#
#   $self - the command object; must provide usage_error()
#   $opt  - parsed options; only the `tree` key is read here
#   $args - array ref of positional arguments (input directories)
sub validate_args {
    my ( $self, $opt, $args ) = @_;

    # At least one input directory is required.
    if ( !@{$args} ) {
        my $found = join q{}, map { sprintf " [%s]", $_ } @{$args};
        $self->usage_error(
            "This command need one or more input directories.\n\tIt found" . $found . ".\n" );
    }

    # Each positional argument has to be a directory on disk.
    for my $dir ( @{$args} ) {
        next if Path::Tiny::path($dir)->is_dir;
        $self->usage_error("The input directory [$dir] doesn't exist.");
    }

    # The tree is optional; it is only checked when a true value was given.
    my $tree = $opt->{tree};
    if ( $tree and !Path::Tiny::path($tree)->is_file ) {
        $self->usage_error("The newick tree file [$tree] doesn't exist.");
    }
}
sub execute {
my ( $self, $opt, $args ) = @_;
#----------------------------#
# inputs
#----------------------------#
my $suffix = '.maf';
my @files = File::Find::Rule->file->name("*$suffix")->in( @{$args} );
if ( scalar @files == 0 ) {
$suffix = '.maf.gz';
@files = sort File::Find::Rule->file->name("*$suffix")->in( @{$args} );
}
printf STDERR "* $suffix files: [%d]\n", scalar @files;
if ( scalar @files == 0 ) {
Carp::croak "Can't find .maf or .maf.gz files\n";
}
#----------------------------#
# Gather species list
#----------------------------#
#---
#Q_aliena:
# NC_020152:
# - t/Q_rubravsQ_aliena/mafSynNet/NC_020152.synNet.maf.gz
#Q_aquifolioides:
# NC_020152:
# - t/Q_rubravsQ_aquifolioides/mafSynNet/NC_020152.synNet.maf.gz
my $file_of = {}; # all info here
my %seen; # count
my @potential_targets;
my @species; # species list gathered from maf files; and then shift target out
{
print STDERR "Get species list\n";
my $worker = sub {
my ( $self, $chunk_ref, $chunk_id ) = @_;
my $file = $chunk_ref->[0];
( run in 1.430 second using v1.01-cache-2.11-cpan-0d23b851a93 )