view release on metacpan or search on metacpan
lib/App/Egaz/Command/blastlink.pm view on Meta::CPAN
# One-line summary of the blastlink command.
# Returns: a plain string.
sub abstract {
    my $summary = 'link sequences by blastn';
    return $summary;
}
# Option specification for the blastlink command.
# Returns a list of [ spec, description, \%extras ] entries (the extras hash
# is optional), followed by a trailing hash that enables show_defaults.
sub opt_spec {
    my %display = ( show_defaults => 1 );

    return (
        [ "outfile|o=s"  => "Output filename. [stdout] for screen", { default => "stdout" } ],
        [ "coverage|c=f" => "coverage of identical matches",        { default => 0.9 } ],
        [ "batch=i"      => "batch size of blast records",          { default => 500000 } ],
        [ "parallel|p=i" => "number of threads",                    { default => 2 } ],
        [ "verbose|v"    => "verbose mode" ],
        \%display,
    );
}
# Usage synopsis for the blastlink command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz blastlink [options] <infile>";
    return $usage;
}
sub description {
lib/App/Egaz/Command/blastmatch.pm view on Meta::CPAN
# One-line summary of the blastmatch command.
# Returns: a plain string.
sub abstract {
    my $summary = 'matched positions by blastn in genome sequences';
    return $summary;
}
# Option specification for the blastmatch command.
# Returns a list of [ spec, description, \%extras ] entries (the extras hash
# is optional), followed by a trailing hash that enables show_defaults.
sub opt_spec {
    my %display = ( show_defaults => 1 );

    return (
        [ "outfile|o=s"  => "Output filename. [stdout] for screen", { default => "stdout" } ],
        [ "perchr"       => "one (fake) runlist per chromosome" ],
        [ "coverage|c=f" => "coverage of identical matches",        { default => 0.9 } ],
        [ "batch=i"      => "batch size of blast records",          { default => 500000 } ],
        [ "parallel|p=i" => "number of threads",                    { default => 2 } ],
        [ "verbose|v"    => "verbose mode" ],
        \%display,
    );
}
# Usage synopsis for the blastmatch command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz blastmatch [options] <infile>";
    return $usage;
}
sub description {
lib/App/Egaz/Command/blastn.pm view on Meta::CPAN
# Option specification for the blastn command.
# Returns a list of [ spec, description, \%extras ] entries (the extras hash
# is optional), followed by a trailing hash that enables show_defaults.
sub opt_spec {
    # Default column layout requested from blastn via --outfmt.
    my $default_outfmt = "7 qseqid sseqid qstart qend sstart send qlen slen nident";
    my %display        = ( show_defaults => 1 );

    return (
        [ "outfile|o=s"  => "Output filename. [stdout] for screen", { default => "stdout" } ],
        [ "evalue=f"     => "expectation value (E) threshold",      { default => 0.01 } ],
        [ "wordsize=i"   => "length of best perfect match",         { default => 40 } ],
        [ "outfmt=s"     => "out format",                           { default => $default_outfmt } ],
        [ "tmp=s"        => "user defined tempdir" ],
        [ "parallel|p=i" => "number of threads",                    { default => 2 } ],
        [ "verbose|v"    => "verbose mode" ],
        \%display,
    );
}
# Usage synopsis for the blastn command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz blastn [options] <infile> <genome.fa>";
    return $usage;
}
sub description {
lib/App/Egaz/Command/blastn.pm view on Meta::CPAN
}
}
{ # blastn
# Assemble the blastn command line by plain concatenation.
# The option values are already interpolated into the string, so it must NOT
# be run through sprintf: with no format arguments, any '%' inside an
# interpolated value (e.g. a custom --outfmt or a file name) would be parsed
# as a conversion and corrupt the command.
my $cmd
    = "blastn -task megablast"
    . " -max_target_seqs 20 -culling_limit 20"    # reduce size of reports
    . " -dust no -soft_masking false"             # disable dust and soft masking
    . " -evalue $opt->{evalue} -word_size $opt->{wordsize}"
    . " -outfmt '$opt->{outfmt}'"
    . " -num_threads $opt->{parallel} -db $basename -query $infiles[0]"
    . " -out $basename.blast";

# Probe the installed blastn's help text to pick whichever HSP-limiting flag
# it advertises; the flag name differs between BLAST+ releases.
my $blastn_usage = `blastn -h`;
$blastn_usage = '' unless defined $blastn_usage;    # backticks yield undef if blastn cannot be run
if ( $blastn_usage =~ /\-max_hsps int/ ) {
    $cmd .= " -max_hsps 10";                        # Nucleotide-Nucleotide BLAST 2.6.0+
}
elsif ( $blastn_usage =~ /\-max_hsps_per_subject int/ ) {
    $cmd .= " -max_hsps_per_subject 10";            # Nucleotide-Nucleotide BLAST 2.2.28+
}
lib/App/Egaz/Command/lastz.pm view on Meta::CPAN
[ "E=i", "Scoring: gap-extension penalty", ],
[ "Q=s", "Scoring: matrix file", ],
[ "C=i", "Aligning: chain option", ],
[ "T=i", "Aligning: words option", ],
[ "M=i", "Aligning: mask any base in seq1 hit this many times", ],
[ "K=i", "Dropping hsp: threshold for MSPs for the first pass", ],
[ "L=i", "Dropping hsp: threshold for gapped alignments for the second pass", ],
[ "H=i", "Dropping hsp: threshold to be interpolated between alignments", ],
[ "Y=i", "Dropping hsp: X-drop parameter for gapped extension", ],
[ "Z=i", "Speedup: increment between successive words", ],
[ "parallel|p=i", "number of threads", { default => 2 }, ],
[ "verbose|v", "verbose mode", ],
{ show_defaults => 1, }
);
}
# Usage synopsis for the lastz command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz lastz [options] <path/target> <path/query>";
    return $usage;
}
sub description {
lib/App/Egaz/Command/lpcnam.pm view on Meta::CPAN
}
# Option specification for the lpcnam command.
# Returns a list of [ spec, description, \%extras ] entries (the extras hash
# is optional), followed by a trailing hash that enables show_defaults.
sub opt_spec {
    my %display = ( show_defaults => 1 );

    return (
        [ "outdir|o=s"   => "Output directory" ],
        [ "lineargap=s"  => "axtChain linearGap, loose or medium",    { default => "loose" } ],
        [ "minscore=i"   => "minimum score for axtChain",             { default => 1000 } ],
        [ "tname|t=s"    => "target name" ],
        [ "qname|q=s"    => "query name" ],
        [ "syn"          => "create .synNet.maf instead of .net.maf" ],
        [ "parallel|p=i" => "number of threads",                      { default => 2 } ],
        [ "verbose|v"    => "verbose mode" ],
        \%display,
    );
}
# Usage synopsis for the lpcnam command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz lpcnam [options] <path/target> <path/query> <path/lav>";
    return $usage;
}
sub description {
lib/App/Egaz/Command/multiz.pm view on Meta::CPAN
# One-line summary of the multiz command.
# Returns: a plain string.
sub abstract {
    my $summary = 'multiz step by step';
    return $summary;
}
# Option specification for the multiz command.
# Returns a list of [ spec, description, \%extras ] entries (the extras hash
# is optional), followed by a trailing hash that enables show_defaults.
sub opt_spec {
    my %display = ( show_defaults => 1 );

    return (
        [ "outdir|o=s"   => "Output directory" ],
        [ "tree=s"       => "a rooted newick tree" ],
        [ "target=s"     => "target name, this command can automatically pick one" ],
        [ "keeptmp"      => "keep intermediate files" ],
        [ "parallel|p=i" => "number of threads", { default => 2 } ],
        \%display,
    );
}
# Usage synopsis for the multiz command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz multiz [options] <maf dir> [more dirs]";
    return $usage;
}
sub description {
my $desc;
lib/App/Egaz/Command/raxml.pm view on Meta::CPAN
return 'raxml wrapper to construct phylogenetic trees';
}
# Option specification for the raxml command.
# Returns a list of [ spec, description, \%extras ] entries (the extras hash
# is optional), followed by a trailing hash that enables show_defaults.
sub opt_spec {
    my %display = ( show_defaults => 1 );

    return (
        [ "outfile|o=s"   => "Output filename. [stdout] for screen", { default => "stdout" } ],
        [ "outgroup=s"    => "the name of outgroup if exists" ],
        [ "seed|s=i"      => "specify a random number seed" ],
        [ "bootstrap|b=i" => "the number of alternative runs",       { default => 100 } ],
        [ "tmp=s"         => "user defined tempdir" ],
        [ "parallel|p=i"  => "number of threads",                    { default => 2 } ],
        [ "verbose|v"     => "verbose mode" ],
        \%display,
    );
}
# Usage synopsis for the raxml command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz raxml [options] <infile> [more infiles]";
    return $usage;
}
sub description {
lib/App/Egaz/Command/repeatmasker.pm view on Meta::CPAN
return 'RepeatMasker wrapper';
}
# Option specification for the repeatmasker command.
# Returns a list of [ spec, description, \%extras ] entries (the extras hash
# is optional), followed by a trailing hash that enables show_defaults.
sub opt_spec {
    my %display = ( show_defaults => 1 );

    return (
        [ "outdir|o=s"   => "Output directory",                           { default => "." } ],
        [ "species=s"    => "the species or clade of the input sequence" ],
        [ "opt=s"        => "other options be passed to RepeatMasker" ],
        [ "gff"          => "create .rm.gff by rmOutToGFF3.pl" ],
        [ "tmp=s"        => "user defined tempdir" ],
        [ "parallel|p=i" => "number of threads",                          { default => 2 } ],
        [ "verbose|v"    => "verbose mode" ],
        \%display,
    );
}
# Usage synopsis for the repeatmasker command.
# Returns: a plain string.
sub usage_desc {
    my $usage = "egaz repeatmasker [options] <infile> [more infiles]";
    return $usage;
}
sub description {
lib/App/Egaz/Command/template.pm view on Meta::CPAN
[ "self" => "self genome alignments, paralogs" ],
[ "prep" => "prepare sequences" ],
],
}
],
[],
[ "outdir|o=s", "Output directory", { default => "." }, ],
[ "queue=s", "QUEUE_NAME", { default => "mpi" }, ],
[ "separate", "separate each Target-Query groups", ],
[ "tmp=s", "user defined tempdir", ],
[ "parallel|p=i", "number of threads", { default => 2 }, ],
[ "verbose|v", "verbose mode", ],
[],
[ "length=i", "minimal length of alignment fragments", { default => 1000 }, ],
[ "partition", "use partitioned sequences if available", ],
[ "msa=s", "aligning program for refine alignments", { default => "mafft" }, ],
[ "taxon=s", "taxon.csv for this project", ],
[ "aligndb", "create aligndb scripts", ],
[],
[ "multiname=s", "naming multiply alignment", ],
[ "outgroup=s", "the name of outgroup", ],
lib/App/Egaz/Common.pm view on Meta::CPAN
# -b compute forward and reverse complement matches
# -F force 4 column output format regardless of the number of reference sequence inputs
# -n match only the characters a, c, g, or t
#
# sparsemem only
# -k sampled suffix positions (one by default)
# Pick the matcher: sparsemem when IPC::Cmd::can_run reports it as runnable,
# otherwise mummer. Only the sparsemem command line carries the extra
# "-k 4 -threads 4" options.
my $exe = IPC::Cmd::can_run('sparsemem') ? 'sparsemem' : 'mummer';
my $template
    = $exe eq 'sparsemem'
    ? "%s -maxmatch -F -l %d -b -n -k 4 -threads 4 %s %s > %s"
    : "%s -maxmatch -F -l %d -b -n %s %s > %s";

# Fill in executable, minimal match length, the two input files and the
# output path, then run the command through the shell (for the redirect).
my $cmd = sprintf $template, $exe, $length, $genome, $query, $result->stringify;
system $cmd;
}