Bio-MUST-Apps-FortyTwo
view release on metacpan or search on metacpan
bin/debrief-42.pl view on Meta::CPAN
'unclass_contam_seq', 'unknown_seq', @mod_phyla, 'foreign_phyla',
'contam_perc', 'class_contam_perc', 'unclass_contam_perc', 'unknown_perc'
;
# Header row of the summary table: 16 tab-separated column names
# (the summary has no per-lineage columns).
say {$out_sum} join "\t", qw(
    bank tested_genes added_ali clean_ali
    contam_ali completeness added_seq clean_seq contam_seq
    unclass_contam_seq unknown_seq foreign_phyla contam_perc
    class_contam_perc unclass_contam_perc unknown_perc
);
my @ali_totals;
tie my %line_for, 'Tie::IxHash';

# Build one report row per bank. The row layout follows the header rows
# written above: 11 fixed columns (bank, alignment counts, completeness,
# sequence counts), one column per elected lineage, the foreign phyla count,
# then the four percentage columns.
for my $bank (@banks) {

    # ali stats: number of keys stored under each alignment category.
    # `scalar keys` always yields a defined count, so no `// 0` is needed.
    my $ali_c     = scalar keys %{ $data_for{$bank}{ali}{c}     };
    my $ali_nc    = scalar keys %{ $data_for{$bank}{ali}{nc}    };
    my $ali_total = scalar keys %{ $data_for{$bank}{ali}{total} };

    # The eval traps the division-by-zero error when $ali_test is 0; fall
    # back to 0 (like the other percentages below) so the row never carries
    # an undefined field into join.
    my $completeness = eval { ( $ali_nc / $ali_test ) * 100 } // 0;

    # seq stats
    my $seq_c     = $data_for{$bank}{seq}{c}     // 0;
    my $seq_nc    = $data_for{$bank}{seq}{nc}    // 0;
    my $seq_total = $data_for{$bank}{seq}{total} // 0;

    # taxonomic data: one count per elected lineage, 0 when absent
    my @phyla_data
        = map { $contam_data_for{$bank}{$_} // 0 } @elected_lineages;
    my $foreign_phyla = scalar keys %{ $contam_data_for{$bank} };
    my $unclassified  = $data_for{$bank}{seq}{uc} // 0;
    my $unknown       = $data_for{$bank}{seq}{uk} // 0;

    # Percentages relative to the bank's total sequence count; each eval
    # yields undef (hence 0) when $seq_total is 0.
    my $all_c_p  = eval { ( ($seq_c + $unclassified) * 100 ) / $seq_total } // 0;
    my $contam_p = eval { ( $seq_c        * 100 ) / $seq_total } // 0;
    my $unclas_p = eval { ( $unclassified * 100 ) / $seq_total } // 0;
    my $unknwn_p = eval { ( $unknown      * 100 ) / $seq_total } // 0;

    # Replace every whitespace character in the bank name (the previous
    # pattern lacked /g and only replaced the first one).
    # NB: $bank aliases the current element of @banks, so @banks is
    # modified in place as well.
    $bank =~ s/\s/_/xmsg;

    $line_for{$bank} = [
        $bank, $ali_test, $ali_total, $ali_nc, $ali_c, $completeness,
        $seq_total, $seq_nc, $seq_c, $unclassified, $unknown,
        @phyla_data, $foreign_phyla >= 0 ? $foreign_phyla : 0,
        $all_c_p, $contam_p, $unclas_p, $unknwn_p
    ];

    push @ali_totals, $ali_total;
}
#### %line_for

# Order banks by decreasing value of the third field from the end of each
# row (the class_contam_perc column).
my @sort_all_banks = sort {
    $line_for{$b}[-3] <=> $line_for{$a}[-3]
} keys %line_for;
#### @sort_all_banks

# Write file contents

# Detailed table: every field of each row, including per-lineage counts.
say {$out} join "\t", @{ $line_for{$_} } for @sort_all_banks;

# Summary table: the 11 leading fixed fields (bank .. unknown_seq) plus the
# 5 trailing fields (foreign_phyla and the four percentages), skipping the
# variable number of per-lineage counts stored in between. The former slice
# [0..8,-7..-1] matched the 16 header columns only when there were no
# per-lineage columns; otherwise it wrote lineage counts into the
# unclass_contam_seq / unknown_seq columns.
say {$out_sum} join "\t", @{ $line_for{$_} }[ 0..10, -5..-1 ]
    for @sort_all_banks;
### Done!
##################################### SUBS #####################################
# Format each count of a list as "count/ratio".
#
# Arguments:
#   $array - reference to an array of numeric counts
#   $total - denominator applied to every non-zero count
#
# Returns a reference to a new array holding, for each input count:
#   '-/-'           when the count is 0
#   "count/ratio"   otherwise, where ratio is count / $total formatted with
#                   two decimals (a plain ratio, not multiplied by 100)
sub compute_percentage {
    my ($array, $total) = @_;

    my @results = map {
        $_ == 0
            ? '-/-'
            : $_ . '/' . sprintf("%.2f", $_ / $total)
    } @$array;

    return \@results;
}
# for testing:
# perl -Ilib bin/debrief-42.pl --indir=xtest/tax_reports/ \
# --in-strip=-42-camera-megan99-tf --taxdir=../Bio-MUST-Core/test/taxdump \
# --seq_labeling=xtest/seq-labels.idl --contam_labeling=xtest/contam-labels.idl \
# --outdir=dbout
__END__
=pod
=head1 NAME
debrief-42.pl - Summarize the results of a 42 metagenomic run
=head1 VERSION
version 0.213470
=head1 USAGE
debrief-42.pl --indir=<indir> --in=<str> [optional arguments]
=head1 REQUIRED ARGUMENTS
=over
=item --indir=<indir>
Path to input directory containing TAX-REPORT files.
=for Euclid: indir.type: str
=item --in[-strip]=<str>
Substring to strip from infile basenames to derive pre-42 filenames. This
often corresponds to the C<out_suffix> option of the YAML C<config> file for 42
(e.g., C<-42>).
=for Euclid: str.type: string
=back
=head1 OPTIONAL ARGUMENTS
=over
=item --outdir=<dir>
Optional output dir that will contain the computed TSV files (will be created if
needed) [default: none]. Otherwise, output files will be written in the working
directory.
=for Euclid: dir.type: writable
=item --out[-suffix]=<suffix>
( run in 2.200 seconds using v1.01-cache-2.11-cpan-8f98c5d2c55 )