Bio-MUST-Apps-FortyTwo

 view release on metacpan or  search on metacpan

bin/debrief-42.pl  view on Meta::CPAN

                        'unclass_contam_seq', 'unknown_seq', @mod_phyla, 'foreign_phyla',
                        'contam_perc', 'class_contam_perc', 'unclass_contam_perc', 'unknown_perc'
                        ;

# Header row of the summary table: fixed columns only (no per-phylum columns).
say {$out_sum} join "\t", qw(
    bank tested_genes added_ali clean_ali contam_ali completeness
    added_seq clean_seq contam_seq unclass_contam_seq unknown_seq
    foreign_phyla
    contam_perc class_contam_perc unclass_contam_perc unknown_perc
);

# Build one output row per bank, keyed by the whitespace-normalised bank name.
# Row layout: 11 fixed columns, then one column per elected lineage, then the
# foreign-phyla count and the four percentages.
my @ali_totals;
tie my %line_for, 'Tie::IxHash';
for my $bank (@banks) {

    # ali stats: number of keys stored under each category
    # (a key count is never undef, so no default value is needed)
    my $ali_c     = scalar keys %{ $data_for{$bank}{ali}{c}     };
    my $ali_nc    = scalar keys %{ $data_for{$bank}{ali}{nc}    };
    my $ali_total = scalar keys %{ $data_for{$bank}{ali}{total} };

    # a division by zero dies inside the eval, which then yields undef;
    # report 0 in that case, exactly like the sequence percentages below
    my $completeness = eval { ( $ali_nc / $ali_test ) * 100 } // 0;

    # seq stats
    my $seq_c     = $data_for{$bank}{seq}{c}     // 0;
    my $seq_nc    = $data_for{$bank}{seq}{nc}    // 0;
    my $seq_total = $data_for{$bank}{seq}{total} // 0;

    # taxonomic data
    my @phyla_data    = map { $contam_data_for{$bank}{$_} // 0 } @elected_lineages;
    my $foreign_phyla = scalar keys %{ $contam_data_for{$bank} };
    my $unclassified  = $data_for{$bank}{seq}{uc} // 0;
    my $unknown       = $data_for{$bank}{seq}{uk} // 0;

    # percentages relative to $seq_total; 0 when $seq_total is 0
    my $all_c_p  = eval { ( ($seq_c + $unclassified) * 100)/$seq_total } // 0;
    my $contam_p = eval { (  $seq_c                  * 100)/$seq_total } // 0;
    my $unclas_p = eval { (  $unclassified           * 100)/$seq_total } // 0;
    my $unknwn_p = eval { (  $unknown                * 100)/$seq_total } // 0;

    # replace EVERY whitespace character (hence /g), not only the first one
    # note: $bank aliases the current element of @banks, which is thus
    # updated in place too
    $bank =~ s/\s/_/xmsg;

    $line_for{$bank} = [
                        $bank, $ali_test, $ali_total, $ali_nc, $ali_c, $completeness,
                        $seq_total, $seq_nc, $seq_c,  $unclassified, $unknown,
                        @phyla_data, $foreign_phyla,
                        $all_c_p, $contam_p, $unclas_p, $unknwn_p
                       ];

    push @ali_totals, $ali_total;
}
#### %line_for

# Order the banks by the third-from-last field of their row, highest first.
my @sort_all_banks = sort { $line_for{$b}[-3] <=> $line_for{$a}[-3] } keys %line_for;
#### @sort_all_banks

# Write file contents
# Full table: every column of each row.
say {$out}     join "\t", @{ $line_for{$_} }                for @sort_all_banks;
# Summary table: the 11 leading fixed columns (bank .. unknown count) plus the
# 5 trailing ones (foreign-phyla count and the four percentages). The
# per-phylum columns sitting in between are skipped, so that each row lines up
# with the 16-column summary header whatever the number of phyla.
say {$out_sum} join "\t", @{ $line_for{$_} }[0..10,-5..-1] for @sort_all_banks;

### Done!


##################################### SUBS #####################################

# Format each count of a list as a "count/ratio" string.
#
# Arguments:
#   $array - array ref of numeric counts
#   $total - denominator used for every ratio
#
# Returns an array ref with one string per input count:
#   - '-/-' for a count that is zero (or undef);
#   - otherwise "<count>/<ratio>", where ratio is count/total formatted with
#     two decimals. Despite the sub name, the ratio is NOT multiplied by 100.
#
# A zero (or undef) $total yields a ratio of 0.00 instead of dying with a
# division-by-zero error, in line with how the rest of the script reports
# undefined percentages.
sub compute_percentage {
    my ($array, $total) = @_;

    my @results;

    VALUE:
    for my $value (@{$array}) {

        # nothing to report for empty counts
        if ( ($value // 0) == 0 ) {
            push @results, '-/-';
            next VALUE;
        }

        my $ratio = $total ? $value / $total : 0;
        push @results, $value . '/' . sprintf('%.2f', $ratio);
    }

    return \@results;
}

# for testing:
# perl -Ilib bin/debrief-42.pl --indir=xtest/tax_reports/ \
#   --in-strip=-42-camera-megan99-tf --taxdir=../Bio-MUST-Core/test/taxdump \
#   --seq_labeling=xtest/seq-labels.idl --contam_labeling=xtest/contam-labels.idl \
#   --outdir=dbout

__END__

=pod

=head1 NAME

debrief-42.pl - Summarize the results of a 42 metagenomic run

=head1 VERSION

version 0.213470

=head1 USAGE

    debrief-42.pl --indir=<indir> --in=<str> [optional arguments]

=head1 REQUIRED ARGUMENTS

=over

=item --indir=<indir>

Path to input directory containing TAX-REPORT files.

=for Euclid: indir.type: str

=item --in[-strip]=<str>

Substring to strip from infile basenames to derive pre-42 filenames. This
often corresponds to the C<out_suffix> option of the YAML C<config> file for 42
(e.g., C<-42>).

=for Euclid: str.type: string

=back

=head1 OPTIONAL ARGUMENTS

=over

=item --outdir=<dir>

Optional output directory for the computed TSV files (created if needed)
[default: none]. When this option is not given, output files are written to
the current working directory.

=for Euclid: dir.type: writable

=item --out[-suffix]=<suffix>



( run in 2.200 seconds using v1.01-cache-2.11-cpan-8f98c5d2c55 )