Bio-MaxQuant-Evidence-Statistics

 view release on metacpan or  search on metacpan

lib/Bio/MaxQuant/Evidence/Statistics.pm  view on Meta::CPAN

    my @expts = $options{experiment} ? ($options{experiment}) : (keys %$data);
    foreach my $e(@expts){
        foreach my $k(keys %{$data->{$e}}){
            my $value = $data->{$e}->{$k}->{$options{column}};
            if(ref($value) eq ''){
                $value = [split /;/, $value];
            }
            my @values = $options{'split'} ? (@$value) : (join(';',@$value));
            foreach (@values){
                next unless $_ ne '' || $options{'emptiesok'};
                if($options{nodup}){
                    $results->{$_} = 1;
                }
                else {
                    push @$results, $_;
                }
            }
        }
    }
#    use Data::Dumper;
#    print STDERR Dumper $results;
    return $options{nodup} ? (keys %$results) : (@$results);
}

=head2 proteinCount

=cut

# proteinCount
#
# Returns the number of entries reported by getLeadingProteins().
# The number is computed once and stored in $self->{cache}{proteinCount};
# later calls return the stored value without calling getLeadingProteins().
sub proteinCount {
    my ($self) = @_;

    unless (exists $self->{cache}{proteinCount}) {
        # Count the list returned by getLeadingProteins and remember it.
        my @leading = $self->getLeadingProteins();
        $self->{cache}{proteinCount} = scalar @leading;
    }

    return $self->{cache}{proteinCount};
}

=head2 getProteinGroupIds

=cut

# getProteinGroupIds
#
# Returns the values of the 'Protein group IDs' column, as reported by
# extractColumnValues(), in default (string) sort order.
# The sorted list is stored in $self->{cache}{proteinGroupIds} on first use
# and returned from there on later calls.
sub getProteinGroupIds {
    my ($self) = @_;

    if (!exists $self->{cache}{proteinGroupIds}) {
        my @ids = $self->extractColumnValues(column => 'Protein group IDs');
        $self->{cache}{proteinGroupIds} = [sort @ids];
    }

    return @{ $self->{cache}{proteinGroupIds} };
}

=head2 getLeadingProteins

=cut

# getLeadingProteins
#
# Returns the values of the 'Leading Proteins' column, as reported by
# extractColumnValues(), in default (string) sort order.
# The sorted list is stored in $o->{cache}{leadingProteins} on first use and
# returned from there on later calls.
sub getLeadingProteins {
    my $o = shift;

    # Guard on this method's own cache slot. Testing the proteinGroupIds slot
    # instead would skip the computation whenever getProteinGroupIds had run
    # first (leaving nothing to return), and would recompute on every call
    # otherwise.
    unless (exists $o->{cache}->{leadingProteins}) {
        my @proteins = $o->extractColumnValues(column => 'Leading Proteins');
        $o->{cache}->{leadingProteins} = [sort @proteins];
    }

    return @{ $o->{cache}->{leadingProteins} };
}

=head2 logRatios

Logs ratios (base 2) throughout the dataset, and sets a flag so it can't get logged again.

Treatment of "special values": the empty string, values <= 0, NaN, and any other non-numeric
value are each replaced with an empty string (the entry keeps its position in the list).

=cut

# logRatios
#
# Replaces, in place, every entry of each 'Ratio H/L' list found under
# $o->{data}{<experiment>}{<protein group id>} with its base-2 logarithm.
#
# Entries that are not plain unsigned decimal numbers (digits, optionally
# followed by a '.' and more digits), or whose numeric value is not greater
# than zero, are replaced with the empty string.
#
# Sets $o->{logged} so that the transformation is applied only once.
#
# Returns 1 if the data was transformed by this call, 0 if it had already
# been logged.
sub logRatios {
    my $o = shift;
    return 0 if $o->{logged};
    $o->{logged} = 1;

    my $log2 = log(2);
    my $data = $o->{data};
    foreach my $exptname (keys %$data) {
        my $experiment = $data->{$exptname};
        foreach my $proteinGroupId (keys %$experiment) {
            my $ratios = $experiment->{$proteinGroupId}->{'Ratio H/L'};
            # The loop variable aliases each array element, so assigning to it
            # updates the stored list directly.
            foreach my $ratio (@$ratios) {
                # The "> 0" test matters: the pattern alone accepts "0" and
                # "0.0", and Perl's log() dies when given zero.
                my $usable = defined $ratio
                          && $ratio =~ /^\d+\.?\d*$/
                          && $ratio > 0;
                $ratio = $usable ? log($ratio) / $log2 : '';
            }
        }
    }
    return 1;
}

=head2 filter

returns a set of protein records based on filter parameters...

=head3 options

=over

=item experiment - regular expression to match experiment name

=item proteinGroupId - regular expression to match protein group id 

=item leadingProteins - regular expression to match leading protein ids

=item notLeadingProteins - regular expression to not match leading protein ids

=back

Returns a filtered object of the same type, with relevant flags set (e.g. whether
data has been logged, etc).

Warning, intentionally does not perform a deep clone!

=cut

sub filter {
    my ($o,%opts) = @_;
    # options : 
#    use Data::Dumper;
#    print STDERR 'OPTS: ', Dumper \%opts;
    my $data = $o->{data};
    my $result = {};
    foreach my $experiment(keys %$data){
        if(! exists $opts{experiment} || $experiment =~ /$opts{experiment}/){
            $result->{$experiment} = {};



( run in 1.554 second using v1.01-cache-2.11-cpan-39bf76dae61 )