Bio-FdrFet

 view release on metacpan or  search on metacpan

lib/Bio/FdrFet.pm  view on Meta::CPAN

                       "dbacc" => <pathway_accession>,
                       "desc" => <pathway description>);
  $obj->add_to_genes("gene" => <gene_name>,
                     "pval" => <probability_value>);

=head3 Output Methods

  $obj->genes;
  $obj->pathways[($order)];
  $obj->pathway_result($pathway, $data_name[, $all_flag]);
  $obj->pathway_desc($pathway);
  $obj->pathway_genes($pathway);
  $obj->fdr_position($fdr);

=head3 Other Methods

  $obj->gene_count[($fet_gene_count)];
  $obj->calculate;

=head1 DESCRIPTION

Bio::FdrFet implements the False Discovery Rate Fisher Exact Test of gene
expression analysis applied to pathways described in the paper by
Ruiru Ji, Karl-Heinz Ott, Roumyana Yordanova, and Robert E
Bruccoleri.  A copy of the paper is included with the distribution in
the file, Fdr-Fet-Manuscript.pdf.

The module is implemented using a simple object oriented paradigm
where the object stores all the information needed for a calculation
along with a state variable, C<STATE>. The state variable has two
possible values, C<'setup'> and C<'calculated'>. The value of
C<'setup'> indicates that the object is being setup with data, and any
results in the object are inconsistent with the data. The value of
C<'calculated'> indicates that the object's computational results are
consistent with the data, and may be returned to a calling program.

The C<'calculate'> method is used to update all the calculated values
from the input data. It checks the state variable first, and only does
the calculation if the state is C<'setup'>. Once the calculations are
complete, then the state variable is set to C<'calculated'>. Thus, the C<calculate> method
can be called whenever a calculated value is needed, and there is no
performance penalty.

The module initializes the C<Bio::FdrFet> object with a state of
C<'setup'>. Any data input sets the state to C<'setup'>. Any requests
for calculated data, calls C<'calculate'>, which updates the state
variable so futures requests for calculated data return quickly.

=head1 METHODS

The following methods are provided:

=over 4

=cut


=item C<new([$fdrcutoff])>

Creates a new Bio::FdrFet object. The optional parameter is the False
Discovery Rate cutoff in units of percent. See the C<fdr_cutoff>
method below for more details.

=cut

sub new {
    my $pkg;
    my $class = shift;
    eval {($pkg) = caller(0);};
    if ($class ne $pkg) {
	unshift @_, $class;
    }
    my $self = {};
    bless $self;
    my $cutoff = shift;
    $cutoff = 35 if not defined $cutoff;
    $self->{STATE} = "setup";
    $self->{FDR_CUTOFF} = _check_fdr_cutoff($cutoff);
    $self->{PATHWAYS} = {};
    $self->{GENES} = {};
    $self->{INPUT_GENE_COUNT} = 0;
    $self->{GENE_COUNT} = undef;
    $self->{VERBOSE} = 1;
    $self->{UNIVERSE} = "genes";
    return $self;
}

=item C<fdr_cutoff([$fdrcutoff])>

Retrieves the current setting for the False Discovery Rate threshold,
and optionally sets it. This threshold must be an integer greater than
0 and less than or equal to 100, and is divided by 100 for the value
used by the computation.

=cut

sub fdr_cutoff {

    my $self = shift;
    my $new_cutoff = shift;
    if (defined($new_cutoff)) {
	$self->{FDR_CUTOFF} = _check_fdr_cutoff($new_cutoff);
        $self->{STATE} = 'setup';
    }
    return $self->{FDR_CUTOFF};
}

=item C<verbose([$verbose_mode])>

Retrieves the current setting for the verbose parameter
and optionally sets it. It can be either 0, no verbosity, or 1, lots
of messages sent to STDERR.

=cut

sub verbose {

    my $self = shift;
    my $new_verbose = shift;
    if (defined($new_verbose)) {
	$self->{VERBOSE} = $new_verbose;

lib/Bio/FdrFet.pm  view on Meta::CPAN

    my %arg = &_check_args(\@_, "gene", "pval");
    if (exists($self->{GENES}->{$arg{gene}}->{PVAL}) and
	$self->{GENES}->{$arg{gene}}->{PVAL} != $arg{pval}) {
	confess sprintf("Gene %s has a pre-existing pval (%g) different than the current argument = %g\n",
			$arg{gene},
			$self->{GENES}->{$arg{gene}}->{PVAL},
			$arg{pval});
    }
    $self->{GENES}->{$arg{gene}}->{PVAL} = $arg{pval};
    $self->{STATE} = 'setup';
    $self->{INPUT_GENE_COUNT} += 1;
}

=item C<genes()>

Returns the list of gene names in the system.

=cut

sub genes {
    my $self = shift;
    return keys %{$self->{GENES}};
}

=item C<pathways[($order)]>

Returns the list of pathways. If the optional argument, C<$order>, is
specified and contains the word, C<"sorted">, (comparison is case
insensitive), then the object will return the pathways in order of
most significant to least. If sorting is done, then the object will
update the calculation of probability values, whereas if no sorting is
done, then the object does no calculation.

=cut

sub pathways {
    my $self = shift;
    my $order = shift;
    if (lc($order) ne 'sorted') {
	return keys %{$self->{PATHWAYS}};
    }
    else {
	$self->calculate;
	return sort { $self->{PATHWAYS}->{$b}->{BEST_RESULTS}->{LOGPVAL} <=>
		      $self->{PATHWAYS}->{$a}->{BEST_RESULTS}->{LOGPVAL} } keys %{$self->{PATHWAYS}};
    }
}

=item C<pathway_result($pathway, $data_name[, $all_flag])>

Returns a calculated result for a pathway. The following values may be
used for C<$data_name>. Case of C<$data_name> does not matter.

 LOGPVAL   -log10(probability value for pathway).
 PVAL      probability value for pathway
 ODDS      Odds ratio. 
 Q         Number of genes in the pathway passing the FDR cutoff
 M         Number of genes overall passing the FDR cutoff
 N         Number of genes in the system minus C<M> above.
 K         Number of genes in the pathway.
 FDR       FDR cutoff in percent giving the best pvalue.
 LOCI      Reference to an array of gene names in the pathway
           that satisfy FDR cutoff.

If C<$all_flag> is specified and has the value, "all", then this
returns an array of values for all the attempted FDR cutoffs, except
for the c<FDR> cutoff.

=cut

sub pathway_result {
    my $self = shift;
    my $pathway = $self->_check_pathway_arg(shift(@_));
    my $name = uc(shift(@_));
    my $all_option = shift(@_);
    $all_option = "" if not defined $all_option;
    $all_option = uc($all_option);
    
    $self->calculate;
    if (not exists($self->{PATHWAYS}->{$pathway})) {
	confess "Pathway ($pathway) not found.\n";
    }
    if ($all_option eq 'ALL') {
	if ($name eq 'FDR') {
	    confess "All option not valid for FDR result.\n";
	}
	if (not exists($self->{PATHWAYS}->{$pathway}->{ALL_RESULTS}->{$name})) {
	    confess "Pathway all_results ($name) not found.\n";
	}
	return @{$self->{PATHWAYS}->{$pathway}->{ALL_RESULTS}->{$name}};
    }
    else {
	if (not exists($self->{PATHWAYS}->{$pathway}->{BEST_RESULTS}->{$name})) {
	    confess "Pathway results ($name) not found.\n";
	}
	return $self->{PATHWAYS}->{$pathway}->{BEST_RESULTS}->{$name};
    }
}

=item C<pathway_desc($pathway)>

Returns the description field of the specified pathway.

=cut

sub pathway_desc {
    my $self = shift;
    my $pathway = $self->_check_pathway_arg(shift(@_));
    return $self->{PATHWAYS}->{$pathway}->{DESC};
}

=item C<pathway_genes($pathway)>

Returns an array containing the genes of the specified pathway.

=cut

sub pathway_genes {
    my $self = shift;
    my $pathway = $self->_check_pathway_arg(shift(@_));
    return @{$self->{PATHWAYS}->{$pathway}->{GENES}};



( run in 1.992 second using v1.01-cache-2.11-cpan-437f7b0c052 )