Bio-FdrFet
view release on metacpan or search on metacpan
lib/Bio/FdrFet.pm view on Meta::CPAN
"dbacc" => <pathway_accession>,
"desc" => <pathway description>);
$obj->add_to_genes("gene" => <gene_name>,
"pval" => <probability_value>);
=head3 Output Methods
$obj->genes;
$obj->pathways[($order)];
$obj->pathway_result($pathway, $data_name[, $all_flag]);
$obj->pathway_desc($pathway);
$obj->pathway_genes($pathway);
$obj->fdr_position($fdr);
=head3 Other Methods
$obj->gene_count[($fet_gene_count)];
$obj->calculate;
=head1 DESCRIPTION
Bio::FdrFet implements the False Discovery Rate Fisher Exact Test of gene
expression analysis applied to pathways described in the paper by
Ruiru Ji, Karl-Heinz Ott, Roumyana Yordanova, and Robert E
Bruccoleri. A copy of the paper is included with the distribution in
the file, Fdr-Fet-Manuscript.pdf.
The module is implemented using a simple object oriented paradigm
where the object stores all the information needed for a calculation
along with a state variable, C<STATE>. The state variable has two
possible values, C<'setup'> and C<'calculated'>. The value of
C<'setup'> indicates that the object is being setup with data, and any
results in the object are inconsistent with the data. The value of
C<'calculated'> indicates that the object's computational results are
consistent with the data, and may be returned to a calling program.
The C<'calculate'> method is used to update all the calculated values
from the input data. It checks the state variable first, and only does
the calculation if the state is C<'setup'>. Once the calculations are
complete, then the state variable is set to C<'calculated'>. Thus, the C<calculate> method
can be called whenever a calculated value is needed, and there is no
performance penalty.
The module initializes the C<Bio::FdrFet> object with a state of
C<'setup'>. Any data input sets the state to C<'setup'>. Any requests
for calculated data, calls C<'calculate'>, which updates the state
variable so futures requests for calculated data return quickly.
=head1 METHODS
The following methods are provided:
=over 4
=cut
=item C<new([$fdrcutoff])>
Creates a new Bio::FdrFet object. The optional parameter is the False
Discovery Rate cutoff in units of percent. See the C<fdr_cutoff>
method below for more details.
=cut
sub new {
my $pkg;
my $class = shift;
eval {($pkg) = caller(0);};
if ($class ne $pkg) {
unshift @_, $class;
}
my $self = {};
bless $self;
my $cutoff = shift;
$cutoff = 35 if not defined $cutoff;
$self->{STATE} = "setup";
$self->{FDR_CUTOFF} = _check_fdr_cutoff($cutoff);
$self->{PATHWAYS} = {};
$self->{GENES} = {};
$self->{INPUT_GENE_COUNT} = 0;
$self->{GENE_COUNT} = undef;
$self->{VERBOSE} = 1;
$self->{UNIVERSE} = "genes";
return $self;
}
=item C<fdr_cutoff([$fdrcutoff])>
Retrieves the current setting for the False Discovery Rate threshold,
and optionally sets it. This threshold must be an integer greater than
0 and less than or equal to 100, and is divided by 100 for the value
used by the computation.
=cut
sub fdr_cutoff {
my $self = shift;
my $new_cutoff = shift;
if (defined($new_cutoff)) {
$self->{FDR_CUTOFF} = _check_fdr_cutoff($new_cutoff);
$self->{STATE} = 'setup';
}
return $self->{FDR_CUTOFF};
}
=item C<verbose([$verbose_mode])>
Retrieves the current setting for the verbose parameter
and optionally sets it. It can be either 0, no verbosity, or 1, lots
of messages sent to STDERR.
=cut
sub verbose {
my $self = shift;
my $new_verbose = shift;
if (defined($new_verbose)) {
$self->{VERBOSE} = $new_verbose;
lib/Bio/FdrFet.pm view on Meta::CPAN
my %arg = &_check_args(\@_, "gene", "pval");
if (exists($self->{GENES}->{$arg{gene}}->{PVAL}) and
$self->{GENES}->{$arg{gene}}->{PVAL} != $arg{pval}) {
confess sprintf("Gene %s has a pre-existing pval (%g) different than the current argument = %g\n",
$arg{gene},
$self->{GENES}->{$arg{gene}}->{PVAL},
$arg{pval});
}
$self->{GENES}->{$arg{gene}}->{PVAL} = $arg{pval};
$self->{STATE} = 'setup';
$self->{INPUT_GENE_COUNT} += 1;
}
=item C<genes()>
Returns the list of gene names in the system.
=cut
sub genes {
my $self = shift;
return keys %{$self->{GENES}};
}
=item C<pathways[($order)]>
Returns the list of pathways. If the optional argument, C<$order>, is
specified and contains the word, C<"sorted">, (comparison is case
insensitive), then the object will return the pathways in order of
most significant to least. If sorting is done, then the object will
update the calculation of probability values, whereas if no sorting is
done, then the object does no calculation.
=cut
sub pathways {
my $self = shift;
my $order = shift;
if (lc($order) ne 'sorted') {
return keys %{$self->{PATHWAYS}};
}
else {
$self->calculate;
return sort { $self->{PATHWAYS}->{$b}->{BEST_RESULTS}->{LOGPVAL} <=>
$self->{PATHWAYS}->{$a}->{BEST_RESULTS}->{LOGPVAL} } keys %{$self->{PATHWAYS}};
}
}
=item C<pathway_result($pathway, $data_name[, $all_flag])>
Returns a calculated result for a pathway. The following values may be
used for C<$data_name>. Case of C<$data_name> does not matter.
LOGPVAL -log10(probability value for pathway).
PVAL probability value for pathway
ODDS Odds ratio.
Q Number of genes in the pathway passing the FDR cutoff
M Number of genes overall passing the FDR cutoff
N Number of genes in the system minus C<M> above.
K Number of genes in the pathway.
FDR FDR cutoff in percent giving the best pvalue.
LOCI Reference to an array of gene names in the pathway
that satisfy FDR cutoff.
If C<$all_flag> is specified and has the value, "all", then this
returns an array of values for all the attempted FDR cutoffs, except
for the c<FDR> cutoff.
=cut
sub pathway_result {
my $self = shift;
my $pathway = $self->_check_pathway_arg(shift(@_));
my $name = uc(shift(@_));
my $all_option = shift(@_);
$all_option = "" if not defined $all_option;
$all_option = uc($all_option);
$self->calculate;
if (not exists($self->{PATHWAYS}->{$pathway})) {
confess "Pathway ($pathway) not found.\n";
}
if ($all_option eq 'ALL') {
if ($name eq 'FDR') {
confess "All option not valid for FDR result.\n";
}
if (not exists($self->{PATHWAYS}->{$pathway}->{ALL_RESULTS}->{$name})) {
confess "Pathway all_results ($name) not found.\n";
}
return @{$self->{PATHWAYS}->{$pathway}->{ALL_RESULTS}->{$name}};
}
else {
if (not exists($self->{PATHWAYS}->{$pathway}->{BEST_RESULTS}->{$name})) {
confess "Pathway results ($name) not found.\n";
}
return $self->{PATHWAYS}->{$pathway}->{BEST_RESULTS}->{$name};
}
}
=item C<pathway_desc($pathway)>
Returns the description field of the specified pathway.
=cut
sub pathway_desc {
my $self = shift;
my $pathway = $self->_check_pathway_arg(shift(@_));
return $self->{PATHWAYS}->{$pathway}->{DESC};
}
=item C<pathway_genes($pathway)>
Returns an array containing the genes of the specified pathway.
=cut
sub pathway_genes {
my $self = shift;
my $pathway = $self->_check_pathway_arg(shift(@_));
return @{$self->{PATHWAYS}->{$pathway}->{GENES}};
( run in 1.992 second using v1.01-cache-2.11-cpan-437f7b0c052 )