Bio-EBI-RNAseqAPI
view release on metacpan or search on metacpan
lib/Bio/EBI/RNAseqAPI.pm view on Meta::CPAN
#LWP::UserAgent object with current user's proxy settings. Lazy build.
#=cut
# Attribute "user_agent": read-write, constrained to LWP::UserAgent objects,
# with lazy_build switched on so the value is only built when first needed.
has(
    'user_agent',
    isa        => 'LWP::UserAgent',
    is         => 'rw',
    lazy_build => 1,
);
#=item B<log_writer>
#Log::Log4perl::Logger object, used for logging error messages. Lazy build.
#=cut
# Attribute "log_writer": read-write, constrained to Log::Log4perl::Logger
# objects, with lazy_build switched on so the logger is only built when first
# needed.
has(
    'log_writer',
    isa        => 'Log::Log4perl::Logger',
    is         => 'rw',
    lazy_build => 1,
);
=item B<run_organism_list>
An anonymous hash containing allowed organism names for downloading run
information as keys. Access the contents like this:
my $runOrganisms = $rnaseqAPI->get_run_organism_list;
=cut
# Attribute "run_organism_list": read-write hash reference whose keys are the
# organism names allowed for run queries. Built lazily (lazy_build).
has(
    'run_organism_list',
    isa        => 'HashRef',
    is         => 'rw',
    lazy_build => 1,
);
=item B<expression_organism_list>
An anonymous hash containing allowed organism names for downloading gene
expression information as keys. Access the contents like this:
my $expressionOrganisms = $rnaseqAPI->get_expression_organism_list;
=cut
# Attribute "expression_organism_list": read-write hash reference whose keys
# are the organism names allowed for gene expression queries. Built lazily
# (lazy_build).
has(
    'expression_organism_list',
    isa        => 'HashRef',
    is         => 'rw',
    lazy_build => 1,
);
=back
=head1 METHODS
=head2 Analysis results per sequencing run
These functions take arguments in the form of a hash. These usually
consist of a study accession, or one or more run accessions, plus a value for
"minimum_mapped_reads". This value represents the minimum percentage of mapped
reads to allow for each run in the results. Only information for runs with a
percentage of mapped reads greater than or equal to this value will be
returned. To get all available information, set "minimum_mapped_reads" to zero.
Analysis information for each run is returned in an anonymous hash. Some
functions return anonymous arrays with one anonymous hash per run found. See
below for examples and more information about the results.
=over 2
=item B<get_run>
Accesses the API's C<getRun> JSON endpoint and returns analysis information for
a single run, passed in the arguments.
Arguments should be passed as a hash containing values for "run" and
"minimum_mapped_reads", e.g.:
my $runInfo = $rnaseqAPI->get_run(
run => "ERR030885",
minimum_mapped_reads => 0
);
Run analysis information is returned in an anonymous hash. Returns C<undef> (and
logs errors) if errors are encountered.
An example of the hash returned is as follows:
{
'BIOREP_ID' => 'ERR030885',
'RUN_IDS' => 'ERR030885',
'REFERENCE_ORGANISM' => 'homo_sapiens',
'MAPPING_QUALITY' => 96,
'ASSEMBLY_USED' => 'GRCh38',
'ORGANISM' => 'homo_sapiens',
'BEDGRAPH_LOCATION' => 'ftp://ftp.ebi.ac.uk/pub/databases/arrayexpress/data/atlas/rnaseq/ERR030/ERR030885/ERR030885.bedgraph',
'ENA_LAST_UPDATED' => 'Mon Aug 18 2014 13:40:46',
'STUDY_ID' => 'ERP000546',
'BIGWIG_LOCATION' => 'ftp://ftp.ebi.ac.uk/pub/databases/arrayexpress/data/atlas/rnaseq/ERR030/ERR030885/ERR030885.bw',
'CRAM_LOCATION' => 'ftp://ftp.ebi.ac.uk/pub/databases/arrayexpress/data/atlas/rnaseq/ERR030/ERR030885/ERR030885.cram',
'LAST_PROCESSED_DATE' => 'Sun Jul 12 2015 23:31:47',
'STATUS' => 'Complete',
'SAMPLE_IDS' => 'SAMEA962348'
}
=cut
sub get_run {
my ( $self, %args ) = @_;
my $logger = $self->get_log_writer;
unless( $self->_hash_arguments_ok( \%args, "run", "minimum_mapped_reads" ) ) {
$logger->error(
"Problem with arguments to \"get_run\" function."
);
return;
}
lib/Bio/EBI/RNAseqAPI.pm view on Meta::CPAN
'PCT_OF_ALL_RUNS' => 100,
'NUM_OF_RUNS' => 28,
'TYPE' => 'organism'
},
{
'VALUE' => '7 days after germination',
'STUDY_ID' => 'DRP000391',
'PCT_OF_ALL_RUNS' => 29,
'NUM_OF_RUNS' => 8,
'TYPE' => 'developmental stage'
}
]
=cut
# Query the "getSampleAttributesCoverageByStudy" REST function for one study.
#
# Arguments:
#   $studyAcc - study accession; must be a true value.
#
# Returns the value handed back by _run_rest_call when it is true. If the
# accession is missing, or the REST call yields a false value, an error is
# logged and the sub returns nothing (undef in scalar context, the empty list
# in list context).
sub get_sample_attributes_coverage_by_study {

    my ( $self, $studyAcc ) = @_;

    my $logger = $self->get_log_writer;

    unless( $studyAcc ) {
        $logger->error(
            "get_sample_attributes_coverage_by_study requires a study accession as an argument."
        );
        return;
    }

    my $restResult = $self->_run_rest_call(
        {
            function_name       => "getSampleAttributesCoverageByStudy",
            function_argument   => $studyAcc
        }
    );

    return $restResult if $restResult;

    $logger->error(
        "Problem retrieving sample attributes for ",
        $studyAcc
    );

    # Explicit return: without it the sub would fall off the end and hand the
    # caller the return value of the logger call above rather than undef.
    return;
}
=back
=head2 Baseline gene expression per tissue, cell type, developmental stage, sex, and strain
=over 2
=item B<get_expression_by_organism_genesymbol>
Accesses the API's C<getExpression> endpoint. Provide arguments as a hash,
passing an organism name and a gene symbol, as well as a value for the minimum
percentage of mapped reads to allow:
my $geneExpressionInfo = $rnaseqAPI->get_expression_by_organism_genesymbol(
minimum_mapped_reads => 0,
organism => "oryza_sativa",
gene_symbol => "BURP7"
);
Results are returned as an anonymous array of anonymous hashes, with one
anonymous hash per unique combination of tissue, cell type, developmental
stage, sex, and strain. The median expression level of all runs is given in TPM
(transcripts per million). Returns C<undef> (and logs errors) if errors are
encountered.
An example of the results returned is as follows:
[
{
'COEFFICIENT_OF_VARIATION' => '0.3',
'STRAIN' => 'NA',
'DEVELOPMENTAL_STAGE' => 'seedling, two leaves visible, three leaves visible',
'CELL_TYPE' => 'NA',
'SEX' => 'NA',
'GENE_ID' => 'OS05G0217700',
'MEDIAN_EXPRESSION' => '831.1',
'NUMBER_OF_RUNS' => 60,
'ORGANISM' => 'oryza_sativa',
'ALL_SAMPLE_ATTRIBUTES' => 'http://www.ebi.ac.uk/fg/rnaseq/api/tsv/getSampleAttributesByCondition/3238',
'ORGANISM_PART' => 'shoot, vascular leaf'
},
{
'STRAIN' => 'NA',
'DEVELOPMENTAL_STAGE' => '20 days after sowing',
'COEFFICIENT_OF_VARIATION' => '0.3',
'CELL_TYPE' => 'NA',
'GENE_ID' => 'OS05G0217700',
'SEX' => 'NA',
'ORGANISM' => 'oryza_sativa',
'NUMBER_OF_RUNS' => 4,
'MEDIAN_EXPRESSION' => '433.5',
'ALL_SAMPLE_ATTRIBUTES' => 'http://www.ebi.ac.uk/fg/rnaseq/api/tsv/getSampleAttributesByCondition/3192',
'ORGANISM_PART' => 'leaf'
},
=cut
# Query the "getExpression" REST function by organism name and gene symbol.
#
# Arguments (passed as a hash):
#   minimum_mapped_reads - value placed in the REST call for the minimum
#                          percentage of mapped reads.
#   organism             - organism name; checked with _organism_name_ok
#                          against the "expression" organism list.
#   gene_symbol          - gene symbol to look up.
#
# Returns the value handed back by _run_rest_call when it is true. If the
# arguments fail validation, the organism is not allowed, or the REST call
# yields a false value, the sub returns nothing (undef in scalar context, the
# empty list in list context). An error is logged here for bad arguments and
# for a failed REST call.
sub get_expression_by_organism_genesymbol {

    my ( $self, %args ) = @_;

    my $logger = $self->get_log_writer;

    unless( $self->_hash_arguments_ok( \%args, "minimum_mapped_reads", "organism", "gene_symbol" ) ) {
        $logger->error(
            "Problem with arguments to \"get_expression_by_organism_genesymbol\" function."
        );
        return;
    }

    # Fail if the organism isn't recognised.
    unless( $self->_organism_name_ok( $args{ "organism" }, "expression" ) ) {
        return;
    }

    my $restResult = $self->_run_rest_call(
        {
            minimum_mapped_reads    => $args{ "minimum_mapped_reads" },
            function_name           => "getExpression",
            function_argument       => $args{ "organism" } . "/" . $args{ "gene_symbol" }
        }
    );

    return $restResult if $restResult;

    # The trailing "\"." closes the quote opened around the organism name.
    $logger->error(
        "Problem retrieving expression information for gene \"",
        $args{ "gene_symbol" },
        "\" in organism \"",
        $args{ "organism" },
        "\"."
    );

    # Explicit return: without it the sub would fall off the end and hand the
    # caller the return value of the logger call above rather than undef.
    return;
}
=item B<get_expression_by_gene_id>
Accesses the API's C<getExpression> endpoint, but instead of querying by
organism and gene symbol (see L</get_expression_by_organism_genesymbol>), this
function queries by gene identifier. Also expects a value for the minimum
percentage of mapped reads to allow.
my $geneExpressionInfo = $rnaseqAPI->get_expression_by_gene_id(
gene_identifier => "ENSG00000172023",
minimum_mapped_reads => 0
);
Results are returned as an anonymous array of anonymous hashes, with one
anonymous hash per unique combination of tissue, cell type, developmental
stage, sex, and strain. See L</get_expression_by_organism_genesymbol> for an
example. The median expression level of all runs is given in TPM (transcripts
per million). Returns C<undef> (and logs errors) if errors are encountered.
=cut
# Query the "getExpression" REST function by gene identifier.
#
# Arguments (passed as a hash):
#   minimum_mapped_reads - value placed in the REST call for the minimum
#                          percentage of mapped reads.
#   gene_identifier      - gene identifier to look up.
#
# Returns the value handed back by _run_rest_call when it is true. If the
# arguments fail validation or the REST call yields a false value, an error is
# logged and the sub returns nothing (undef in scalar context, the empty list
# in list context).
sub get_expression_by_gene_id {

    my ( $self, %args ) = @_;

    my $logger = $self->get_log_writer;

    unless( $self->_hash_arguments_ok( \%args, "minimum_mapped_reads", "gene_identifier" ) ) {
        $logger->error(
            "Problem with arguments to \"get_expression_by_gene_id\" function."
        );
        return;
    }

    my $restResult = $self->_run_rest_call(
        {
            minimum_mapped_reads    => $args{ "minimum_mapped_reads" },
            function_name           => "getExpression",
            function_argument       => $args{ "gene_identifier" }
        }
    );

    return $restResult if $restResult;

    $logger->error(
        "Problem retrieving expression information for gene \"",
        $args{ "gene_identifier" },
        "\"."
    );

    # Explicit return: without it the sub would fall off the end and hand the
    # caller the return value of the logger call above rather than undef.
    return;
}
=back
=cut
# Logger builder.
sub _build_log_writer {
Log::Log4perl->easy_init(
lib/Bio/EBI/RNAseqAPI.pm view on Meta::CPAN
$logger->error(
"Arguments should be provided as a hash. See POD for examples."
);
return;
}
# The rest of the @_ array is the names of the keys that should be present.
my %wantedArgNames = map { $_ => 1 } @_;
# Create a flag to unset if at least one wanted argument is missing.
my $allWantedPresent = 1;
# Check that all the arguments we want are present.
foreach my $wantedArgName ( sort keys %wantedArgNames ) {
unless( defined( $argsHash->{ $wantedArgName } ) ) {
$logger->error(
"Required argument \"",
$wantedArgName,
"\" is missing."
);
$allWantedPresent = 0;
}
}
# Next, check whether there are any unrecognised arguments. Just warn about
# them if so.
foreach my $argName ( sort keys %{ $argsHash } ) {
unless( $wantedArgNames{ $argName } ) {
$logger->warn(
"Argument \"",
$argName,
"\" is not recognised."
);
}
}
# Now return the flag to show whether all wanted arguments are present or
# not.
return $allWantedPresent;
}
# REST call running -- common to all the querying functions.
# Build the REST URL for a query, fetch it, and return the parsed JSON.
#
# Arguments: a hash reference with the keys
#   function_name        - name of the API function; always appended.
#   function_argument    - optional; appended after a "/" when defined and
#                          non-empty.
#   minimum_mapped_reads - optional; when defined it is placed, followed by
#                          "/", before the function name.
#
# The URL has the form
#   <api_base>/json/[<minimum_mapped_reads>/]<function_name>[/<function_argument>]
#
# Returns the result of parse_json on the decoded response body. If the HTTP
# request is unsuccessful, or the body cannot be parsed, an error is logged
# and the sub returns nothing (undef in scalar context).
sub _run_rest_call {

    my ( $self, $args ) = @_;

    my $logger    = $self->get_log_writer;
    my $userAgent = $self->get_user_agent;

    # Start building the query URL.
    my $url = $self->get_api_base . "/json/";

    # If we're passed a minimum percentage of mapped reads, add this to the URL
    # next.
    if( defined( $args->{ "minimum_mapped_reads" } ) ) {
        $url .= $args->{ "minimum_mapped_reads" } . "/";
    }

    # Add the function name and argument to the end of the URL.
    $url .= $args->{ "function_name" };

    # Test for defined-and-non-empty rather than truth, so that an argument
    # of "0" is not silently dropped from the URL.
    my $functionArgument = $args->{ "function_argument" };
    if( defined( $functionArgument ) && length( $functionArgument ) ) {
        $url .= "/" . $functionArgument;
    }

    # Run HTTP GET request.
    my $response = $userAgent->get( $url );

    # If the request failed, log an error and return undef.
    unless( $response->is_success ) {
        $logger->error(
            "Problem retrieving URL: ",
            $url,
            " . Response from server: ",
            $response->status_line
        );
        return;
    }

    # Guard the parse: an exception raised while parsing the body is turned
    # into a logged error and an undef return, like every other failure here.
    # NOTE(review): this assumes parse_json throws on malformed input --
    # confirm against the parser this file imports.
    my $parsed;
    my $parsedOk = eval {
        $parsed = parse_json( $response->decoded_content );
        1;
    };
    my $parseError = $@;

    unless( $parsedOk ) {
        $logger->error(
            "Problem parsing JSON retrieved from URL: ",
            $url,
            " . Error: ",
            ( defined( $parseError ) && length( $parseError ) ? $parseError : "unknown error" )
        );
        return;
    }

    return $parsed;
}
# Check that the organism name is allowed. This checks the passed string
# against the keys of the hash stored in the "run_organism_list" attribute when
# $type is "run", or in the "expression_organism_list" attribute otherwise.
sub _organism_name_ok {
my ( $self, $organism, $type ) = @_;
my $logger = $self->get_log_writer;
my $organismList = ( $type eq "run" ? $self->get_run_organism_list : $self->get_expression_organism_list );
if( $organismList->{ $organism } ) {
return 1;
}
else {
$logger->error(
"Organism \"",
$organism,
"\" is not an allowed organism. Check organisms against the run_organism_list attribute."
);
return;
( run in 0.733 second using v1.01-cache-2.11-cpan-39bf76dae61 )