Bio-Biblio

 view release on metacpan or  search on metacpan

lib/Bio/DB/Biblio/eutils.pm  view on Meta::CPAN


    # make a hashtable from @args
    my %param = @args;
    @param { map { lc $_ } keys %param } = values %param; # lowercase keys

    # copy all @args into this object (overwriting what may already be
    # there) - changing '-key' into '_key'
    my $new_key;
    foreach my $key (keys %param) {
        ($new_key = $key) =~ s/^-/_/;
        $self->{ $new_key } = $param { $key };
    }


    # set up internal data
    $self->twig(XML::Twig->new());

    # finally add default values for those keys who have default value
    # and who are not yet in the object

    #AOK
    return 1;
}


# Get or set the name of the Entrez database to query.
#
# Args    : (optional) database name, matched case-insensitively against
#           the supported set (pubmed, pmc, journals).  A false argument
#           (undef, '', 0) is treated as a plain read.
# Returns : the stored database name (always lowercase once set), or
#           undef if no name has been stored yet.
# Note    : an unsupported name leaves the stored value untouched and is
#           reported through $self->warn.
sub db {
    my ($self, $arg) = @_;

    if ($arg) {
        my %ok     = map { $_ => 1 } qw(pubmed pmc journals);
        my $wanted = lc $arg;

        if ($ok{$wanted}) {
            $self->{'db'} = $wanted;
        }
        else {
            # The fallback must be parenthesized: '.' binds tighter than
            # '||', so without the parentheses the whole concatenated
            # string (always true) is tested and 'pubmed' is never shown.
            $self->warn("invalid db $arg, keeping value as "
                        . ($self->{'db'} || 'pubmed'));
        }
    }

    return $self->{'db'};
}



# Interface-style alias for the collection_id() accessor: returns whatever
# collection_id() currently holds for this object.
sub get_collection_id {
    my ($self) = @_;
    return $self->collection_id();
}

# Interface-style alias for the count() accessor: returns whatever count()
# currently holds for this object.
sub get_count {
    my ($self) = @_;
    return $self->count();
}

# Fetch the record with the given id from the EFetch service.
#
# Args    : $id - identifier to fetch; must be defined.
# Returns : whatever get() yields for the EFetch URL (requested as
#           rettype=abstract, retmode=xml).
# Throws  : via $self->throw when $id is undef.
# Note    : the database is taken from db(), defaulting to 'pubmed'.
sub get_by_id {
    my ($self, $id) = @_;

    my $db = $self->db || 'pubmed';
    defined($id)
        or $self->throw("must provide valid ID, not undef");

    my $url    = "${EFETCH}?rettype=abstract&retmode=xml&db=${db}&id=${id}";
    my $record = get($url);

    return $record;
}


# Rewind the retrieval cursor to the first position (0).
# Returns : always 1.
sub reset_retrieval {
    my ($self) = @_;

    $self->cursor(0);

    return 1;
}


# Fetch the record at the current cursor position and advance the cursor.
#
# Returns : the result of get_by_id() for the id at the cursor, or
#           nothing (bare return) when has_next() reports no more items.
sub get_next {
    my ($self) = @_;

    if (!$self->has_next) {
        return;
    }

    my $ids      = $self->ids;
    my $position = $self->cursor;
    my $record   = $self->get_by_id( $ids->[$position] );

    # Step past the record just fetched.
    $self->cursor( $self->cursor + 1 );

    return $record;
}


# Collect up to $more further records by calling get_next() repeatedly.
#
# Args    : $more - maximum number of records to fetch.
# Returns : a reference to an array of the records obtained; collection
#           stops early at the first false value from get_next().
sub get_more {
    my $self = shift;
    my $more = shift;

    my @records;

    foreach my $attempt (1 .. $more) {
        my $record = $self->get_next() or last;
        push @records, $record;
    }

    return \@records;
}


# Report whether the cursor is still below the stored record count.
# Returns : 1 when more items remain, undef otherwise.
sub has_next {
    my ($self) = @_;

    if ($self->cursor < $self->count) {
        return 1;
    }

    # Explicit undef (not a bare return) so that list-context callers
    # still receive a single undef element.
    return undef;
}




# Run an ESearch query and cache its results on this object.
#
# Args    : $query - query phrase; it is URI-escaped before being sent.
# Returns : $self, so that calls can be chained.
# Throws  : via $self->throw when get() returns nothing for the ESearch URL.
# Effects : stores the ids found (ids), their number (count), the
#           QueryKey (query_key) and WebEnv (collection_id) values when
#           the response carries them, and resets the cursor to 0.
sub find {
    my ($self, $query) = @_;

    $query = uri_escape($query);

    my $db  = $self->db || 'pubmed';
    my $url = $ESEARCH."?usehistory=y&db=$db&retmax=$MAX_RECORDS&term=$query";

    my $xml = get($url) or $self->throw("couldn't retrieve results from $ESEARCH: $!");

    my $twig = $self->twig;
    $twig->parse($xml);

    my @ids = map { $_->text } $twig->get_xpath('//IdList//Id');
    $self->ids(\@ids);

    # count always mirrors the ids actually held, because get_next()
    # indexes into that list.  It is set unconditionally so that a
    # response without a Count element cannot leave a stale count from an
    # earlier query paired with the new id list.
    $self->count(scalar @ids);

    my ($querykey_element) = $twig->get_xpath('//QueryKey');
    if (defined $querykey_element) {
        $self->query_key($querykey_element->text());
    }

    my ($webenv_element) = $twig->get_xpath('//WebEnv');
    if (defined $webenv_element) {
        $self->collection_id($webenv_element->text());
    }

    # initialize/reset cursor
    $self->cursor(0);

    return $self;
}



# Return the ids stored on this object.
# Returns : the stored value of ids() when it is true (find() stores an
#           array reference there); otherwise an empty list.
sub get_all_ids {
    my ($self) = @_;

    my $stored = $self->ids();
    if ($stored) {
        return $stored;
    }

    return ();
}


# Fetch every record of the last find() query in a single EFetch request,
# using the stored query_key and collection_id (WebEnv) values.
#
# Returns : whatever get() yields for the EFetch URL (requested as
#           rettype=abstract, retmode=xml).
# Note    : at most $MAX_RECORDS records are requested.
sub get_all {
    my ($self) = @_;

    # Honour the database selected through db() instead of always asking
    # for pubmed.
    my $db = $self->db || 'pubmed';

    # retstart is a 0-based index in EFetch, so 0 (not 1) is needed to
    # include the first record of the result set.
    my $xml = get($EFETCH.'?rettype=abstract&retmode=xml&db='.$db.
                  '&query_key='.$self->query_key.
                  '&WebEnv='.$self->collection_id.
                  '&retstart=0&retmax='.$MAX_RECORDS
                  );

    return $xml;
}


# No-op kept only for interface compatibility.
# Returns : undef in scalar context, an empty list in list context.
sub exists { return; }


# No-op kept only for interface compatibility.
# Returns : undef in scalar context, an empty list in list context.
sub destroy { return; }


# No-op kept only for interface compatibility.
# Returns : a reference to a new, empty array on every call.
sub get_vocabulary_names {
    my @names;
    return \@names;
}


# No-op kept only for interface compatibility.
# Returns : undef in scalar context, an empty list in list context.
sub contains { return; }


# No-op kept only for interface compatibility.
# Returns : undef in scalar context, an empty list in list context.
sub get_entry_description { return; }


# No-op kept only for interface compatibility.
# Returns : undef in scalar context, an empty list in list context.
sub get_all_values { return; }


# No-op kept only for interface compatibility.
# Returns : undef in scalar context, an empty list in list context.
sub get_all_entries { return; }


# Get or set the current position within the stored id list.
#
# Args    : (optional) new position; ignored when undef, so the cursor
#           cannot be cleared through this accessor.
# Returns : the stored cursor value.
sub cursor {
    my ($self, $arg) = @_;

    if (defined $arg) {
        $self->{'cursor'} = $arg;
    }

    return $self->{'cursor'};
}


# Get or set the XML parser object held in the 'twig' slot.
#
# Args    : (optional) new value; any supplied argument, including undef,
#           replaces the stored one.
# Returns : the stored value.
sub twig {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'twig'} = $args[0];
    }

    return $self->{'twig'};
}


# Get or set the value held in the 'ids' slot (find() stores a reference
# to its array of ids here).
#
# Args    : (optional) new value; any supplied argument, including undef,
#           replaces the stored one.
# Returns : the stored value.
sub ids {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'ids'} = $args[0];
    }

    return $self->{'ids'};
}


# Get or set the value held in the 'collection_id' slot (find() stores
# the WebEnv text of the ESearch response here).
#
# Args    : (optional) new value; any supplied argument, including undef,
#           replaces the stored one.
# Returns : the stored value.
sub collection_id {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'collection_id'} = $args[0];
    }

    return $self->{'collection_id'};
}


# Get or set the value held in the 'count' slot (find() stores the
# number of ids it collected here).
#
# Args    : (optional) new value; any supplied argument, including undef,
#           replaces the stored one.
# Returns : the stored value.
sub count {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'count'} = $args[0];
    }

    return $self->{'count'};
}


# Get or set the value held in the 'query_key' slot (find() stores the
# QueryKey text of the ESearch response here).
#
# Args    : (optional) new value; any supplied argument, including undef,
#           replaces the stored one.
# Returns : the stored value.
sub query_key {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'query_key'} = $args[0];
    }

    return $self->{'query_key'};
}

1;

__END__
=pod

=encoding utf-8

=head1 NAME

Bio::DB::Biblio::eutils - access to PubMed's bibliographic query service

=head1 VERSION

version 1.70

=head1 SYNOPSIS

Do not use this object directly, it is recommended to access it and use
it through the I<Bio::Biblio> module:

  use Bio::Biblio;
  use Bio::Biblio::IO;

  my $biblio = Bio::Biblio->new( -access => 'eutils' );
  $biblio->find("10336996");
  my $xml = $biblio->get_next;
  my $io = Bio::Biblio::IO->new( -data => $xml,
                                 -format => 'medlinexml' );
  my $article = $io->next_bibref();

The main documentation details are to be found in
L<Bio::DB::BiblioI>.

=head1 ATTRIBUTES

=head2 db

 Title   : db
 Usage   : $obj->db($newval)
 Function: specifies the database to search.  valid values are:

           pubmed, pmc, journals

           it is also possible to add the following, and i will do
           so on request:

           genome, nucleotide, protein, popset, snp, sequence, taxonomy

           pubmed is default.

 Returns : value of db (a scalar)
 Args    : on set, new value (a scalar or undef, optional)

=head2 has_next

  Title   : has_next
  Usage   : $has_next = $biblio->has_next();
  Function: check to see if there are more items to be retrieved
  Returns : 1 on true, undef on false
  Args    : none

=head1 METHODS

=head2 get_collection_id

  Title   : get_collection_id
  Usage   : $id = $biblio->get_collection_id();
  Function: returns WebEnv value from ESearch
  Returns : ESearch WebEnv value as a string
  Args    : none

=head2 reset_retrieval

  Title   : reset_retrieval
  Usage   : $biblio->reset_retrieval();
  Function: reset cursor in id list, see cursor()
  Returns : 1
  Args    : none

=head2 get_next

  Title   : get_next
  Usage   : $xml = $biblio->get_next();
  Function: return next record as xml
  Returns : an xml string
  Args    : none

=head2 get_more

  Title   : get_more
  Usage   : $records = $biblio->get_more($more);
  Function: returns up to the next $more records
  Returns : a reference to an array of xml strings, one per record
  Args    : an integer representing how many records to retrieve

=head2 find

  Title   : find
  Usage   : $biblio = $biblio->find($pubmed_query_phrase);
  Function: perform a PubMed query using Entrez ESearch
  Returns : a reference to the object on which the method was called
  Args    : a PubMed query phrase.  See
            http://eutils.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
            for help on how to construct a query.

=head2 get_all_ids

  Title   : get_all_ids
  Usage   : $ids = $biblio->get_all_ids();
  Function: return the PubMed ids resulting from call to find()
  Returns : a reference to an array of PubMed ids, or an empty list
            (undef in scalar context) if no ids have been stored
  Args    : none

=head2 get_all

  Title   : get_all
  Usage   : $xml = $biblio->get_all();
  Function: retrieve all records from query
  Returns : return a large concatenated string of PubMed xml documents
  Args    : none

=head2 get_vocabulary_names

  Title   : get_vocabulary_names
  Usage   : do not use
  Function: no-op.  this is here only for interface compatibility
  Returns : empty arrayref
  Args    : none

=head2 get_entry_description

  Title   : get_entry_description
  Usage   : do not use
  Function: no-op.  this is here only for interface compatibility
  Returns : undef
  Args    : none

=head2 get_all_values

  Title   : get_all_values
  Usage   : do not use
  Function: no-op.  this is here only for interface compatibility
  Returns : undef
  Args    : none

=head2 get_all_entries

  Title   : get_all_entries
  Usage   : do not use
  Function: no-op.  this is here only for interface compatibility
  Returns : undef
  Args    : none

=head1 INTERNAL METHODS

=head2 _initialize

 Usage   : my $obj = Bio::Biblio->new(-access => 'eutils' ...);
           (_initialize is internally called from this constructor)
 Returns : 1 on success
 Args    : none

This is an actual new() method (except for the real object creation
and its blessing which is done in the parent class Bio::Root::Root in
method _create_object).

Note that this method is called always as an I<object> method (never as
a I<class> method) - and that the object who calls this method may
already be partly initiated (from Bio::Biblio::new method); so if you
need to do some tricks with the 'class invocation' you need to change
Bio::Biblio::new method, not this one.

=head2 exists

  Title   : exists
  Usage   : do not use
  Function: no-op.  this is here only for interface compatibility
  Returns : undef
  Args    : none

=head2 destroy

  Title   : destroy
  Usage   : do not use
  Function: no-op.  this is here only for interface compatibility
  Returns : undef
  Args    : none

=head2 contains

  Title   : contains
  Usage   : do not use
  Function: no-op.  this is here only for interface compatibility
  Returns : undef
  Args    : none

=head2 cursor

  Title   : cursor
  Usage   : $obj->cursor($newval)
  Function: holds position in reference collection
  Returns : value of cursor (a scalar)
  Args    : on set, new value (a scalar or undef, optional)

=head2 twig

  Title   : twig
  Usage   : $obj->twig($newval)
  Function: holds an XML::Twig instance.
  Returns : value of twig (a scalar)
  Args    : on set, new value (a scalar or undef, optional)

=head2 ids

  Title   : ids
  Usage   : $obj->ids($newval)
  Function: store pubmed ids resulting from find() query
  Returns : value of ids (a scalar)
  Args    : on set, new value (a scalar or undef, optional)

=head2 collection_id

  Title   : collection_id
  Usage   : $obj->collection_id($newval)
  Function: holds the WebEnv value from the ESearch document
  Returns : value of collection_id (a scalar)
  Args    : on set, new value (a scalar or undef, optional)

=head2 count

  Title   : count
  Usage   : $obj->count($newval)
  Function: holds the number of ids stored by the last find() query
  Returns : value of count (a scalar)
  Args    : on set, new value (a scalar or undef, optional)

=head2 query_key

  Title   : query_key
  Usage   : $obj->query_key($newval)
  Function: holds query_key from ESearch document
  Returns : value of query_key (a scalar)
  Args    : on set, new value (a scalar or undef, optional)

=head1 BUGS AND LIMITATIONS

=over 4

=item *

More testing and debugging needed to ensure that returned citations
are properly transferred even if they contain foreign characters.

=item *

Maximum record count (MAX_RECORDS) returned currently hard coded to
100K.

=item *

Biblio retrieval methods should be more tightly integrated with
L<Bio::Biblio::Ref> and L<Bio::DB::MeSH>.

=back

=head1 SEE ALSO



( run in 2.417 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )