Bib-Tools

 view release on metacpan or  search on metacpan

lib/Bib/Tools.pm  view on Meta::CPAN

  # nb: doesn't work with google scholar search results
  
  my $self = shift @_;
  my $url = shift @_;
  my $ua = LWP::UserAgent->new;
  $ua->agent('Mozilla/5.0');
  my $req = HTTP::Request->new(GET => $url);
  my $res = $ua->request($req);
  if ($res->is_success) {
    my $tree= HTML::TreeBuilder::XPath->new;
    $tree->parse($res->decoded_content);
    my @atitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/a[@class="gsc_a_at"]');
    my @authors=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][1]');
    my @jtitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][2]');
    my $len1 = @atitles; my $len2 = @authors; my $len3 = @jtitles;
    if (($len1 != $len2) || ($len1 != $len3) || ($len2 != $len3)) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors/$len3 journals.");return []}
    for (my $i = 0; $i<$len1; $i++) {
      # these are already utf8
      $authors[$i] = decode_entities($authors[$i]);
      $atitles[$i] = decode_entities($atitles[$i]);
      $jtitles[$i] = decode_entities($jtitles[$i]);

lib/Bib/Tools.pm  view on Meta::CPAN

  # scrape paper details from google scholar search results -- *not* from persons scholar home page

  my $self = shift @_;
  my $url = shift @_;
  my $ua = LWP::UserAgent->new;
  $ua->agent('Mozilla/5.0');
  my $req = HTTP::Request->new(GET => $url);
  my $res = $ua->request($req);
  if ($res->is_success) {
    my $tree= HTML::TreeBuilder::XPath->new;
    $tree->parse($res->decoded_content);
    my @atitles=$tree->findvalues('//div[@class="gs_ri"]/h3/a');
    my @authors=$tree->findvalues('//div[@class="gs_a"]');
    my $len1 = @atitles; my $len2 = @authors;
    if ($len1 != $len2) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors.");return [];}
    my @cites=();
    for (my $i = 0; $i<$len1; $i++) {
      $authors[$i] = decode_entities($authors[$i]);
      $atitles[$i] = decode_entities($atitles[$i]);
      my $str = $authors[$i].", ".$atitles[$i];
      if (length($str)>5) { # a potentially useful entry ?

lib/Bib/Tools.pm  view on Meta::CPAN

  my $self = shift @_;
  my $url = shift @_;
  my $maxnum = shift @_; if (!defined($maxnum)) {$maxnum=-1;}
  
  my $ua = LWP::UserAgent->new;
  $ua->agent('Mozilla/5.0');
  my $req = HTTP::Request->new(GET => $url);
  my $res = $ua->request($req);
  if ($res->is_success) {
    my $xs = XML::Simple->new();
    my $data = $xs->XMLin($res->decoded_content);
    my @cites; my @ctemp;
    if (defined $data->{'r'}) {
       # a person page
       @cites = $data->{'r'};
    } elsif (defined $data->{'article'}) { 
       # its xml for a single article
       $ctemp[0] = $data;
       push @cites, \@ctemp;
    }
    my $num=0;

lib/Bib/Tools.pm  view on Meta::CPAN

  # get paper details from orcid using API
  
  my $self = shift @_; my $orcid_id = shift @_;
  
  my $ua = LWP::UserAgent->new;
  my $req = HTTP::Request->new(GET => "http://pub.orcid.org/$orcid_id/orcid-works/");
  my $res = $ua->request($req);
  if ($res->is_success) {
    my $xs = XML::Simple->new();
    # the orcid response is utf8 xml
    my $data = $xs->XMLin($res->decoded_content);
    my @cites = $data->{'orcid-profile'}->{'orcid-activities'}->{'orcid-works'}->{'orcid-work'};
    foreach my $cite (@{$cites[0]}) {
      my $entry = undef;
      if ($cite->{'work-citation'}->{'work-citation-type'} =~ m/bibtex/) {
        # we have a bibtex reference, extract some extra info
        my $bibtex = $cite->{'work-citation'}->{'citation'};
        open my $fh, '<', \$bibtex;
        my $parser = BibTeX::Parser->new($fh);
        $entry = $parser->next;
        if (!$entry->parse_ok) {$entry = undef;}

lib/Bib/Tools.pm  view on Meta::CPAN

####################################################################################
sub add_pubmed {
  # add results from a pubmed query
  my ($self,$q) = @_;

  my $ua = LWP::UserAgent->new;
  $q =~ s/\s+/+/g;
  my $req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?usehistory=y&db=pubmed&term=".$q);
  my $res = $ua->request($req);
  if ($res->is_success) {
    my $web = $1 if ($res->decoded_content =~ /<WebEnv>(\S+)<\/WebEnv>/);
    my $key = $1 if ($res->decoded_content =~ /<QueryKey>(\d+)<\/QueryKey>/);
    $req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&query_key=$key&WebEnv=$web");
    $res = $ua->request($req);
    if ($res->is_success) {
      my $xs = XML::Simple->new();
      my $data = $xs->XMLin($res->decoded_content);
      my @cites = $data->{'DocSum'};
      foreach my $cite (@{$cites[0]}) {
        my $c = $cite->{'Item'};
        if (ref($c) ne "ARRAY") {next;}
        my $r = Bib::CrossRef->new;
        my $doi = _find_pubmed($c,'DOI','content');
        if (defined $doi) {
          # PubMed is reliable, no need to call crossref
        #  my $r = Bib::CrossRef->new;
        #  $r->parse_text($doi);



( run in 0.337 second using v1.01-cache-2.11-cpan-a9ef4e587e4 )