Bib-Tools
view release on metacpan or search on metacpan
lib/Bib/Tools.pm view on Meta::CPAN
# nb: doesn't work with google scholar search results
my $self = shift @_;
my $url = shift @_;
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $req = HTTP::Request->new(GET => $url);
my $res = $ua->request($req);
if ($res->is_success) {
my $tree= HTML::TreeBuilder::XPath->new;
$tree->parse($res->decoded_content);
my @atitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/a[@class="gsc_a_at"]');
my @authors=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][1]');
my @jtitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][2]');
my $len1 = @atitles; my $len2 = @authors; my $len3 = @jtitles;
if (($len1 != $len2) || ($len1 != $len3) || ($len2 != $len3)) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors/$len3 journals.");return []}
for (my $i = 0; $i<$len1; $i++) {
# these are already utf8
$authors[$i] = decode_entities($authors[$i]);
$atitles[$i] = decode_entities($atitles[$i]);
$jtitles[$i] = decode_entities($jtitles[$i]);
lib/Bib/Tools.pm view on Meta::CPAN
# scrape paper details from google scholar search results -- *not* from a person's scholar home page
my $self = shift @_;
my $url = shift @_;
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $req = HTTP::Request->new(GET => $url);
my $res = $ua->request($req);
if ($res->is_success) {
my $tree= HTML::TreeBuilder::XPath->new;
$tree->parse($res->decoded_content);
my @atitles=$tree->findvalues('//div[@class="gs_ri"]/h3/a');
my @authors=$tree->findvalues('//div[@class="gs_a"]');
my $len1 = @atitles; my $len2 = @authors;
if ($len1 != $len2) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors.");return [];}
my @cites=();
for (my $i = 0; $i<$len1; $i++) {
$authors[$i] = decode_entities($authors[$i]);
$atitles[$i] = decode_entities($atitles[$i]);
my $str = $authors[$i].", ".$atitles[$i];
if (length($str)>5) { # a potentially useful entry ?
lib/Bib/Tools.pm view on Meta::CPAN
my $self = shift @_;
my $url = shift @_;
my $maxnum = shift @_; if (!defined($maxnum)) {$maxnum=-1;}
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $req = HTTP::Request->new(GET => $url);
my $res = $ua->request($req);
if ($res->is_success) {
my $xs = XML::Simple->new();
my $data = $xs->XMLin($res->decoded_content);
my @cites; my @ctemp;
if (defined $data->{'r'}) {
# a person page
@cites = $data->{'r'};
} elsif (defined $data->{'article'}) {
# it's xml for a single article
$ctemp[0] = $data;
push @cites, \@ctemp;
}
my $num=0;
lib/Bib/Tools.pm view on Meta::CPAN
# get paper details from orcid using API
my $self = shift @_; my $orcid_id = shift @_;
my $ua = LWP::UserAgent->new;
my $req = HTTP::Request->new(GET => "http://pub.orcid.org/$orcid_id/orcid-works/");
my $res = $ua->request($req);
if ($res->is_success) {
my $xs = XML::Simple->new();
# the orcid response is utf8 xml
my $data = $xs->XMLin($res->decoded_content);
my @cites = $data->{'orcid-profile'}->{'orcid-activities'}->{'orcid-works'}->{'orcid-work'};
foreach my $cite (@{$cites[0]}) {
my $entry = undef;
if ($cite->{'work-citation'}->{'work-citation-type'} =~ m/bibtex/) {
# we have a bibtex reference, extract some extra info
my $bibtex = $cite->{'work-citation'}->{'citation'};
open my $fh, '<', \$bibtex;
my $parser = BibTeX::Parser->new($fh);
$entry = $parser->next;
if (!$entry->parse_ok) {$entry = undef;}
lib/Bib/Tools.pm view on Meta::CPAN
####################################################################################
sub add_pubmed {
# add results from a pubmed query
my ($self,$q) = @_;
my $ua = LWP::UserAgent->new;
$q =~ s/\s+/+/g;
my $req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?usehistory=y&db=pubmed&term=".$q);
my $res = $ua->request($req);
if ($res->is_success) {
my $web = $1 if ($res->decoded_content =~ /<WebEnv>(\S+)<\/WebEnv>/);
my $key = $1 if ($res->decoded_content =~ /<QueryKey>(\d+)<\/QueryKey>/);
$req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&query_key=$key&WebEnv=$web");
$res = $ua->request($req);
if ($res->is_success) {
my $xs = XML::Simple->new();
my $data = $xs->XMLin($res->decoded_content);
my @cites = $data->{'DocSum'};
foreach my $cite (@{$cites[0]}) {
my $c = $cite->{'Item'};
if (ref($c) ne "ARRAY") {next;}
my $r = Bib::CrossRef->new;
my $doi = _find_pubmed($c,'DOI','content');
if (defined $doi) {
# PubMed is reliable, no need to call crossref
# my $r = Bib::CrossRef->new;
# $r->parse_text($doi);
( run in 0.260 second using v1.01-cache-2.11-cpan-26ccb49234f )