Bib-Tools
view release on metacpan or search on metacpan
lib/Bib/Tools.pm view on Meta::CPAN
####################################################################################
sub add_google {
# scrape paper details from a Google Scholar personal page -- nb: Google provides no DOI info, so crossref.org is used to obtain it
# nb: doesn't work with google scholar search results
my $self = shift @_;
my $url = shift @_;
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $req = HTTP::Request->new(GET => $url);
my $res = $ua->request($req);
if ($res->is_success) {
my $tree= HTML::TreeBuilder::XPath->new;
$tree->parse($res->decoded_content);
my @atitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/a[@class="gsc_a_at"]');
my @authors=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][1]');
my @jtitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][2]');
my $len1 = @atitles; my $len2 = @authors; my $len3 = @jtitles;
if (($len1 != $len2) || ($len1 != $len3) || ($len2 != $len3)) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors/$len3 journals.");return []}
for (my $i = 0; $i<$len1; $i++) {
lib/Bib/Tools.pm view on Meta::CPAN
}
####################################################################################
sub add_google_search {
# scrape paper details from Google Scholar search results -- *not* from a person's Scholar home page
my $self = shift @_;
my $url = shift @_;
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $req = HTTP::Request->new(GET => $url);
my $res = $ua->request($req);
if ($res->is_success) {
my $tree= HTML::TreeBuilder::XPath->new;
$tree->parse($res->decoded_content);
my @atitles=$tree->findvalues('//div[@class="gs_ri"]/h3/a');
my @authors=$tree->findvalues('//div[@class="gs_a"]');
my $len1 = @atitles; my $len2 = @authors;
if ($len1 != $len2) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors.");return [];}
my @cites=();
for (my $i = 0; $i<$len1; $i++) {
lib/Bib/Tools.pm view on Meta::CPAN
####################################################################################
sub add_dblp {
# get details using DBLP XML API
my $self = shift @_;
my $url = shift @_;
my $maxnum = shift @_; if (!defined($maxnum)) {$maxnum=-1;}
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $req = HTTP::Request->new(GET => $url);
my $res = $ua->request($req);
if ($res->is_success) {
my $xs = XML::Simple->new();
my $data = $xs->XMLin($res->decoded_content);
my @cites; my @ctemp;
if (defined $data->{'r'}) {
# a person page
@cites = $data->{'r'};
} elsif (defined $data->{'article'}) {
# it's XML for a single article
lib/Bib/Tools.pm view on Meta::CPAN
}
}
####################################################################################
sub add_orcid {
# get paper details from orcid using API
my $self = shift @_; my $orcid_id = shift @_;
my $ua = LWP::UserAgent->new;
my $req = HTTP::Request->new(GET => "http://pub.orcid.org/$orcid_id/orcid-works/");
my $res = $ua->request($req);
if ($res->is_success) {
my $xs = XML::Simple->new();
# the orcid response is utf8 xml
my $data = $xs->XMLin($res->decoded_content);
my @cites = $data->{'orcid-profile'}->{'orcid-activities'}->{'orcid-works'}->{'orcid-work'};
foreach my $cite (@{$cites[0]}) {
my $entry = undef;
if ($cite->{'work-citation'}->{'work-citation-type'} =~ m/bibtex/) {
# we have a bibtex reference, extract some extra info
lib/Bib/Tools.pm view on Meta::CPAN
return undef;
}
####################################################################################
sub add_pubmed {
# add results from a pubmed query
my ($self,$q) = @_;
my $ua = LWP::UserAgent->new;
$q =~ s/\s+/+/g;
my $req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?usehistory=y&db=pubmed&term=".$q);
my $res = $ua->request($req);
if ($res->is_success) {
my $web = $1 if ($res->decoded_content =~ /<WebEnv>(\S+)<\/WebEnv>/);
my $key = $1 if ($res->decoded_content =~ /<QueryKey>(\d+)<\/QueryKey>/);
$req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&query_key=$key&WebEnv=$web");
$res = $ua->request($req);
if ($res->is_success) {
my $xs = XML::Simple->new();
my $data = $xs->XMLin($res->decoded_content);
my @cites = $data->{'DocSum'};
foreach my $cite (@{$cites[0]}) {
my $c = $cite->{'Item'};
if (ref($c) ne "ARRAY") {next;}
my $r = Bib::CrossRef->new;
my $doi = _find_pubmed($c,'DOI','content');
( run in 0.245 second using v1.01-cache-2.11-cpan-de7293f3b23 )