App-PDFLibrarian

 view release on metacpan or  search on metacpan

lib/App/PDFLibrarian/BibTeX.pm  view on Meta::CPAN

        my $title = $bibentry->get($bibfield);
        $title =~ s/\.+$//;
        my @words = split /\s+/, $title;
        foreach my $word (@words) {
          $word =~ s/[{}]//g;
          $word =~ s/((?:\\.)?[A-Z]+)/\{$1\}/g;
          $word =~ s/\$\{([A-Z]+)\}\$/{\$$1\$}/g;
          $word =~ s/^\{([A-Z])\}/$1/
        }
        $title = join(" ", @words);
        $bibentry->set($bibfield, $title);
      }
    }

    # regularise BibTeX 'doi' field
    if ($bibentry->exists('doi')) {
      my $doi = $bibentry->get('doi');
      $doi =~ s|https?[:]//[\w.]*doi\.org/||g;
      $bibentry->set('doi', $doi);
    }

    # set missing BibTeX 'doi' field from arXiv e-print
    if (!$bibentry->exists('doi') && $bibentry->exists('archiveprefix') && $bibentry->exists('eprint')) {
      my $archiveprefix = $bibentry->get('archiveprefix');
      my $eprint = $bibentry->get('eprint');
      if ($archiveprefix =~ /arxiv/i) {
        my $doi = "10.48550/arXiv.$eprint";
        $bibentry->set('doi', $doi);
      }
    }

    # set missing BibTeX 'doi' field from hyphenated ISBN-13
    if (!$bibentry->exists('doi') && $bibentry->exists('isbn')) {
      my $isbn = $bibentry->get('isbn');
      if ($isbn =~ /^(97[89])-(\d+)-(\d+)-(\d+)-(\d+)/) {
        my $doi = "10.$1.$2$3/$4$5";
        $bibentry->set('doi', $doi);
      }
    }

    # set BibTeX 'url' field
    if ($bibentry->exists('doi')) {
      my $doi = $bibentry->get('doi');
      my $url = "https://doi.org/$doi";
      $bibentry->set('url', $url);
    } else {
      my @urlbibfields = grep { $_ =~ /.url$/ } $bibentry->fieldlist();
      if (@urlbibfields == 1) {
        $bibentry->set('url', $bibentry->get($urlbibfields[0]));
      }
    }

    # set BibTeX 'misc' entry 'howpublished' field
    if ($bibentry->type eq 'misc') {
      if ($bibentry->exists('archiveprefix')) {
        my $archiveprefix = $bibentry->get('archiveprefix');
        $bibentry->set('howpublished', $archiveprefix);
      }
    }

    # escape special characters
    foreach my $bibfield ($bibentry->fieldlist()) {
      my $bibfieldvalue = $bibentry->get($bibfield);
      if ($bibfield =~ /url$/) {

        # encode special URL characters
        $bibfieldvalue = uri_decode($bibfieldvalue);
        $bibfieldvalue = uri_encode($bibfieldvalue, {encode_reserved => 0, double_encode => 0});

      } else {

        # escape special TeX characters
        $bibfieldvalue =~ s{\\*&}{\\&}g;

      }
      $bibentry->set($bibfield, $bibfieldvalue);
    }

    # arrange BibTeX fields in the order given by @fieldorder
    my %order;
    my $orderidx;
    foreach my $bibfield (@fieldorder, sort { $a cmp $b } $bibentry->fieldlist()) {
      $order{$bibfield} = ++$orderidx if $bibentry->exists($bibfield) && !defined($order{$bibfield});
    }
    $order{'file'} = ++$orderidx if $bibentry->exists('file');
    my @fieldlist = sort { $order{$a} <=> $order{$b} } keys(%order);
    $bibentry->set_fieldlist(\@fieldlist);

    # output formatted entry
    push @fmtbibentries, $bibentry;

  }

  return @fmtbibentries;
}

# Print BibTeX entries to a filehandle, sorted by citation key.
#
# Arguments:
#   $opts       - hash reference of options:
#                   fh       => (required) filehandle to print to
#                   pdf_file => (optional) if defined, the 'file' field is left
#                               out of every printed entry; if it equals
#                               "comment", the value of the 'file' field is
#                               additionally printed as a "% file: ..." comment
#                               line in front of the entry
#   @bibentries - entry objects providing key(), clone(), delete(), get() and
#                 print_s()
#
# Fields are only ever removed from a clone() of each entry, never from the
# entry that was passed in. The 'checksum' field is never printed.
#
# Dies if the 'fh' option is missing, if an entry cannot be represented in
# ISO-8859-1 (encode() is called with Encode::FB_CROAK), or if printing to the
# filehandle fails.
sub write_bib_to_fh {
  my ($opts, @bibentries) = @_;

  # check options
  die "write_bib_to_fh: required option 'fh' is not defined" unless defined($opts->{fh});
  my $fh = $opts->{fh};

  # print BibTeX entries
  foreach my $origentry (sort { $a->key cmp $b->key } @bibentries) {

    # create a copy of BibTeX entry; a separate variable is used because the
    # loop variable aliases the element of @bibentries
    my $bibentry = $origentry->clone();

    # remove checksum before printing
    $bibentry->delete('checksum');

    # decide if/how to output PDF filename
    my $pdf_file_comment = "";
    if (defined($opts->{pdf_file})) {
      if ($opts->{pdf_file} eq "comment") {

        # entries without a 'file' field get no comment line, rather than an
        # empty "% file: " line and an uninitialized-value warning
        my $pdf_file = $bibentry->get("file");
        $pdf_file_comment = "% file: $pdf_file\n" if defined($pdf_file);

      }
      $bibentry->delete("file");
    }

    # print entry, without leading/trailing whitespace
    # NOTE(review): only the entry text is passed through encode(); the
    # "% file:" comment is printed as-is - confirm filenames need no encoding
    my $bibstr = $bibentry->print_s();
    $bibstr =~ s/^\s+//;
    $bibstr =~ s/\s+$//;
    print $fh "\n", $pdf_file_comment, encode('iso-8859-1', $bibstr, Encode::FB_CROAK), "\n"
      or die "write_bib_to_fh: could not print BibTeX entry: $!";

  }

  return;
}



( run in 1.719 seconds using v1.01-cache-2.11-cpan-2398b32b56e )