App-PDFLibrarian

 view release on metacpan or  search on metacpan

lib/App/PDFLibrarian/BibTeX.pm  view on Meta::CPAN

    my $bibentry = read_bib_from_str($bibstr);

    # save name of PDF file
    $bibentry->set('file', $pdffile);

    return $bibentry;
  };
  my @bibentries = parallel_loop("reading BibTeX entries from %i/%i PDF files", \@pdffiles, $body);

  # add checksums to BibTeX entries
  foreach my $bibentry (@bibentries) {
    my $checksum = bib_checksum($bibentry);
    $bibentry->set('checksum', $checksum);
  }

  return @bibentries;
}

sub format_bib {
  my ($opts, @bibentries) = @_;

  # check options
  $opts->{max_authors} = 0 unless defined($opts->{max_authors});
  $opts->{only_first_author} = 0 unless defined($opts->{only_first_author});

  # format BibTeX entries
  my @fmtbibentries;
  foreach my $bibentry (@bibentries) {

    # create a copy of BibTeX entry
    $bibentry = $bibentry->clone();

    # merge BibTeX field names which differ by 's', e.g. 'keyword' and 'keywords'
    foreach my $bibfield ($bibentry->fieldlist()) {
      if ($bibentry->exists($bibfield) && $bibentry->exists($bibfield . "s")) {
        my $bibfieldvalue = $bibentry->get($bibfield);
        my $bibfieldsvalue = $bibentry->get($bibfield . "s");
        if ($bibfieldvalue eq "") {
          $bibfieldvalue = $bibfieldsvalue;
        } elsif ($bibfieldsvalue ne "") {
          $bibfieldvalue .= ", " . $bibfieldsvalue;
        }
        $bibentry->set($bibfield, $bibfieldvalue);
        $bibentry->delete($bibfield . "s");
      }
    }

    # uniformly format authors, editors, and collaborations
    foreach my $bibfield (qw(author editor collaboration)) {
      if ($bibentry->exists($bibfield)) {

        # determine author format
        my $authorformat;
        if ($bibfield eq "collaboration") {
          $authorformat = new Text::BibTeX::NameFormat("l");
          $authorformat->set_text(BTN_LAST, "{", "}", undef, undef);
        } else {
          $authorformat = new Text::BibTeX::NameFormat("vljf");
          $authorformat->set_text(BTN_LAST, "{", "}", undef, undef);
          $authorformat->set_text(BTN_FIRST, undef, undef, undef, ".");
          $authorformat->set_options(BTN_FIRST, 1, BTJ_FORCETIE, BTJ_SPACE);
        }

        # iterate over authors
        my @authors = $bibentry->split($bibfield);
        foreach my $author (@authors) {

          # sanitise author string
          $author =~ s/~/ /g;
          $author =~ s/\.\s-/.-/g;
          $author =~ s/\bet\sal\.?/others/;

          if ($author ne "others") {

            # format author
            $author = Text::BibTeX::Name->new($author);
            $author = $authorformat->apply($author);

            # use braces around special character commands
            $author =~ s/\\(\W)(\w)/\{\\$1$2\}/g;
            $author =~ s/\\(\w+)\s+(\w)/\\$1\{$2\}/g;

            # remove duplicate braces
            my @parts = split(",", $author);
            foreach my $part (@parts) {
              while ($part =~ s{\{\{(.*?(\{(?:(?>[^{}]+)|(?2))*\}.*?)*)\}\}}{\{$1\}}) {
                next;
              }
            }
            $author = join(",", @parts);

          }
        }

        # truncate author list
        if ($opts->{max_authors} > 0 && @authors > $opts->{max_authors}) {
          if ($opts->{only_first_author}) {
            @authors = ($authors[0]);
          } else {
            @authors = @authors[0 .. ($opts->{max_authors} - 1)];
          }
          push @authors, "others";
        }

        # set BibTeX field to concatenated authors
        $bibentry->set($bibfield, join(" and ", @authors));

      }
    }

    # handle e-print journals
    if ($bibentry->exists('journal') && $bibentry->exists('archiveprefix') && $bibentry->exists('eprint')) {
      my $journal = $bibentry->get('journal');
      my $archiveprefix = $bibentry->get('archiveprefix');
      my $eprint = $bibentry->get('eprint');
      if ($journal eq $archiveprefix) {
        $bibentry->set('pages', $eprint);
        $bibentry->set('eid', $eprint);
      }
    }



( run in 0.780 second using v1.01-cache-2.11-cpan-e1769b4cff6 )