CPAN-Metadata-RDF

 view release on metacpan or  search on metacpan

lib/CPAN/Metadata/RDF.pm  view on Meta::CPAN


  $dbh->do("CREATE TABLE dictionary (
 id INTEGER, word,
 primary key (id)
)");

  $dbh->do("CREATE INDEX subject_idx on meta (subject)");
  $dbh->do("CREATE INDEX predicate_idx on meta (predicate)");
  $dbh->do("CREATE INDEX object_idx on meta (object)");
  $dbh->do("CREATE INDEX word_idx on dictionary (word)");
  $dbh->commit;
}

# Walk the BACKPAN mirror and store a set of metadata triples for every
# distribution archive (.tar.gz, .tgz or .zip) found under authors/id.
#
# The CPAN mirror is scanned first so that each BACKPAN archive can be
# flagged as still mirrored on CPAN or not.  Triples are written through
# $self->insert and committed once per archive; the database handle is
# disconnected when the walk finishes.
#
# Dies if a path under the BACKPAN authors/id tree does not match the
# expected /BACKPAN/authors/id/X/XY/PAUSEID/... layout, or if an archive
# cannot be opened or stat'ed.
sub generate {
  my($self) = @_;
  my $dbh = $self->dbh;

  # Remember every file on the CPAN mirror, keyed by its path relative to
  # the mirror root, so BACKPAN files can be looked up by the same key.
  my %mirrored;
  my $cpan = $self->cpan;
  my $dir = dir($cpan, "authors", "id");
  my @cpan_paths = File::Find::Rule->new->file->in($dir);
  foreach my $path (@cpan_paths) {
    my $suffix = $path;
    # \Q..\E: the mirror root is a literal prefix, not a pattern
    $suffix =~ s/^\Q$cpan\E//;
    $mirrored{$suffix}++;
  }

  my $backpan = $self->backpan;
  $dir = dir($backpan, "authors", "id");

  # File::Type object is reused for every archive
  my $file_type = File::Type->new;

  my @backpan_paths = File::Find::Rule->new->file->in($dir);
  foreach my $path (sort @backpan_paths) {
    # In scalar context (forced by "or") the list assignment yields the
    # number of captures, which is 0 when the match fails.
    my($cpanid, $file) = $path =~ m{
/BACKPAN/
authors/
id/
(?:.)/    # eg L
(?:..)/   # eg LB
([^/]+)/  # cpanid: LBROCARD
(?:.+/)?  # optionally author subdirectory
([^/]+?)$ # file
}x
      or die "Failed to parse path $path\n";

    # Only distribution archives are of interest; strip the extension so
    # $file becomes the dist-version string (eg Foo-Bar-1.23).
    next unless $file =~ s/\.(?:tar\.gz|tgz|zip)$//;

    # Three-arg open with a lexical handle: the path is never interpreted
    # as a mode, and the handle cannot leak into other code.
    open(my $fh, '<', $path) or die "Can't open '$path': $!";
    binmode($fh);
    my $distmd5 = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);

    my $suffix = $path;
    $suffix =~ s/^\Q$backpan\E//;

    # $file was captured without any "/" so it already is the bare name
    my $distversion = $file;

    my $format = $file_type->mime_type($path);

    my ($dist, $version) = $self->extract_name_version($distversion);

    # stat is expected to return an object with mtime/size accessors
    # (File::stat style); fail loudly instead of calling a method on undef.
    my $stat = stat($path) or die "Can't stat '$path': $!";
    my $datetime = DateTime->from_epoch(epoch => $stat->mtime)->datetime;
    my $filesize = $stat->size;

    my $mirrored = exists $mirrored{$suffix} ? "1" : "0";

    my $beta = $self->is_beta($path) ? "developer" : "public";

    my $identifier = "http://search.cpan.org/dist/$distversion/";

#    print "$cpanid: $file / $distversion / $dist / $version / $suffix / $datetime / $format / $filesize / $identifier\n";
#    print "$cpanid: $file\n";

# More meta:
# http://downlode.org/rdf/cpan/0.1/
# Title : main module name
# Creator: author name / email address
# Subject: the thing in the =name
# Description: synopsis
# Contributor: co-maintainers
# Source: ?
# Language: can we guess language?
# Relation:
# Coverage:
# Rights: license from meta.yml?

    $self->insert($identifier, "$CPANNS/suffix", $suffix);
    $self->insert($identifier, "$CPANNS/dist_version", $distversion);
    $self->insert($identifier, "$CPANNS/dist", $dist);
    $self->insert($identifier, "$CPANNS/release_status", $beta);
    $self->insert($identifier, "$CPANNS/version", $version);
    $self->insert($identifier, "$CPANNS/pause_id", $cpanid);
    $self->insert($identifier, "$CPANNS/dist_md5", $distmd5);
    $self->insert($identifier, "$CPANNS/mimetype", $format);
    $self->insert($identifier, "$CPANNS/file_size", $filesize);
    $self->insert($identifier, "$CPANNS/mirrored", $mirrored);
    $self->insert($identifier, "http://purl.org/dc/elements/1.1/date", $datetime);
    $self->insert($identifier, "http://purl.org/dc/elements/1.1/type", "http://purl.org/dc/dcmitype/Software");
    $self->insert($identifier, "http://purl.org/dc/elements/1.1/publisher", "http://www.cpan.org/");
    $self->insert($identifier, "http://purl.org/dc/elements/1.1/format", $format);
    $self->insert($identifier, "http://purl.org/dc/elements/1.1/identifier", $identifier);

    # One commit per archive
    $dbh->commit;
  }
  $dbh->disconnect;
}

# Store one (subject, predicate, object) triple in the meta table.
#
# Each of the three strings is first passed through $self->dictionary and
# the values it returns are what get written (presumably dictionary ids --
# confirm against dictionary()).  REPLACE INTO is used rather than INSERT.
#
# Returns whatever the statement handle's execute returns.
sub insert {
  my($self, $subject, $predicate, $object) = @_;
  my $dbh = $self->dbh;

  my $subject_id = $self->dictionary($subject);
  my $predicate_id = $self->dictionary($predicate);
  my $object_id = $self->dictionary($object);

  # generate calls this many times per archive with identical SQL, so let
  # DBI hand back the already-prepared statement instead of re-preparing it.
  my $sth = $dbh->prepare_cached("REPLACE INTO meta (subject, predicate, object) VALUES (?, ?, ?)");
  return $sth->execute($subject_id, $predicate_id, $object_id);
}



( run in 0.901 second using v1.01-cache-2.11-cpan-39bf76dae61 )