CPAN-Metadata-RDF
view release on metacpan or search on metacpan
lib/CPAN/Metadata/RDF.pm view on Meta::CPAN
$dbh->do("CREATE TABLE dictionary (
id INTEGER, word,
primary key (id)
)");
$dbh->do("CREATE INDEX subject_idx on meta (subject)");
$dbh->do("CREATE INDEX predicate_idx on meta (predicate)");
$dbh->do("CREATE INDEX object_idx on meta (object)");
$dbh->do("CREATE INDEX word_idx on dictionary (word)");
$dbh->commit;
}
# Walk the BACKPAN tree and record one set of metadata triples per
# distribution archive.
#
# Steps, as performed below:
#   1. Collect every file under <cpan>/authors/id, keyed by its path
#      relative to the CPAN root, so each BACKPAN file can be flagged as
#      currently mirrored or not.
#   2. For every file under <backpan>/authors/id whose name ends in
#      .tar.gz, .tgz or .zip, compute the MD5, MIME type, mtime, size,
#      dist name / version and release status, and store them through
#      $self->insert, committing once per archive.
#   3. Disconnect the database handle.
#
# Dies if a BACKPAN path does not match the expected
# /BACKPAN/authors/id/X/XY/PAUSEID/... layout or if an archive cannot be
# opened for reading.
sub generate {
    my ($self) = @_;
    my $dbh = $self->dbh;

    # Files present in the live CPAN mirror, keyed by root-relative path.
    my %mirrored;
    my $cpan = $self->cpan;
    my $dir  = dir($cpan, "authors", "id");
    foreach my $path (sort File::Find::Rule->new->file->in($dir)) {
        my $suffix = $path;
        # \Q..\E: the root is a literal path prefix, not a pattern.
        $suffix =~ s/^\Q$cpan\E//;
        $mirrored{$suffix}++;
    }

    my $backpan = $self->backpan;
    $dir = dir($backpan, "authors", "id");

    # The MIME sniffer is loop-invariant, so build it once.
    my $type_checker = File::Type->new;

    foreach my $path (sort File::Find::Rule->new->file->in($dir)) {
        my ($cpanid, $file) = $path =~ m{
/BACKPAN/
authors/
id/
(?:.)/ # eg L
(?:..)/ # eg LB
([^/]+)/ # cpanid: LBROCARD
(?:.+/)? # optionally author subdirectory
([^/]+?)$ # file
}x
            or die "Failed to parse path $path\n";

        # Only distribution archives are of interest; strip the extension
        # so $file becomes "Name-Version".
        next unless $file =~ s/\.(?:tar\.gz|tgz|zip)$//;

        # Three-argument open with a lexical handle: the path comes from
        # the filesystem and must never be interpreted as a mode string.
        open(my $fh, '<', $path) or die "Can't open '$path': $!";
        binmode($fh);
        my $distmd5 = Digest::MD5->new->addfile($fh)->hexdigest;
        close($fh);

        my $suffix = $path;
        $suffix =~ s/^\Q$backpan\E//;

        # $file was captured by [^/]+?, so it never contains a directory
        # part and can be used as the dist-version string directly.
        my $distversion = $file;

        my $format = $type_checker->mime_type($path);
        my ($dist, $version) = $self->extract_name_version($distversion);

        # stat() is used through its object interface (->mtime, ->size).
        my $stat     = stat($path);
        my $datetime = DateTime->from_epoch(epoch => $stat->mtime)->datetime;
        my $filesize = $stat->size;

        my $mirrored   = exists $mirrored{$suffix} ? "1" : "0";
        my $beta       = $self->is_beta($path) ? "developer" : "public";
        my $identifier = "http://search.cpan.org/dist/$distversion/";

        # print "$cpanid: $file / $distversion / $dist / $version / $suffix / $datetime / $format / $filesize / $identifier\n";
        # print "$cpanid: $file\n";

        # More meta:
        # http://downlode.org/rdf/cpan/0.1/
        # Title : main module name
        # Creator: author name / email address
        # Subject: the thing in the =name
        # Description: synopsis
        # Contributor: co-maintainers
        # Source: ?
        # Language: can we guess language?
        # Relation:
        # Coverage:
        # Rights: license from meta.yml?

        $self->insert($identifier, "$CPANNS/suffix",         $suffix);
        $self->insert($identifier, "$CPANNS/dist_version",   $distversion);
        $self->insert($identifier, "$CPANNS/dist",           $dist);
        $self->insert($identifier, "$CPANNS/release_status", $beta);
        $self->insert($identifier, "$CPANNS/version",        $version);
        $self->insert($identifier, "$CPANNS/pause_id",       $cpanid);
        $self->insert($identifier, "$CPANNS/dist_md5",       $distmd5);
        $self->insert($identifier, "$CPANNS/mimetype",       $format);
        $self->insert($identifier, "$CPANNS/file_size",      $filesize);
        $self->insert($identifier, "$CPANNS/mirrored",       $mirrored);
        $self->insert($identifier, "http://purl.org/dc/elements/1.1/date", $datetime);
        $self->insert($identifier, "http://purl.org/dc/elements/1.1/type", "http://purl.org/dc/dcmitype/Software");
        $self->insert($identifier, "http://purl.org/dc/elements/1.1/publisher", "http://www.cpan.org/");
        $self->insert($identifier, "http://purl.org/dc/elements/1.1/format", $format);
        $self->insert($identifier, "http://purl.org/dc/elements/1.1/identifier", $identifier);

        # One commit per archive.
        $dbh->commit;
    }

    $dbh->disconnect;
}
# Store one (subject, predicate, object) statement in the meta table.
#
# Each of the three strings is first passed through $self->dictionary and
# the values it returns are what get written (presumably integer ids from
# the dictionary table -- confirm against dictionary()).  The row is
# written with REPLACE INTO.
#
# Returns whatever the statement handle's execute() returns.
sub insert {
    my ($self, $subject, $predicate, $object) = @_;
    my $dbh = $self->dbh;

    # Look the terms up in the same order as before: subject, predicate,
    # object.
    my $subject_id   = $self->dictionary($subject);
    my $predicate_id = $self->dictionary($predicate);
    my $object_id    = $self->dictionary($object);

    # The SQL text is constant and this sub is called many times per
    # distribution, so reuse the prepared handle instead of re-preparing
    # the statement on every call.
    my $sth = $dbh->prepare_cached(
        "REPLACE INTO meta (subject, predicate, object) VALUES (?, ?, ?)"
    );
    return $sth->execute($subject_id, $predicate_id, $object_id);
}
( run in 0.901 second using v1.01-cache-2.11-cpan-39bf76dae61 )