Archive-BagIt
view release on metacpan or search on metacpan
lib/Archive/BagIt.pm view on Meta::CPAN
foreach my $local_entry (@paths) {
my $path_entry = File::Spec->catdir($current_dir, $self->__handle_nonportable_local_entry($local_entry, $dir));
if ((defined $excludedir) && ($path_entry eq $excludedir)) {
# ignore it, because excluded
} elsif (-f $path_entry) {
push @tmp_file_paths, $path_entry;
} elsif (-d $path_entry) {
push @todo, $path_entry;
} else {
croak "not a file nor a dir found '$path_entry'";
}
}
push @file_paths, sort @tmp_file_paths;
foreach my $subdir (sort @todo) {
&$finder($subdir);
}
};
my $absolute = File::Spec->rel2abs( $dir );
&$finder($absolute);
@file_paths = map { File::Spec->abs2rel( $_, $dir)} @file_paths;
return @file_paths;
}
sub _build_payload_files{
my ($self) = @_;
my $payload_dir = $self->payload_path;
my $reldir = File::Spec->abs2rel($payload_dir, $self->bag_path());
$reldir =~ s/^\.$//;
my @payload = map {
$reldir eq "" ? $_ : File::Spec->catfile($reldir, $_)
} $self->__file_find($payload_dir, File::Spec->rel2abs($self->metadata_path));
return wantarray ? @payload : \@payload;
}
sub __build_read_bagit_txt {
my($self) = @_;
my $bagit = $self->metadata_path;
my $file = File::Spec->catfile($bagit, "bagit.txt");
open(my $BAGIT, "<:encoding(UTF-8)", $file) or croak("Cannot read '$file': $!");
my $version_string = <$BAGIT>;
my $encoding_string = <$BAGIT>;
close($BAGIT);
if (defined $version_string) {
$version_string =~ s/[\r\n]//;
}
if (defined $encoding_string) {
$encoding_string =~s/[\r\n]//;
}
return ($version_string, $encoding_string, $file);
}
sub _build_bag_version {
my($self) = @_;
my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
croak "Version line missed in '$file" unless defined $version_string;
if ($version_string =~ /^BagIt-Version: ([01]\.[0-9]+)$/) {
return $1;
} else {
$version_string =~ s/\r/<CR>/;
$version_string =~ s/^\N{U+FEFF}/<BOM>/;
croak "Version string '$version_string' of '$file' is incorrect";
};
}
sub _build_bag_encoding {
my($self) = @_;
my ($version_string, $encoding_string, $file) = $self->__build_read_bagit_txt();
croak "Encoding line missed in '$file" unless defined $encoding_string;
croak "Encoding '$encoding_string' of '$file' not supported by current Archive::BagIt module!" unless ($encoding_string ne "UTF-8");
return $encoding_string;
}
sub __sort_bag_info {
my @sorted = sort {
my %tmpa = %{$a};
my %tmpb = %{$b};
my ($ka, $va) = each %tmpa;
my ($kb, $vb) = each %tmpb;
my $kres = $ka cmp $kb;
if ($kres != 0) {
return $kres;
} else {
return $va cmp $vb;
}
} @_;
return @sorted;
}
sub _extract_key_from_textblob {
my ($self, $textblob) = @_;
if (!defined $textblob) {
push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because textblob for key extraction is undefined";
return (undef, undef);
}
my $key;
my $rx_word = qr{[^: \t\r\n]+};# Hint: this word definition for bag-info.txt-keys differs from word definition of bag-info.txt-values!
my $rx_spc = qr{\s}; #qr{[\t ]};
if ($textblob =~ s/\A($rx_word)$rx_spc*:[\t ]*//m) {
# label if starts with chars not colon or whitespace followed by zero or more spaces, a colon, zero or more spaces
if ($textblob eq "") {
push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because following text blob not fullfill the match requirements for values: '$textblob', empty value detected";
return ($1, undef);
}
$key = $1;
} else {
push @{$self->{errors}}, "the baginfo file '".$self->{bag_info_file}."' could not be parsed correctly, because following text blob not fullfill the match requirements for keys: '$textblob'";
}
return ($key, $textblob);
}
sub _extract_value_from_textblob {
my ($self, $textblob) = @_;
if (!defined $textblob) {
push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because textblob for value extraction is undefined";
return (undef, undef);
}
if ($textblob eq "") {
push @{$self->{errors}}, "the baginfo file '" . $self->{bag_info_file} . "' could not be parsed correctly, because textblob for value extraction is empty";
return (undef, "");
lib/Archive/BagIt.pm view on Meta::CPAN
}
sub make_bag {
my ($class, $bag_path, $options) = @_;
my $isa = ref $class;
if ($isa eq "Archive::BagIt") { # not a class, but an object!
croak "make_bag() only a class subroutine, not useable with objects. Try store() instead!\n";
}
my $self = $class->init_metadata($bag_path, $options);
return $self;
}
__PACKAGE__->meta->make_immutable;
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Archive::BagIt - The main module to handle bags.
=head1 VERSION
version 0.101
=head1 NAME
Achive::BagIt - The main module to handle Bags
=head1 SOURCE
The original development version was on github at L<http://github.com/rjeschmi/Archive-BagIt>
and may be cloned from there.
The actual development version is available at L<https://git.fsfe.org/art1pirat/Archive-BagIt>
=head1 Conformance to RFC8493
The module should fulfill the RFC requirements, with following limitations:
=over
=item only encoding UTF-8 is supported
=item version 0.97 or 1.0 allowed
=item version 0.97 requires tag-/manifest-files with md5-fixity
=item version 1.0 requires tag-/manifest-files with sha512-fixity
=item BOM is not supported
=item Carriage Return in bagit-files are not allowed
=item fetch.txt is unsupported
=back
At the moment only filepaths in linux-style are supported.
To get an more detailled overview, see the testsuite under F<t/verify_bag.t> and corresponding test bags from the BagIt conformance testsuite of Library of Congress under F<bagit_conformance_suite/>.
See L<https://datatracker.ietf.org/doc/rfc8493/?include_text=1> for details.
=head1 TODO
=over
=item enhanced testsuite
=item reduce complexity
=item use modern perl code
=item add flag to enable very strict verify
=back
=head1 Backward Compatibility
To reduce the complexity of code in current module the support for
=over
=item parallel processing
=item synchronous I/O
=back
is removed. The existing code is very fast, so there is no performance loss.
In near future the support for L<Archive::BagIt::Fast> will be removed, because it needs hooks, which increase code
complexity in current module without any performance benefit.
=head1 FAQ
=head2 How to access the manifest-entries directly?
Try this:
foreach my $algorithm ( keys %{ $self->manifests }) {
my $entries_ref = $self->manifests->{$algorithm}->manifest_entries();
# $entries_ref returns a hashref like:
# {
# data/hello.txt "e7c22b994c59d9cf2b48e549b1e24666636045930d3da7c1acb299d1c3b7f931f94aae41edda2c2b207a36e10f8bcb8d45223e54878f5b316e7ce3b6bc019629"
# }
}
Similar for tagmanifests
=head2 How fast is L<Archive::BagIt>?
( run in 1.327 second using v1.01-cache-2.11-cpan-524268b4103 )