Archive-BagIt-Fast

 view release on metacpan or  search on metacpan

lib/Archive/BagIt/Fast.pm  view on Meta::CPAN

}

sub sysread_based_digest {
    my $digestobj = shift;
    my $fh = shift;
    my $filesize = shift;
    my $data;
    sysread $fh, $data, $filesize;
    return _XXX_digest( $digestobj, \$data);
}

sub mmap_based_digest {
    my $digestobj = shift;
    my $fh = shift;
    my $filesize = shift;
    my $data='';
    if ($filesize > 0) {
        if (! IO::AIO::mmap $data, $filesize, IO::AIO::PROT_READ, IO::AIO::MAP_SHARED, $fh) {
            carp "mmap fails, fall back to sysread";
            sysread $fh, $data, $filesize;
        };
    }
    return _XXX_digest($digestobj, \$data);
}

has 'digest_callback' => (
    is      => 'ro',
    lazy    => 1,

    builder => sub {
        my ($self) = shift;
        #my $sub = sub {
        #    my ($digestobj, $filename) = @_;
        #    open(my $fh, "<:raw", "$filename") or croak ("Cannot open $filename, $!");
        #    binmode($fh);
        #    my $digest = $digestobj->get_hash_string($fh);
        #    close $fh || croak("could not close file '$filename', $!");
        #    return $digest;
        #};
        my $sub = sub {
            my $digestobj = shift;
            my $filename =shift;
            my $opts = shift;
            my $MMAP_MIN = $opts->{mmap_min} || 8000000;
            my $filesize = -s $filename;
            open(my $fh, "<:raw", "$filename") or croak ("Cannot open $filename, $!");
            $self->{stats}->{files}->{"$filename"}->{size}= $filesize;
            $self->{stats}->{size} += $filesize;
            my $start_time = time();
            my $digest;
            if ($filesize < $MMAP_MIN ) {
                return sysread_based_digest($digestobj, $fh, $filesize);
            }
            elsif ( $filesize < 1500000000) {
                return mmap_based_digest($digestobj, $fh, $filesize);
            }
            else {
                $digest = $digestobj->get_hash_string($fh);
            }
            my $finish_time = time();
            $self->{stats}->{files}->{"$filename"}->{verify_time}= ($finish_time - $start_time);
            $self->{stats}->{verify_time} += ($finish_time-$start_time);
            close($fh);
            return $digest;
        };
        return $sub;
    }
);

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Archive::BagIt::Fast - A module to use L<IO::AIO> to get better performance

=head1 VERSION

version 0.090

=head1 NAME

Archive::BagIt::Fast - For people who are willing to rely on some other modules in order to get better performance

=head1 HINTs

Use this module only if you have *measured* that your environment has a benefit. The results vary highly depending on
typical file size, filesystem and storage systems.

=head1 METHODS

=over

=item digest_callback()

register a callback function with method to calculate and return all digests for a a list of files using a Digest-object. This method implements fast
file access using memory mapped I/O by L<IO::AIO>.

=item mmap_based_digest($digestobj, $fh, $filesize)

internal funtion which uses mmap to calculate digest. Called by C<digest_callback>

=item sysread_based_digest($digestobj, $fh, $filesize)

internal function which uses sysread to calculate digest. Called by C<digest_callback>

=back

=head1 AVAILABILITY

The latest version of this module is available from the Comprehensive Perl
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
site near you, or see L<https://metacpan.org/module/Archive::BagIt::Fast/>.

=head1 BUGS AND LIMITATIONS

You can make new bug reports, and view existing ones, through the



( run in 2.077 seconds using v1.01-cache-2.11-cpan-99c4e6809bf )