Data-StreamDeserializer

 view release on metacpan or  search on metacpan

benchmark/ds_vs_eval.pl  view on Meta::CPAN

sub max(@);
sub sum(@);
sub avg(@);

# Unbuffered STDOUT so each progress message is shown before the step it
# announces has finished.
$| = 1;

# Command line: -h (help), -n <iterations>, -b <block size>, followed by the
# path of the file that contains the serialized data.
getopts 'hn:b:' => \my %opts or usage;
usage if $opts{h};
my $file = $ARGV[0] or usage;
die "File not found: $file\n" unless -f $file;

# Slurp the file with a three-argument open instead of shelling out to `cat`:
# the file name is never interpreted by a shell (spaces and metacharacters
# are safe) and no external program is required.  The :raw layer keeps the
# content byte-for-byte, so the length reported below is a byte count.
my $data = do {
    open my $fh, '<:raw', $file or die "Can't open $file: $!\n";
    local $/;
    <$fh>;
};
die "Can't read $file: $!\n" unless defined $data;

# A false value (missing option or 0) falls back to the default.
my $iterations = $opts{n} || 1000;
my $block_size = $opts{b} || 512;

printf "%s bytes were read\n", length $data;

# NOTE(review): string eval executes the whole input file as Perl code, so
# this benchmark must only be run on trusted test files.
print "First deserializing by eval...";
my $object = eval $data;
die "Can't eval input data: $@" if $@;
print " done\n";

print "First deserializing by Data::DeSerializer...";

benchmark/ds_vs_eval.pl  view on Meta::CPAN

# Stop immediately when the deserializer object reports an error.
if ($dsf->is_error) {
    die "Can't deserialize input data: " . $dsf->error;
}
print " done\n";

# The benchmark is only meaningful when both approaches yield the same
# result, so the two results are compared before any timing starts.
print "Check if deserialized objects are same...";
unless (compare_object($dsf->result, $object)) {
    die "deserialized object aren't the same\n";
}
print " done\n";

# Per-iteration durations: @delay for string eval, @delay_dsr for
# Data::StreamDeserializer.
# NOTE(review): the %3.3f / %2.4f formats suggest `time` is a sub-second
# timer (e.g. imported from Time::HiRes) -- confirm against the imports.
my (@delay, @delay_dsr);
my $time = time;
printf "\nStarting %d iterations for eval...", $iterations;
for (1 .. $iterations) {
    my $start = time;
    # The result is assigned (although unused) so that the measured work
    # includes storing the deserialized value.
    my $res = eval $data;
    push @delay, time - $start;
}
my $eval_time = time - $time;
printf " done (%3.3f seconds)\n", $eval_time;

printf "Starting %d iterations for Data::StreamDeserializer...", $iterations;

$time = time;
# Total number of ->next calls made over all iterations.
my $partcounter = 0;
for (1 .. $iterations) {
    my $start = time;
    # Explicit class-method call instead of the ambiguous indirect object
    # syntax (`new Data::StreamDeserializer ...`).
    my $dsr = Data::StreamDeserializer->new(
        data => $data, block_size => $block_size);

    # Call ->next until it returns true, counting every call that returned
    # false; the extra increment accounts for the final call that returned
    # true.
    $partcounter++ until $dsr->next;
    $partcounter++;

    push @delay_dsr, time - $start;
}

my $ds_time = time - $time;
printf " done (%3.3f seconds)\n", $ds_time;

# Report for the string-eval runs: iteration count plus the maximum, minimum
# and average of the per-iteration durations collected in @delay.
print("\nEval statistic:\n");
printf("\t%d iterations were done\n", $iterations);
printf("\tmaximum deserialization time: %2.4f seconds\n", max(@delay));
printf("\tminimum deserialization time: %2.4f seconds\n", min(@delay));
printf("\taverage deserialization time: %2.4f seconds\n", avg(@delay));

# Report for the Data::StreamDeserializer runs.  In addition to the figures
# above it shows the total number of ->next calls ($partcounter), the
# configured block size and the average duration of a single ->next call.
print("\nStreamDeserializer statistic:\n");
printf("\t%d iterations were done\n", $iterations);
printf("\t%d SUBiterations were done\n", $partcounter);
printf("\t%d bytes in one block in one iteration\n", $block_size);
printf("\tmaximum deserialization time: %2.4f seconds\n", max(@delay_dsr));
printf("\tminimum deserialization time: %2.4f seconds\n", min(@delay_dsr));
printf("\taverage deserialization time: %2.4f seconds\n", avg(@delay_dsr));
printf(
    "\taverage subiteration time:    %2.5f seconds\n",
    sum(@delay_dsr) / $partcounter
);

sub compare_object($$)
{
    my ($o1, $o2) = @_;

benchmark/ds_vs_eval.pl  view on Meta::CPAN


# Print the command-line help text to STDOUT and terminate the script with
# exit status 0.  Never returns to the caller.
sub usage()
{
    # Interpolating heredoc: $0 expands to the name the script was run as.
    my $help_text = <<"END_OF_USAGE";

    usage: perl $0 [OPTIONS] test_file

    OPTIONS:

        -h              - this helpscreen
        -n count        - iterations (default 1000)
        -b count        - bytes in one subiteration (default 512),
                            see perldoc Data::StreamDeserializer
                                hint: block_size
END_OF_USAGE

    print $help_text;
    exit 0;
}

sub min(@) {
    my $min = shift;
    for (@_) {

lib/Data/StreamDeserializer.pm  view on Meta::CPAN

=head2 Array which contains 100 hashes:

It works faster than B<eval>:

    $ perl benchmark/ds_vs_eval.pl -n 1000 -b 512 benchmark/tests/01_100x10
    38296 bytes were read
    First deserializing by eval... done
    First deserializing by Data::DeSerializer... done
    Check if deserialized objects are same... done

    Starting 1000 iterations for eval... done (3.755 seconds)
    Starting 1000 iterations for Data::StreamDeserializer... done (3.059 seconds)

    Eval statistic:
            1000 iterations were done
            maximum deserialization time: 0.0041 seconds
            minimum deserialization time: 0.0035 seconds
            average deserialization time: 0.0036 seconds

    StreamDeserializer statistic:
            1000 iterations were done
            75000 SUBiterations were done
            512 bytes in one block in one iteration
            maximum deserialization time: 0.0045 seconds
            minimum deserialization time: 0.0028 seconds
            average deserialization time: 0.0029 seconds
            average subiteration time:    0.00004 seconds

=head2 Array which contains 1000 hashes:

It works slower than B<eval>:

    $ perl benchmark/ds_vs_eval.pl -n 1000 -b 512 benchmark/tests/02_1000x10
    355623 bytes were read
    First deserializing by eval... done
    First deserializing by Data::DeSerializer... done
    Check if deserialized objects are same... done

    Starting 1000 iterations for eval... done (43.920 seconds)
    Starting 1000 iterations for Data::StreamDeserializer... done (71.668 seconds)

    Eval statistic:
            1000 iterations were done
            maximum deserialization time: 0.0490 seconds
            minimum deserialization time: 0.0416 seconds
            average deserialization time: 0.0426 seconds

    StreamDeserializer statistic:
            1000 iterations were done
            689000 SUBiterations were done
            512 bytes in one block in one iteration
            maximum deserialization time: 0.0773 seconds
            minimum deserialization time: 0.0656 seconds
            average deserialization time: 0.0690 seconds
            average subiteration time:    0.00010 seconds

You can see that a single block is parsed in a very short time, so you
can increase the L<block_size> value to reduce the total parsing time.

If B<block_size> is equal to the string size, the module works two times
faster than eval:

    $ perl benchmark/ds_vs_eval.pl -n 1000 -b 355623 benchmark/tests/02_1000x10
    355623 bytes were read
    First deserializing by eval... done
    First deserializing by Data::DeSerializer... done
    Check if deserialized objects are same... done

    Starting 1000 iterations for eval... done (44.456 seconds)
    Starting 1000 iterations for Data::StreamDeserializer... done (19.702 seconds)

    Eval statistic:
            1000 iterations were done
            maximum deserialization time: 0.0474 seconds
            minimum deserialization time: 0.0423 seconds
            average deserialization time: 0.0431 seconds

    StreamDeserializer statistic:
            1000 iterations were done
            1000 SUBiterations were done
            355623 bytes in one block in one iteration
            maximum deserialization time: 0.0179 seconds
            minimum deserialization time: 0.0168 seconds
            average deserialization time: 0.0171 seconds
            average subiteration time:    0.01705 seconds

=head1 AUTHOR

Dmitry E. Oboukhov, E<lt>unera@debian.orgE<gt>

t/10_memoryleak.t  view on Meta::CPAN

    } elsif ($pcount < 100) {
        $size_end = Data::StreamDeserializer::_memory_size;
        last if $size_end != $size;
    } else {
        last;
    }
}

# Growth between the two memory-size samples; zero means no leak was seen.
# NOTE(review): assumes $size holds the baseline sample taken before the
# loop above -- confirm against the part of the test not shown here.
my $leak = $size_end - $size;
ok($size_end == $size, "Check memory leak ($leak bytes)");
note("$pcount/$i iterations/subiterations were done, $len bytes were parsed");

SKIP: {
    skip "This test is only for my system", 1
        if hostname !~ /^(apache|marish|nbw)$/;
    $size = $size_end;
    for (1 .. 20_000_000 + int rand 50_000_000) {
        push @tests, rand rand 1000;
        $size_end = Data::StreamDeserializer::_memory_size;
        last if $size_end != $size;
    }



( run in 2.400 seconds using v1.01-cache-2.11-cpan-71847e10f99 )