Digest-ssdeep
view release on metacpan or search on metacpan
lib/Digest/ssdeep.pm view on Meta::CPAN
use List::Util qw/max/;
use version;
our $VERSION = qv('0.9.3');
# Package variables consumed by Exporter. They are declared with a
# file-scoped "our" instead of the obsolete "use vars" pragma, so they stay
# visible under strict for the rest of the file.
our ( @ISA, @EXPORT_OK );

# Runs at compile time so that the export lists are already populated when
# another module imports from this one.
BEGIN {
    require Exporter;

    # Inherit import() from Exporter.
    @ISA = qw(Exporter);

    # Functions that callers may import on request.
    @EXPORT_OK = qw(
        ssdeep_hash
        ssdeep_hash_file
        ssdeep_compare
        ssdeep_dump_last
    );
}
# Numeric parameters of the algorithm.
use constant {
    FNV_PRIME  => 0x01000193,    # multiplier applied by _update_fnv
    FNV_INIT   => 0x28021967,    # starting value of each FNV-style hash
    MAX_LENGTH => 64,            # used by _calc_initbs to pick a block size
};

# Edit-operation weights stored in $Text::WagnerFischer::REFC:
#   same               = 0
#   insertion/deletion = 1
#   mismatch           = 2
#   swap               = not available (it should be 5)
$Text::WagnerFischer::REFC = [ 0, 1, 2 ];

# The 64 characters of the base64 alphabet, indexed by their 6-bit value.
my @b64 = split //,
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

# Debug data; ssdeep_hash empties it at the start of every call.
my @DEBUG_LAST;
# Sliding window with the last 7 character codes given to the rolling hash,
# oldest first. The entry shifted out is the character 7 places before.
my @last7chars;

{    # begin rolling hash internals

    my $roll_h1;    # sum of the character codes currently in the window
    my $roll_h2;    # sum of the window weighted by position (newest = 7)
    my $roll_h3;    # shift/xor mix of the recent characters, kept to 32 bits

    # Resets the rolling hash internal status: the window is filled with
    # zeros and the three components are cleared.
    sub _reset_rollhash {
        @last7chars = (0) x 7;
        $roll_h1 = $roll_h2 = $roll_h3 = 0;
    }

    # Updates the rolling hash internal state and returns its new value.
    # Parameters: the next character, as a numeric code.
    # Returns: the rolling hash value, always in the range 0 .. 2**32 - 1.
    sub _roll_hash {
        my ($char) = @_;

        # Slide the window: drop the oldest character, append the new one.
        my $oldest = shift @last7chars;
        push @last7chars, $char;

        # h2 has to be updated before h1 because it needs the sum of the
        # window as it was before this character arrived.
        $roll_h2 += 7 * $char - $roll_h1;
        $roll_h1 += $char - $oldest;

        # Each step shifts by 5 bits, so after 7 steps (35 bits) a character
        # has left the 32-bit value completely.
        $roll_h3 = ( ( $roll_h3 << 5 ) & 0xffffffff ) ^ $char;

        #printf("c=%d cAnt=%d H1=%u H2=%u H3=%u\n",
        #    $char, $oldest,
        #    $roll_h1, $roll_h2, $roll_h3);

        # h3 alone can be 0xffffffff, so the plain sum may exceed 32 bits.
        # Wrap it like 32-bit unsigned arithmetic would. The modulo form is
        # used rather than a bit mask because it also works when the sum
        # does not fit in a native integer.
        return ( $roll_h1 + $roll_h2 + $roll_h3 ) % 4294967296;
    }

}    # end rolling hash internals
# Advances an FNV-style hash by one character, modifying the hash in place.
# Usage: _update_fnv($fnvhash, $newchar);
# The first argument is changed through the @_ alias, so the caller has to
# pass a variable.
sub _update_fnv {

    # Integer arithmetic is needed so that the multiplication overflows as
    # an integer. The pragma is lexically scoped to this sub.
    use integer;

    # Multiply by the prime, keep the low 32 bits, then mix in the character.
    $_[0] = ( ( $_[0] * FNV_PRIME ) & 0xffffffff ) ^ $_[1];
}
# Picks the initial block size for an input of the given size.
# Parameter: the length of the whole data.
# Returns: the smallest value of the form 3 * 2**n for which
#          blocksize * MAX_LENGTH is not below the length.
sub _calc_initbs {
    my ($total_len) = @_;

    # Closed form kept for reference:
    #   n  = int(log(length / (MAX_LENGTH * 3)) / log(2))
    #   bs = 3 * 2**n
    # The loop below does the same search by repeated doubling.
    my $blocksize = 3;
    while ( $blocksize * MAX_LENGTH < $total_len ) {
        $blocksize *= 2;
    }

    # 3 is the smallest block size ever returned.
    return 3 if $blocksize <= 3;
    return $blocksize;
}
# Calculates the ssdeep fuzzy hash of a string
# Parameters: the string
# Returns: the fuzzy hash in string or array
sub ssdeep_hash {
my $string = shift;
return unless defined $string;
my $bs = _calc_initbs( length $string );
@DEBUG_LAST = ();
my $hash1;
my $hash2;
while (1) {
_reset_rollhash();
my $fnv1 = FNV_INIT; # traditional hash blocksize
my $fnv2 = FNV_INIT; # traditional hash 2*blocksize
$hash1 = '';
$hash2 = '';
for my $i ( 0 .. length($string) - 1 ) {
( run in 0.829 second using v1.01-cache-2.11-cpan-71847e10f99 )