Digest-ssdeep

 view release on metacpan or  search on metacpan

lib/Digest/ssdeep.pm  view on Meta::CPAN

use List::Util qw/max/;

use version; 
our $VERSION = qv('0.9.3');

# Export setup. Wrapped in BEGIN so it runs at compile time.
BEGIN {
    require Exporter;
    use vars qw(@ISA @EXPORT_OK);

    # Inherit from Exporter.
    @ISA = ('Exporter');

    # Names that can be imported on request.
    @EXPORT_OK = (
        'ssdeep_hash',
        'ssdeep_hash_file',
        'ssdeep_compare',
        'ssdeep_dump_last',
    );
}

# Hash parameters.
use constant {
    FNV_PRIME  => 0x01000193,    # multiplier applied by _update_fnv
    FNV_INIT   => 0x28021967,    # starting value of each FNV hash
    MAX_LENGTH => 64,            # used by _calc_initbs when sizing blocks
};

# Default edit costs for Text::WagnerFischer, in the order
# [ same, insertion/deletion, mismatch ]:
#   same                = 0
#   insertion/deletion  = 1
#   mismatch            = 2
#   swap                = N/A (should be 5)
# NOTE(review): this assigns a package variable of Text::WagnerFischer, so it
# presumably affects every user of that module in the process -- confirm.
$Text::WagnerFischer::REFC = [ 0, 1, 2 ];

# 64-symbol alphabet: A-Z, a-z, 0-9, '+', '/'.
my @b64 = split //,
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

# Debug data; emptied at the start of every ssdeep_hash call.
my @DEBUG_LAST;

my @last7chars;    # sliding window: the last 7 character codes fed to _roll_hash
{                  # begin rolling hash internals

    # State of the rolling hash, private to the two subs below.
    my $roll_h1;    # plain sum of the characters currently in the window
    my $roll_h2;    # weighted sum of the window (newest character weighs 7)
    my $roll_h3;    # shift-and-xor mix, kept within 32 bits

    # Resets the rolling hash internal status: the window is filled with
    # zeros and the three components are cleared.
    sub _reset_rollhash {
        @last7chars = (0) x 7;
        $roll_h1    = 0;
        $roll_h2    = 0;
        $roll_h3    = 0;
    }

    # Updates the rolling hash's internal state and returns its new value.
    # Parameters: the next character, as a numeric code.
    # Returns: the rolling hash value, reduced to the range 0 .. 2**32 - 1.
    sub _roll_hash {
        my $char    = shift;
        my $char7bf = shift @last7chars;    # character leaving the window

        push @last7chars, $char;

        # h2 must be updated first: it needs the old value of h1.
        $roll_h2 += 7 * $char - $roll_h1;
        $roll_h1 += $char - $char7bf;

        $roll_h3 <<= 5;    # 5*7 = 35 (so it vanishes after 7 iterations)
        $roll_h3 &= 0xffffffff;
        $roll_h3 ^= $char;

        #printf("c=%d  cAnt=%d    H1=%u  H2=%u  H3=%u\n",
        #	$char, $char7bf,
        #	$roll_h1, $roll_h2, $roll_h3);

        # The reference spamsum/ssdeep implementation computes this sum in
        # 32-bit unsigned arithmetic, so it wraps around at 2**32. Without
        # the reduction the sum can exceed 2**32 (h3 alone may be 0xffffffff)
        # and a later "hash % blocksize" test would give a different answer.
        # Modulus is used instead of "& 0xffffffff" so the result is also
        # right when the sum had to be stored as a floating point value.
        return ( $roll_h1 + $roll_h2 + $roll_h3 ) % 2**32;
    }

}    # end rolling hash internals

# Folds one more character into an FNV hash. The hash is modified in place
# through the @_ alias, so the caller's variable changes.
# Usage: _update_fnv($fnvhash, $newchar);
sub _update_fnv {
    use integer;    # integer semantics: the product must overflow, not turn into a float

    # Multiply by the prime, keep the low 32 bits, then xor in the character.
    $_[0] = ( ( $_[0] * FNV_PRIME() ) & 0xffffffff ) ^ $_[1];
}

# Picks the initial block size for an input of the given length.
# Parameter: the length of the whole data
# Returns: the first value in 3, 6, 12, 24, ... for which
#          blocksize * MAX_LENGTH is not less than the length
sub _calc_initbs {
    my $length = shift;

    # Closed form of the loop below, kept for reference:
    #   n  = int(log(length / (MAX_LENGTH * 3)) / log(2))
    #   bs = 3 * 2**n
    my $bs = 3;
    while ( $bs * MAX_LENGTH() < $length ) {
        $bs *= 2;
    }

    # $bs starts at 3 and is only ever doubled, so it cannot be below 3.
    return $bs;
}

# Calculates the ssdeep fuzzy hash of a string
# Parameters: the string
# Returns: the fuzzy hash in string or array
sub ssdeep_hash {
    my $string = shift;

    return unless defined $string;

    my $bs = _calc_initbs( length $string );
    @DEBUG_LAST = ();

    my $hash1;
    my $hash2;

    while (1) {
        _reset_rollhash();
        my $fnv1 = FNV_INIT;    # traditional hash blocksize
        my $fnv2 = FNV_INIT;    # traditional hash 2*blocksize

        $hash1 = '';
        $hash2 = '';

        for my $i ( 0 .. length($string) - 1 ) {



( run in 0.829 second using v1.01-cache-2.11-cpan-71847e10f99 )