Code-DRY

 view release on metacpan or  search on metacpan

lib/Code/DRY.pm  view on Meta::CPAN

            if ( defined($rRoundedDown_line) ) {
                ${$rRoundedDown_line}
                    = $line == 0 ? $line + 1
                    : ( $offset == ( $lineoffsets->[$line] ) ? $line + 1
                    : $line );
            }
            if ( defined($rRoundedUp_line) ) {
                ${$rRoundedUp_line}
                    = $offset
                    == ( $line == 0 ? 0 : $lineoffsets->[ $line - 1 ] + 1 )
                    ? $line + 1
                    : $line + 2;
            }
            return $line + 1;
        }

        if ( $line > 0 && $lineoffsets->[ $line - 1 ] >= $offset ) {
            $r = $line;
            $line = int( ( $r + $l ) / 2 );
        }
        else {
            $l = $line;
            $line = int( ( $r + $l + 1 ) / 2 );
        }
    }
    return;
}

# Map an offset to a line number (line numbers are 1 based).
#
# Parameters:
#   $offset            - offset to look up; handed to offset2fileindex()
#                        to find the file index it belongs to
#   $rRoundedUp_line   - optional; passed through unchanged
#   $rRoundedDown_line - optional; passed through unchanged
#                        (presumably scalar references that receive the
#                        rounded line numbers - confirm against
#                        offsetAndFileindex2line)
#
# Returns whatever offsetAndFileindex2line() returns for the offset and
# the file index that was looked up.
sub offset2line {
    my ( $offset, $rRoundedUp_line, $rRoundedDown_line ) = @_;

    # Scalar assignment: the lookup is evaluated in scalar context.
    my $index_of_file = offset2fileindex($offset);

    my @lookup_args = (
        $offset, $index_of_file, $rRoundedUp_line, $rRoundedDown_line,
    );
    return offsetAndFileindex2line(@lookup_args);
}

# Return the entry of @file_lineoffsets stored at position $fileindex.
# enter_files() pushes one array reference of line offsets per entered
# file, so for a valid index this is that file's array reference; for an
# index with no entry the result is undef.
sub get_line_offsets_of_fileindex {
    my ($fileindex) = @_;

    my $offsets_ref = $file_lineoffsets[$fileindex];
    return $offsets_ref;
}

# Return a slice of the concatenated text held in $codetotal.
#
# Parameters:
#   $start  - start position handed to substr()
#   $length - length handed to substr()
#
# Returns the substring, or nothing (undef in scalar context, the empty
# list in list context) while $codetotal is not defined.
sub get_concatenated_text {
    my ( $start, $length ) = @_;

    if ( defined $codetotal ) {
        return substr( $codetotal, $start, $length );
    }

    # no text available
    return;
}

# Read the given files and fill the module-level buffers with their
# content.
#
# Parameters:
#   $rfiles - reference to an array of file names. The array is modified
#             in place: on return it contains only the names of the files
#             that were actually entered, in the order they were entered,
#             so its positions match the entries of @fileoffsets and
#             @file_lineoffsets.
#
# Side effects:
#   Resets and refills
#     $codetotal        - concatenated text of all entered files
#     @fileoffsets      - per file, the offset of its last character
#                         inside $codetotal
#     @file_lineoffsets - per file, an array reference holding the list
#                         __get_text() returned after the text
#     %filename2inode   - inode numbers seen so far (keys only)
#
# Returns the number of files entered.
sub enter_files {
    my ($rfiles) = @_;

    # reset all info
    @fileoffsets = @file_lineoffsets = ();
    %filename2inode = ();
    $codetotal = '';

    my $entered = 0;
    FILE:
    for my $file ( @{$rfiles} ) {

        # $file aliases the caller's array element: setting it to undef
        # marks the entry for removal by the grep after the loop.
        # Undefined and empty names are dropped by that grep as well.
        next FILE if ( !defined $file || $file eq '' );

        # check metadata (the inode check is skipped on MSWin32)
        if ( $^O ne 'MSWin32' ) {
            my @statresult = stat($file);
            if ( $#statresult > 0 ) {
                my $inode = $statresult[1];

                # NOTE(review): only the inode number is compared, not
                # the device - files on different filesystems could
                # collide. Confirm whether that matters here.
                if ( exists $filename2inode{$inode} ) {
                    $file = undef;
                    next FILE;    # avoid hard and symbolic links
                }
                $filename2inode{$inode} = undef;
            }
        }

        # preprocess files content
        if ( -z $file ) {
            $file = undef;
            next FILE;    # skip empty files
        }

        my ( $code, @lineoffsets ) = __get_text($file);

        # Guard against an undefined result too, so that a file yielding
        # no text is skipped without an "uninitialized value" warning.
        if ( !defined $code || $code eq '' ) {
            $file = undef;
            next FILE;    # skip empty files
        }

        # we need the length of $code
        $codetotal .= $code;
        push @fileoffsets, ( length $codetotal ) - 1;

        # save line offsets per file
        push @file_lineoffsets, [@lineoffsets];
        ++$entered;
    }

    # Keep only the names that were entered. Empty names must go as well
    # as undefined ones: nothing was pushed for them above, so leaving
    # them in would shift every later name out of step with @fileoffsets.
    @{$rfiles} = grep { defined $_ && $_ ne '' } @{$rfiles};
    return $entered;
}

sub find_duplicates_in {
    ( my $minlength, my $ignoreContentFilter, @files ) = @_;
    enter_files(\@files);

    # enter codestring
    build_suffixarray_and_lcp($codetotal) == 0
        or die "Error building suffix array:$!\n";
    warn "analysing content of ", length $codetotal, " bytes out of ",
        scalar @files, " files...\n" if ($verbose);
    clip_lcp_to_fileboundaries( \@fileoffsets );
    reduce_lcp_to_nonoverlapping_lengths();
    set_lcp_to_zero_for_shadowed_substrings();

    my $n = get_size();

    my $cnt = 0;

    my @ranks;
    my $absminlength = abs($minlength);

    my $last_lcp = 0;
    @ranks = sort { get_len_at($b) <=> get_len_at($a) } grep {

# filter out when the lcp for this index is smaller than our requested minimal length
        my $lcp;    # length of match
        my $res = ( $lcp = get_len_at($_) )
            >= $absminlength;    # works for bytes and lines



( run in 1.185 second using v1.01-cache-2.11-cpan-39bf76dae61 )