Code-DRY
view release on metacpan or search on metacpan
lib/Code/DRY.pm view on Meta::CPAN
if ( defined($rRoundedDown_line) ) {
${$rRoundedDown_line}
= $line == 0 ? $line + 1
: ( $offset == ( $lineoffsets->[$line] ) ? $line + 1
: $line );
}
if ( defined($rRoundedUp_line) ) {
${$rRoundedUp_line}
= $offset
== ( $line == 0 ? 0 : $lineoffsets->[ $line - 1 ] + 1 )
? $line + 1
: $line + 2;
}
return $line + 1;
}
if ( $line > 0 && $lineoffsets->[ $line - 1 ] >= $offset ) {
$r = $line;
$line = int( ( $r + $l ) / 2 );
}
else {
$l = $line;
$line = int( ( $r + $l + 1 ) / 2 );
}
}
return;
}
# line number is 1 based
# Translate an offset into a line number.
#
# The file index belonging to $offset is looked up with offset2fileindex()
# and the actual work is delegated to offsetAndFileindex2line().
#
# Parameters:
#   $offset            - offset to translate
#   $rRoundedUp_line   - handed through unchanged to offsetAndFileindex2line()
#   $rRoundedDown_line - handed through unchanged to offsetAndFileindex2line()
#
# Returns whatever offsetAndFileindex2line() returns.
sub offset2line {
    my $offset            = shift;
    my $rRoundedUp_line   = shift;
    my $rRoundedDown_line = shift;

    # scalar() keeps the lookup in scalar context, so exactly one argument
    # is passed on in this position.
    return offsetAndFileindex2line(
        $offset,
        scalar( offset2fileindex($offset) ),
        $rRoundedUp_line,
        $rRoundedDown_line,
    );
}
# Fetch the line offset table recorded for one file.
#
# Parameters:
#   $fileindex - index into @file_lineoffsets
#
# Returns the element stored at that index (enter_files() stores one array
# reference per loaded file there), or undef if nothing is stored.
sub get_line_offsets_of_fileindex {
    my ($fileindex) = @_;

    return $file_lineoffsets[$fileindex];
}
# Extract a piece of the concatenated text held in $codetotal.
#
# Parameters:
#   $start  - start position, passed to substr() as is
#   $length - length, passed to substr() as is
#
# Returns the substring, or nothing (bare return) while $codetotal is
# still undefined.
sub get_concatenated_text {
    my ( $start, $length ) = @_;

    return unless defined $codetotal;

    return substr $codetotal, $start, $length;
}
# Load the given files into the module-level tables.
#
# All previously collected information (@fileoffsets, @file_lineoffsets,
# %filename2inode, $codetotal) is discarded first. For every file that is
# accepted, the text returned by __get_text() is appended to $codetotal, the
# offset of its last character is pushed onto @fileoffsets and a copy of its
# line offsets is pushed onto @file_lineoffsets.
#
# Parameters:
#   $rfiles - reference to an array of file names. The array is modified in
#             place: every entry that was not loaded (undefined or empty
#             name, inode already seen, zero-size file, no text returned by
#             __get_text()) is removed, so the remaining names line up
#             one-to-one with the entries of the per-file tables.
#
# Returns the number of files that were loaded.
sub enter_files {
    my ($rfiles) = @_;

    # reset all info
    @fileoffsets      = ();
    @file_lineoffsets = ();
    %filename2inode   = ();
    $codetotal        = '';

    my $loaded = 0;

    # $file aliases the element of the caller's array; setting it to undef
    # marks the entry for removal by the grep after the loop.
    for my $file ( @{$rfiles} ) {
        next if !defined $file;

        if ( $file eq '' ) {

            # An empty name must be dropped from the list as well, otherwise
            # it stays behind without a matching entry in the per-file
            # tables and shifts every later name by one.
            $file = undef;
            next;
        }

        # check metadata
        if ( $^O ne 'MSWin32' ) {
            my @statresult = stat($file);
            if ( $#statresult > 0 ) {    # stat succeeded
                my $inode = $statresult[1];

                # NOTE(review): only the inode number is used as key; the
                # device number ($statresult[0]) is not taken into account.
                if ( exists $filename2inode{$inode} ) {
                    $file = undef;
                    next;                # avoid hard and symbolic links
                }
                $filename2inode{$inode} = undef;
            }
        }

        # preprocess files content
        if ( -z $file ) {
            $file = undef;
            next;                        # skip empty files
        }

        my ( $code, @lineoffsets ) = __get_text($file);

        # The defined check avoids an "uninitialized" warning when
        # __get_text() yields nothing; the entry is skipped either way.
        if ( !defined $code || $code eq '' ) {
            $file = undef;
            next;                        # skip empty files
        }

        # record the offset of the last character of this file within the
        # concatenated text
        $codetotal .= $code;
        push @fileoffsets, length($codetotal) - 1;

        # save line offsets per file
        push @file_lineoffsets, [@lineoffsets];

        ++$loaded;
    }

    # drop every entry that was marked as not loaded
    @{$rfiles} = grep { defined $_ } @{$rfiles};

    return $loaded;
}
sub find_duplicates_in {
( my $minlength, my $ignoreContentFilter, @files ) = @_;
enter_files(\@files);
# enter codestring
build_suffixarray_and_lcp($codetotal) == 0
or die "Error building suffix array:$!\n";
warn "analysing content of ", length $codetotal, " bytes out of ",
scalar @files, " files...\n" if ($verbose);
clip_lcp_to_fileboundaries( \@fileoffsets );
reduce_lcp_to_nonoverlapping_lengths();
set_lcp_to_zero_for_shadowed_substrings();
my $n = get_size();
my $cnt = 0;
my @ranks;
my $absminlength = abs($minlength);
my $last_lcp = 0;
@ranks = sort { get_len_at($b) <=> get_len_at($a) } grep {
# filter out when the lcp for this index is smaller than our requested minimal length
my $lcp; # length of match
my $res = ( $lcp = get_len_at($_) )
>= $absminlength; # works for bytes and lines
( run in 1.185 second using v1.01-cache-2.11-cpan-39bf76dae61 )