App-cloc
view release on metacpan or search on metacpan
my $IN = new IO::File $file, "r";
if (!defined $IN) {
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
print "<- first_line($file)\n" if $opt_v > 2;
return $line;
}
chomp($line = <$IN>);
$IN->close;
print "<- first_line($file, '$line')\n" if $opt_v > 2;
return $line;
} # 1}}}
sub peek_at_first_line { # {{{1
my ($file , # in
$rh_Err , # in hash of error codes
$raa_errors , # out
) = @_;
print "-> peek_at_first_line($file)\n" if $opt_v > 2;
my $script_language = "";
my $first_line = first_line($file, $rh_Err, $raa_errors);
if (defined $first_line) {
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
if ($first_line =~ /^#\!\s*(\S.*?)$/) {
#print "peek_at_first_line 1=[$1]\n";
my @pound_bang = split(' ', $1);
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
if (basename($pound_bang[0]) eq "env" and
scalar @pound_bang > 1) {
$script_language = $pound_bang[1];
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
} else {
$script_language = basename $pound_bang[0];
#print "peek_at_first_line pound_bang B $script_language\n";
}
}
}
print "<- peek_at_first_line($file)\n" if $opt_v > 2;
return $script_language;
} # 1}}}
sub different_files { # {{{1
# See which of the given files are unique by computing each file's MD5
# sum. Return the subset of files which are unique.
my ($ra_files , # in
$rh_Err , # in
$raa_errors , # out
$rh_ignored , # out
) = @_;
print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
my %file_hash = (); # file_hash{md5 hash} = [ file1, file2, ... ]
foreach my $F (@{$ra_files}) {
next if is_dir($F); # needed for Windows
my $IN = new IO::File $F, "r";
if (!defined $IN) {
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
$rh_ignored->{$F} = 'cannot read';
} else {
if ($HAVE_Digest_MD5) {
binmode $IN;
my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
#print "$F, $MD5\n";
push @{$file_hash{$MD5}}, $F;
} else {
# all files treated unique
push @{$file_hash{$F}}, $F;
}
$IN->close;
}
}
# Loop over file sets having identical MD5 sums. Within
# each set, pick the file that most resembles known source
# code.
my @unique = ();
for my $md5 (sort keys %file_hash) {
my $i_best = 0;
for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
my $F = $file_hash{$md5}[$i];
my (@nul_a, %nul_h);
my $language = classify_file($F, $rh_Err,
# don't save these errors; pointless
\@nul_a, \%nul_h);
$i_best = $i if $language ne "(unknown)";
}
# keep the best one found and identify the rest as ignored
for (my $i = 0; $i < scalar(@{$file_hash{$md5}}); $i++) {
if ($i == $i_best) {
push @unique, $file_hash{$md5}[$i_best];
} else {
$rh_ignored->{$file_hash{$md5}[$i]} = "duplicate of " .
$file_hash{$md5}[$i_best];
}
}
}
print "<- different_files(@unique)\n" if $opt_v > 2;
return @unique;
} # 1}}}
sub call_counter { # {{{1
my ($file , # in
$language , # in
$ra_Errors, # out
) = @_;
# Logic: pass the file through the following filters:
# 1. remove blank lines
# 2. remove comments using each filter defined for this language
# (example: SQL has two, remove_starts_with(--) and
# remove_c_comments() )
# 3. compute comment lines as
# total lines - blank lines - lines left over after all
# comment filters have been applied
print "-> call_counter($file, $language)\n" if $opt_v > 2;
#print "call_counter: ", Dumper(@routines), "\n";
my @lines = ();
my $ascii = "";
if (-B $file and $opt_unicode) {
( run in 0.938 second using v1.01-cache-2.11-cpan-5837b0d9d2c )