App-cloc
view release on metacpan or search on metacpan
";
# Help information for options not yet implemented:
# --inline Process comments that appear at the end
# of lines containing code.
# --html Create HTML files of each input file showing
# comment and code lines in different colors.
# Setting $| to a true value turns on autoflush for the currently selected
# output handle, so output is written as soon as it is printed.
$| = 1; # flush STDOUT
# get_time() is defined elsewhere in this file; presumably the value is used
# later to report elapsed run time -- TODO confirm against its callers.
my $start_time = get_time();
# One lexical per command-line switch.  Every variable in this list is bound
# to its switch name(s) by the GetOptions() call that follows, and is left
# undefined (or empty, for the two arrays) when the switch is not given.
# The two array variables (@opt_force_lang, @opt_script_lang) collect every
# occurrence of a switch that may be repeated.
my (
$opt_categorized ,
$opt_found ,
@opt_force_lang ,
$opt_lang_no_ext ,
@opt_script_lang ,
$opt_count_diff ,
$opt_diff ,
$opt_diff_alignment ,
$opt_diff_timeout ,
$opt_html ,
$opt_ignored ,
$opt_counted ,
$opt_show_ext ,
$opt_show_lang ,
$opt_progress_rate ,
$opt_print_filter_stages ,
$opt_v ,
$opt_vcs ,
$opt_version ,
$opt_exclude_lang ,
$opt_exclude_list_file ,
$opt_exclude_dir ,
$opt_explain ,
$opt_include_lang ,
$opt_force_lang_def ,
$opt_read_lang_def ,
$opt_write_lang_def ,
$opt_strip_comments ,
$opt_original_dir ,
$opt_quiet ,
$opt_report_file ,
$opt_sdir ,
$opt_sum_reports ,
$opt_processes ,
$opt_unicode ,
$opt_no3 , # accept it but don't use it
$opt_3 ,
$opt_extract_with ,
$opt_by_file ,
$opt_by_file_by_lang ,
$opt_by_percent ,
$opt_xml ,
$opt_xsl ,
$opt_yaml ,
$opt_csv ,
$opt_csv_delimiter ,
$opt_fullpath ,
$opt_json ,
$opt_md ,
$opt_match_f ,
$opt_not_match_f ,
$opt_match_d ,
$opt_not_match_d ,
$opt_skip_uniqueness ,
$opt_list_file ,
$opt_help ,
$opt_skip_win_hidden ,
$opt_read_binary_files ,
$opt_sql ,
$opt_sql_append ,
$opt_sql_project ,
$opt_sql_style ,
$opt_inline ,
$opt_exclude_ext ,
$opt_ignore_whitespace ,
$opt_ignore_case ,
$opt_follow_links ,
$opt_autoconf ,
$opt_sum_one ,
$opt_stdin_name ,
$opt_force_on_windows ,
$opt_force_on_unix , # actually forces !$ON_WINDOWS
$opt_show_os ,
$opt_skip_archive ,
$opt_max_file_size , # in MB
$opt_use_sloccount ,
$opt_no_autogen ,
$opt_force_git ,
);
# Parse the command line.  Each entry maps "name|alias<spec>" to the variable
# that receives the switch's value.  Spec suffixes are Getopt::Long's:
#   (none)  boolean flag
#   =s      mandatory string          :s   optional string
#   =i, =n  mandatory integer         :i   optional integer
# A switch bound to an array reference may be repeated; each occurrence is
# appended.  Multi-word switches accept both an underscore and a hyphen
# spelling.  GetOptions() returns false if the command line was malformed;
# the caller checks $getopt_success afterwards.
my $getopt_success = GetOptions(
    "by_file|by-file" => \$opt_by_file ,
    "by_file_by_lang|by-file-by-lang" => \$opt_by_file_by_lang ,
    "categorized=s" => \$opt_categorized ,
    "counted=s" => \$opt_counted ,
    "include_lang|include-lang=s" => \$opt_include_lang ,
    "exclude_lang|exclude-lang=s" => \$opt_exclude_lang ,
    "exclude_dir|exclude-dir=s" => \$opt_exclude_dir ,
    "exclude_list_file|exclude-list-file=s" => \$opt_exclude_list_file ,
    "explain=s" => \$opt_explain ,
    "extract_with|extract-with=s" => \$opt_extract_with ,
    "found=s" => \$opt_found ,
    "count_and_diff|count-and-diff" => \$opt_count_diff ,
    "diff" => \$opt_diff ,
    "diff-alignment|diff_alignment=s" => \$opt_diff_alignment ,
    "diff-timeout|diff_timeout=i" => \$opt_diff_timeout ,
    "html" => \$opt_html ,
    "ignored=s" => \$opt_ignored ,
    "quiet" => \$opt_quiet ,
    "force_lang_def|force-lang-def=s" => \$opt_force_lang_def ,
    "read_lang_def|read-lang-def=s" => \$opt_read_lang_def ,
    "show_ext|show-ext:s" => \$opt_show_ext ,
    "show_lang|show-lang:s" => \$opt_show_lang ,
    "progress_rate|progress-rate=i" => \$opt_progress_rate ,
    "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
    # --report-file and --out are two names for the same setting.
    "report_file|report-file=s" => \$opt_report_file ,
    "out=s" => \$opt_report_file ,
    "script_lang|script-lang=s" => \@opt_script_lang ,
    "sdir=s" => \$opt_sdir ,
    "skip_uniqueness|skip-uniqueness" => \$opt_skip_uniqueness ,
    "strip_comments|strip-comments=s" => \$opt_strip_comments ,
    "original_dir|original-dir" => \$opt_original_dir ,
    "sum_reports|sum-reports" => \$opt_sum_reports ,
    "processes=n" => \$opt_processes ,
    "unicode" => \$opt_unicode ,
    "no3" => \$opt_no3 , # ignored
    "3" => \$opt_3 ,
    "v|verbose:i" => \$opt_v ,
    "vcs=s" => \$opt_vcs ,
    "version" => \$opt_version ,
    "write_lang_def|write-lang-def=s" => \$opt_write_lang_def ,
    "xml" => \$opt_xml ,
    "xsl=s" => \$opt_xsl ,
    "force_lang|force-lang=s" => \@opt_force_lang ,
    "lang_no_ext|lang-no-ext=s" => \$opt_lang_no_ext ,
    "yaml" => \$opt_yaml ,
    "csv" => \$opt_csv ,
    # The underscore form was historically registered with the misspelling
    # "csv_delimeter", which made the correctly spelled --csv_delimiter an
    # unknown option.  The correct spelling is now accepted; the misspelled
    # name is retained as an alias so existing invocations keep working.
    "csv_delimiter|csv_delimeter|csv-delimiter=s" => \$opt_csv_delimiter ,
    "json" => \$opt_json ,
    "md" => \$opt_md ,
    "fullpath" => \$opt_fullpath ,
    "match_f|match-f=s" => \$opt_match_f ,
    "not_match_f|not-match-f=s" => \$opt_not_match_f ,
    "match_d|match-d=s" => \$opt_match_d ,
    "not_match_d|not-match-d=s" => \$opt_not_match_d ,
    "list_file|list-file=s" => \$opt_list_file ,
    "help" => \$opt_help ,
    "skip_win_hidden|skip-win-hidden" => \$opt_skip_win_hidden ,
    "read_binary_files|read-binary-files" => \$opt_read_binary_files ,
    "sql=s" => \$opt_sql ,
    "sql_project|sql-project=s" => \$opt_sql_project ,
    "sql_append|sql-append" => \$opt_sql_append ,
    "sql_style|sql-style=s" => \$opt_sql_style ,
    "inline" => \$opt_inline ,
    "exclude_ext|exclude-ext=s" => \$opt_exclude_ext ,
    "ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace ,
    "ignore_case|ignore-case" => \$opt_ignore_case ,
    "follow_links|follow-links" => \$opt_follow_links ,
    "autoconf" => \$opt_autoconf ,
    "sum_one|sum-one" => \$opt_sum_one ,
    "by_percent|by-percent=s" => \$opt_by_percent ,
    "stdin_name|stdin-name=s" => \$opt_stdin_name ,
    "windows" => \$opt_force_on_windows ,
    "unix" => \$opt_force_on_unix ,
    "show_os|show-os" => \$opt_show_os ,
    "skip_archive|skip-archive=s" => \$opt_skip_archive ,
    "max_file_size|max-file-size=i" => \$opt_max_file_size ,
    "use_sloccount|use-sloccount" => \$opt_use_sloccount ,
    "no_autogen|no-autogen" => \$opt_no_autogen ,
    "git" => \$opt_force_git ,
);
# --by-file-by-lang implies --by-file.
if (defined $opt_by_file_by_lang) {
    $opt_by_file = 1;
}

# Stylesheet file name (created with --xsl); --diff selects its own sheet.
my $CLOC_XSL = $opt_diff ? "cloc-diff.xsl" : "cloc.xsl";

# A malformed command line ends the run; --help prints the usage text.
$getopt_success or die "\n";
if ($opt_help) {
    print $usage and exit;
}

# Turn the comma separated --exclude-lang / --exclude-dir / --include-lang
# values into lookup hashes (name => 1).
my %Exclude_Language = ();
if ($opt_exclude_lang) {
    $Exclude_Language{$_} = 1 foreach split(/,/, $opt_exclude_lang);
}

my %Exclude_Dir = ();
if ($opt_exclude_dir) {
    $Exclude_Dir{$_} = 1 foreach split(/,/, $opt_exclude_dir );
}
exclude_dir_validates(\%Exclude_Dir) or die;

my %Include_Language = ();
if ($opt_include_lang) {
    $Include_Language{$_} = 1 foreach split(/,/, $opt_include_lang);
}

# Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories.  The contents of
# these directories often conflict with files of interest.  From here on
# $opt_exclude_dir is always true because the exclusion set is never empty.
$opt_exclude_dir = 1;
$Exclude_Dir{$_} = 1 foreach (
    ".svn",
    ".cvs",
    ".hg",
    ".git",
    ".bzr",
    ".snapshot",          # NetApp backups
);

# Normalize the remaining switches to plain defaults.
if (defined $opt_count_diff) {
    $opt_count_diff = 1;
} else {
    $opt_count_diff = 0;
}
if ($opt_diff_alignment) {
    $opt_diff = 1;                  # --diff-alignment implies --diff
}
$opt_exclude_ext       ||= "";
$opt_ignore_whitespace ||= 0;
$opt_ignore_case       ||= 0;
$opt_lang_no_ext       ||= 0;
$opt_follow_links      ||= 0;
$opt_diff_timeout      ||= 10;
if ($opt_csv_delimiter) {
    $opt_csv = 1;                   # --csv-delimiter implies --csv
}
if ($opt_force_on_windows) {
    $ON_WINDOWS = 1;
}
# Account for every file that exists only in the first file set ($fset_a):
# bump the per-language "removed" file count, record the file in the
# alignment report, and add its blank/comment/code line counts to the
# "removed" columns of the per-language (%p_dbl) and per-file (%p_dbf) deltas.
foreach my $f (@$files_removed) {
    next if $already_counted{$f};
    my $lang = $Language{$fset_a}{$f};
    # Don't proceed unless the file (both L and R versions)
    # is in a known language.
    next if $opt_include_lang
        and not $Include_Language{$lang};
    next if $lang eq "(unknown)";
    # %Exclude_Language is keyed by language name, so the lookup must go
    # through the file's language.  (The former lookup by file set and file
    # name never matched and autovivified a stray key in %Exclude_Language.)
    next if $Exclude_Language{$lang};
    ++$p_dbl{ $lang }{'nFiles'}{'removed'};
    $p_alignment{"removed"}{sprintf " - %s ; %s\n", $f, $lang} = 1;
    #printf "%10s -> %s\n", $f, $Language{$fh[$F ]}{$f};
    # Additionally, add contents of file $f to
    # Delta_by_File{$f}{comment/blank/code}{'removed'}
    # Delta_by_Language{$lang}{comment/blank/code}{'removed'}
    # via the $p_dbl and $p_dbf variables.
    my ($all_line_count,
        $blank_count   ,
        $comment_count ,
       ) = call_counter($f, $lang, \@p_errors);
    # Code lines are whatever is neither blank nor comment.
    my $code_count = $all_line_count - $blank_count - $comment_count;
    $p_dbl{ $lang }{'comment'}{'removed'} += $comment_count;
    $p_dbl{ $lang }{'blank'}{'removed'}   += $blank_count;
    $p_dbl{ $lang }{'code'}{'removed'}    += $code_count;
    $p_dbf{ $f }{'comment'}{'removed'} = $comment_count;
    $p_dbf{ $f }{'blank'}{'removed'}   = $blank_count;
    $p_dbf{ $f }{'code'}{'removed'}    = $code_count;
}
my $n_file_pairs_compared = 0;
# Don't know ahead of time how many file pairs will be compared
# since duplicates are weeded out below. The answer is
# scalar @file_pairs only if there are no duplicates.
foreach my $pair (@$file_pairs) {
my $file_L = $pair->[0];
my $file_R = $pair->[1];
my $Lang_L = $Language{$fset_a}{$file_L};
my $Lang_R = $Language{$fset_b}{$file_R};
#print "main step 6 file_L=$file_L file_R=$file_R\n";
++$nCounted;
printf "Counting: %d\r", $nCounted
unless ($counter_type or !$opt_progress_rate or ($nCounted % $opt_progress_rate));
next if $p_ignored{$file_L};
# filter out non-included languages
if ($opt_include_lang and not $Include_Language{$Lang_L}
and not $Include_Language{$Lang_R}) {
$p_ignored{$file_L} = "--include-lang=$Lang_L";
$p_ignored{$file_R} = "--include-lang=$Lang_R";
next;
}
# filter out excluded or unrecognized languages
if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
$p_ignored{$file_L} = "--exclude-lang=$Lang_L";
$p_ignored{$file_R} = "--exclude-lang=$Lang_R";
next;
}
my $not_Filters_by_Language_Lang_LR = 0;
#print "file_LR = [$file_L] [$file_R]\n";
#print "Lang_LR = [$Lang_L] [$Lang_R]\n";
if (!(@{$Filters_by_Language{$Lang_L} }) or
!(@{$Filters_by_Language{$Lang_R} })) {
$not_Filters_by_Language_Lang_LR = 1;
}
if ($not_Filters_by_Language_Lang_LR) {
if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
$p_ignored{$fset_a}{$file_L} = "language unknown (#1)";
$p_ignored{$fset_b}{$file_R} = "language unknown (#1)";
} else {
$p_ignored{$fset_a}{$file_L} = "missing Filters_by_Language{$Lang_L}";
$p_ignored{$fset_b}{$file_R} = "missing Filters_by_Language{$Lang_R}";
}
next;
}
#print "DIFF($file_L, $file_R)\n";
# step 0: compare the two files' contents
chomp ( my @lines_L = read_file($file_L) );
chomp ( my @lines_R = read_file($file_R) );
my $language_file_L = "";
if (defined $Language{$fset_a}{$file_L}) {
$language_file_L = $Language{$fset_a}{$file_L};
} else {
# files $file_L and $file_R do not contain known language
next;
}
my $contents_are_same = 1;
if (scalar @lines_L == scalar @lines_R) {
# same size, must compare line-by-line
for (my $i = 0; $i < scalar @lines_L; $i++) {
if ($lines_L[$i] ne $lines_R[$i]) {
$contents_are_same = 0;
last;
}
}
if ($contents_are_same) {
++$p_dbl{$language_file_L}{'nFiles'}{'same'};
} else {
++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
}
} else {
$contents_are_same = 0;
# different sizes, contents have changed
++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
}
if ($opt_diff_alignment) {
my $str = "$file_L | $file_R ; $language_file_L";
if ($contents_are_same) {
$p_alignment{"pairs"}{" == $str"} = 1;
} else {
$p_alignment{"pairs"}{" != $str"} = 1;
}
++$n_file_pairs_compared;
}
# step 1: identify comments in both files
#print "Diff blank removal L language= $Lang_L";
#print " scalar(lines_L)=", scalar @lines_L, "\n";
my @original_minus_blanks_L
= rm_blanks( \@lines_L, $Lang_L, \%EOL_Continuation_re);
#print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
@lines_L = @original_minus_blanks_L;
#print "2: scalar(lines_L)=", scalar @lines_L, "\n";
$language = $All_One_Language;
} else {
$language = classify_file($file ,
$rh_Err ,
$raa_errors,
$rh_ignored);
}
die "make_file_list($file) undef size" unless defined $size_in_bytes;
die "make_file_list($file) undef lang" unless defined $language;
printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file;
++$nFiles_Categorized;
#printf "classified %d files\n", $nFiles_Categorized
# unless (!$opt_progress_rate or
# ($nFiles_Categorized % $opt_progress_rate));
}
printf "classified %d files\r", $nFiles_Categorized
if !$opt_quiet and $nFiles_Categorized > 1;
print "<- make_file_list()\n" if $opt_v > 2;
return $fh; # handle to the file containing the list of files to process
} # 1}}}
sub invoke_generator {                       # {{{1
    # Run a file-listing command and return the files it prints.
    #
    #   $generator      - command line to run; each line it writes to its
    #                     standard output is taken as one file name
    #   $ra_user_inputs - optional reference to the user's file/directory
    #                     inputs; when it is defined and non-empty only files
    #                     whose path begins with one of these entries are kept
    #
    # Returns the list of files that survive the --match-f, --match-d,
    # --not-match-d and --not-match-f filters.  Dies if the command cannot
    # be started.  Populates global variable %Ignored.
    my ($generator, $ra_user_inputs) = @_;
    print "-> invoke_generator($generator)\n" if $opt_v > 2;

    # Three-argument pipe open on a lexical handle: the command text can no
    # longer be mistaken for an open mode, and no global bareword handle is
    # clobbered.  A single command string is still run the same way as before.
    open(my $generator_fh, '-|', $generator) or
        die "Failed to pipe $generator: $!";
    my @files = ();
    while (my $F = <$generator_fh>) {
        chomp $F;
        print "VCS input: $F\n" if $opt_v >= 2;
        if (!defined $ra_user_inputs or !@{$ra_user_inputs}) {
            push @files, $F;
            next;
        }
        # is this file desired?
        my $want_this_one = 0;
        foreach my $file_dir (@{$ra_user_inputs}) {
            # The user input is a literal path prefix, not a pattern, so
            # quote it; otherwise names such as "a+b" or "lib.d" are
            # interpreted as regular expressions and mis-match.
            if ($F =~ /^\Q$file_dir\E/) {
                $want_this_one = 1;
                last;
            }
        }
        push @files, $F if $want_this_one;
    }
    close($generator_fh);

    # apply match/not-match file/dir filters to the list so far
    # Note the precedence: when --match-f is set it alone decides the file
    # and the later filters are not consulted; the same holds for --match-d.
    my @post_filter = ();
    foreach my $F (@files) {
        if ($opt_match_f) {
            push @post_filter, $F if basename($F) =~ m{$opt_match_f};
            next;
        }
        if ($opt_match_d) {
            push @post_filter, $F if $F =~ m{$opt_match_d};
            next;
        }
        if ($opt_not_match_d) {
            if ($opt_fullpath and $F =~ m{$opt_not_match_d}) {
                $Ignored{$F} = "--not-match-d=$opt_not_match_d";
                next;
            } elsif (basename($F) =~ m{$opt_not_match_d}) {
                $Ignored{$F} = "--not-match-d (basename) =$opt_not_match_d";
                next;
            }
        }
        if ($opt_not_match_f) {
            push @post_filter, $F unless basename($F) =~ m{$opt_not_match_f};
            next;
        }
        push @post_filter, $F;
    }
    print "<- invoke_generator\n" if $opt_v > 2;
    return @post_filter;
} # 1}}}
sub remove_duplicate_files {                 # {{{1
    my ($fh                   , # in   handle whose lines are "size,language,file"
        $rh_Language          , # out  $rh_Language->{file} = language
        $rh_unique_source_file, # out  $rh_unique_source_file->{file} = 1
        $rh_Err               , # in   passed through to different_files()
        $raa_errors           , # out  errors encountered
        $rh_ignored           , # out  passed through to different_files()
       ) = @_;

    # Weed out duplicate files.  Files are first grouped by size; a file that
    # is alone in its size group is unique by definition, while groups of
    # equally sized files are handed to different_files() to be told apart.
    # With --skip-uniqueness every listed file is accepted as unique.
    print "-> remove_duplicate_files\n" if $opt_v > 2;

    my $n = 0;
    my %files_by_size = ();  # files_by_size{ # bytes } = [ list of files ]
    seek($fh, 0, 0);         # rewind to beginning of the temp file
    while (<$fh>) {
        ++$n;
        # Limit of 3 keeps commas that are part of the file name.
        my ($size_in_bytes, $language, $file) = split(/,/, $_, 3);
        chomp($file);
        $rh_Language->{$file} = $language;
        push @{$files_by_size{$size_in_bytes}}, $file;
        $rh_unique_source_file->{$file} = 1 if $opt_skip_uniqueness;
    }
    return if $opt_skip_uniqueness;

    printf "Duplicate file check %d files (%d known unique)\r",
        $n, scalar keys %files_by_size
        if $opt_progress_rate and ($n > $opt_progress_rate);

    $n = 0;
    foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
        ++$n;
        if ($opt_progress_rate and !($n % $opt_progress_rate)) {
            printf "Unique: %8d files \r", $n;
        }

        my $ra_same_size = $files_by_size{$bytes};
        if (scalar @{$ra_same_size} == 1) {
            # only one file is this big; must be unique
            $rh_unique_source_file->{ $ra_same_size->[0] } = 1;
            next;
        }

        # All files in this group are $bytes long.  They are sorted on the
        # full path rather than on the basename: a basename need not be
        # unique (eg "includeA/x.h" and "includeB/x.h"), which caused
        # repeatability problems.  Ref bug #114.
        my @sorted_bn = sort @{$ra_same_size};
        my @distinct  = different_files(\@sorted_bn ,
                                         $rh_Err     ,
                                         $raa_errors ,
                                         $rh_ignored );
        foreach my $F (@distinct) {
            $rh_unique_source_file->{$F} = 1;
        }
    }
    print "<- remove_duplicate_files\n" if $opt_v > 2;
} # 1}}}
sub find_preprocessor {                      # {{{1
    # invoked by File::Find's find() each time it enters a new directory
    # Reads global variable %Exclude_Dir.
    # Populates global variable %Ignored.
    # Reject files/directories in cwd which are in the exclude list.
    #
    # Receives the entry names of the directory being entered (pure file or
    # directory names, no separators) and returns the subset that find()
    # should go on to visit.
    #
    # NOTE(review): the %Ignored entries below are keyed on
    # $File::Find::name; it looks like File::Find does not update that
    # variable per entry while a preprocess callback runs -- confirm that
    # the recorded key is the intended path.
    print "-> find_preprocessor(", cwd(), ")\n" if $opt_v > 2;
    my @ok = ();
    #printf "TOP find_preprocessor\n";
    foreach my $F_or_D (@_) { # pure file or directory name, no separators
        next if $F_or_D =~ /^\.{1,2}$/;  # skip . and ..

        if ($Exclude_Dir{$F_or_D}) {
            # Report the excluded name itself.  The value stored in
            # %Exclude_Dir is only a truth flag, so interpolating it made
            # every reason read "--exclude-dir=1".
            $Ignored{$File::Find::name} = "--exclude-dir=$F_or_D";
            next;
        }

        #printf " F_or_D=%-20s File::Find::name=%s\n", $F_or_D, $File::Find::name;
        if (!$opt_not_match_d) {
            push @ok, $F_or_D;
            next;
        }

        if ($opt_fullpath and $File::Find::name =~ m{$opt_not_match_d}) {
            $Ignored{$File::Find::name} = "--not-match-d=$opt_not_match_d";
        } elsif (!-d $F_or_D and basename($File::Find::name) =~ m{$opt_not_match_d}) {
            $Ignored{$File::Find::name} = "--not-match-d (basename) =$opt_not_match_d";
        } else {
            push @ok, $F_or_D;
        }
    }
    print "<- find_preprocessor(@ok)\n" if $opt_v > 2;
    return @ok;
} # 1}}}
sub files {                                  # {{{1
    # "wanted" callback invoked by File::Find's find(); the entry under
    # consideration is in $_ and its path in $File::Find::name.
    # Appends accepted paths to the global @file_list, counts them in
    # $nFiles_Found and records rejected ones in %Ignored.
    # See also find_preprocessor() which prunes undesired directories.
    my $Dir = fastcwd(); # not $File::Find::dir which just gives relative path

    # --match-f / --not-match-f are tested against as much of the path as is
    # known with --fullpath, otherwise against the basename only.
    my $name_to_test = $opt_fullpath ? $File::Find::name : $_;
    return if $opt_match_f     and $name_to_test !~ m{$opt_match_f};
    return if $opt_not_match_f and $name_to_test =~ m{$opt_not_match_f};
    return if $opt_match_d     and $Dir          !~ m{$opt_match_d};

    my $nBytes = -s $_ ;
    unless ($nBytes) {
        $Ignored{$File::Find::name} = 'zero sized file';
        printf "files(%s) zero size\n", $File::Find::name if $opt_v > 5;
        return;  # attempting other tests w/pipe or socket will hang
    }

    # $opt_max_file_size is expressed in MB.
    my $bytes_per_MB = 1024**2;
    if ($nBytes > $opt_max_file_size * $bytes_per_MB) {
        my $size_in_MB = $nBytes / $bytes_per_MB;
        $Ignored{$File::Find::name} =
            "file size of $size_in_MB MB exceeds max file size of $opt_max_file_size MB";
        printf "file(%s) exceeds $opt_max_file_size MB\n",
            $File::Find::name if $opt_v > 5;
        return;
    }

    my $is_dir = is_dir($_);
    my $is_bin = -B $_ ;
    if ($opt_v > 5) {
        printf "files(%s) size=%d is_dir=%d -B=%d\n",
            $File::Find::name, $nBytes, $is_dir, $is_bin;
    }
    # A file that looks binary is still accepted when --unicode recognizes
    # it as Unicode text, or when --read-binary-files is in effect.
    if ($opt_unicode and unicode_file($_)) {
        $is_bin = 0;
    }
    if ($opt_read_binary_files) {
        $is_bin = 0;
    }
    return if $is_dir or $is_bin;

    ++$nFiles_Found;
    if ($opt_progress_rate and !($nFiles_Found % $opt_progress_rate)) {
        printf "%8d files\r", $nFiles_Found;
    }
    push @file_list, $File::Find::name;
} # 1}}}
sub archive_files {                          # {{{1
    # "wanted" callback invoked by File::Find's find().
    # Appends the current path to the global @binary_archive once for every
    # key of %Known_Binary_Archives that, used as a pattern anchored at the
    # end of the string, matches the path.
    my $candidate = $File::Find::name;
    for my $suffix_re (keys %Known_Binary_Archives) {
        next unless $candidate =~ m{$suffix_re$};
        push @binary_archive, $candidate;
    }
} # 1}}}
sub is_file {                                # {{{1
    # Returns the result of Perl's -f file test for the given item, i.e.
    # true when the item is a plain file.
    # This wrapper dates from when -f did not work in ActiveState Perl on
    # Windows; the mode-bit test used back then is kept below for reference.
    my ($item) = @_;
    return -f $item;
    # Was:
    ####if ($ON_WINDOWS) {
    ####    my $mode = (stat $item)[2];
    ####    $mode = 0 unless $mode;
    ####    if ($mode & 0100000) { return 1; }
    ####    else                 { return 0; }
    ####} else {
    ####    return (-f $item);  # works on Unix, Linux, CygWin, z/OS
    ####}
} # 1}}}
sub is_dir { # {{{1
my $item = shift @_;
return (-d $item); # should work everywhere now (July 2017)
# Was:
##### portable method to test if item is a directory
##### (-d doesn't work in older versions of ActiveState Perl on Windows)
( run in 2.382 seconds using v1.01-cache-2.11-cpan-5837b0d9d2c )