App-cloc
view release on metacpan or search on metacpan
svn:ignore property.
Alternatively <VCS> may be any system command
that generates a list of files.
Note: cloc must be in a directory which can read
the files as they are returned by <VCS>. cloc will
not download files from remote repositories.
'svn list -R' may refer to a remote repository
to obtain file names (and therefore may require
authentication to the remote repository), but
the files themselves must be local.
--unicode Check binary files to see if they contain Unicode
expanded ASCII text. This causes performance to
drop noticeably.
${BB}Processing Options${NN}
--autoconf Count .in files (as processed by GNU autoconf) of
recognized languages. See also --no-autogen.
--by-file Report results for every source file encountered.
--by-file-by-lang Report results for every source file encountered
in addition to reporting by language.
--count-and-diff <set1> <set2>
logic and run in UNIX mode. See also
--windows, --show-os.
--use-sloccount If SLOCCount is installed, use its compiled
executables c_count, java_count, pascal_count,
php_count, and xml_count instead of cloc's
counters. SLOCCount's compiled counters are
substantially faster than cloc's and may give
a performance improvement when counting projects
with large files. However, these cloc-specific
features will not be available: --diff,
--count-and-diff, --strip-comments, --unicode.
--windows Override the operating system autodetection
logic and run in Microsoft Windows mode.
See also --unix, --show-os.
${BB}Filter Options${NN}
--exclude-dir=<D1>[,D2,] Exclude the given comma separated directories
D1, D2, D3, et cetera, from being scanned. For
example --exclude-dir=.cache,test will skip
all files and subdirectories that have /.cache/
or /test/ as their parent directory.
$opt_force_lang_def ,
$opt_read_lang_def ,
$opt_write_lang_def ,
$opt_strip_comments ,
$opt_original_dir ,
$opt_quiet ,
$opt_report_file ,
$opt_sdir ,
$opt_sum_reports ,
$opt_processes ,
$opt_unicode ,
$opt_no3 , # accept it but don't use it
$opt_3 ,
$opt_extract_with ,
$opt_by_file ,
$opt_by_file_by_lang ,
$opt_by_percent ,
$opt_xml ,
$opt_xsl ,
$opt_yaml ,
$opt_csv ,
"print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
"report_file|report-file=s" => \$opt_report_file ,
"out=s" => \$opt_report_file ,
"script_lang|script-lang=s" => \@opt_script_lang ,
"sdir=s" => \$opt_sdir ,
"skip_uniqueness|skip-uniqueness" => \$opt_skip_uniqueness ,
"strip_comments|strip-comments=s" => \$opt_strip_comments ,
"original_dir|original-dir" => \$opt_original_dir ,
"sum_reports|sum-reports" => \$opt_sum_reports ,
"processes=n" => \$opt_processes ,
"unicode" => \$opt_unicode ,
"no3" => \$opt_no3 , # ignored
"3" => \$opt_3 ,
"v|verbose:i" => \$opt_v ,
"vcs=s" => \$opt_vcs ,
"version" => \$opt_version ,
"write_lang_def|write-lang-def=s" => \$opt_write_lang_def ,
"xml" => \$opt_xml ,
"xsl=s" => \$opt_xsl ,
"force_lang|force-lang=s" => \@opt_force_lang ,
"lang_no_ext|lang-no-ext=s" => \$opt_lang_no_ext ,
$HAVE_SLOCCOUNT_c_count = external_utility_exists("c_count /bin/sh");
}
# Reconcile --use-sloccount with the rest of the options.
# When $HAVE_SLOCCOUNT_c_count is false the option is dropped with a warning.
# Otherwise the user is told which external counters will be used and the
# cloc-only features that cannot be combined with them are switched off.
if ($opt_use_sloccount) {
if (!$HAVE_SLOCCOUNT_c_count) {
warn "c_count could not be found; ignoring --use-sloccount\n";
$opt_use_sloccount = 0;
} else {
warn "Using c_count, php_count, xml_count, pascal_count from SLOCCount\n";
# Warn only about the incompatible options the user actually requested.
warn "--diff is disabled with --use-sloccount\n" if $opt_diff;
warn "--count-and-diff is disabled with --use-sloccount\n" if $opt_count_diff;
warn "--unicode is disabled with --use-sloccount\n" if $opt_unicode;
warn "--strip-comments is disabled with --use-sloccount\n" if $opt_strip_comments;
# Reset all four unconditionally so later code sees them as off.
$opt_diff = 0;
$opt_count_diff = undef;
$opt_unicode = 0;
$opt_strip_comments = 0;
}
}
$opt_vcs = 0 if $opt_force_git;
my @COUNT_DIFF_ARGV = undef;
my $COUNT_DIFF_report_file = undef;
if ($opt_count_diff) {
die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
if scalar @ARGV != 2;
if (!-r $file_or_dir) {
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir];
next;
}
if (is_file($file_or_dir)) {
if (!(-s $file_or_dir)) { # 0 sized file, named pipe, socket
$rh_ignored->{$file_or_dir} = 'zero sized file';
next;
} elsif (-B $file_or_dir and !$opt_read_binary_files) {
# avoid binary files unless user insists on reading them
if ($opt_unicode) {
# only ignore if not a Unicode file w/trivial
# ASCII transliteration
if (!unicode_file($file_or_dir)) {
$rh_ignored->{$file_or_dir} = 'binary file';
next;
}
} else {
$rh_ignored->{$file_or_dir} = 'binary file';
next;
}
}
push @file_list, "$file_or_dir";
} elsif (is_dir($file_or_dir)) {
$nBytes/1024**2 . " MB exceeds max file size of " .
"$opt_max_file_size MB";
printf "file(%s) exceeds $opt_max_file_size MB\n",
$File::Find::name if $opt_v > 5;
return;
}
my $is_dir = is_dir($_);
my $is_bin = -B $_ ;
printf "files(%s) size=%d is_dir=%d -B=%d\n",
$File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
$is_bin = 0 if $opt_unicode and unicode_file($_);
$is_bin = 0 if $opt_read_binary_files;
return if $is_dir or $is_bin;
++$nFiles_Found;
printf "%8d files\r", $nFiles_Found
unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate));
push @file_list, $File::Find::name;
} # 1}}}
sub archive_files { # {{{1
# invoked by File::Find's find() Populates global variable @binary_archive
foreach my $ext (keys %Known_Binary_Archives) {
# remove_c_comments() )
# 3. compute comment lines as
# total lines - blank lines - lines left over after all
# comment filters have been applied
print "-> call_counter($file, $language)\n" if $opt_v > 2;
#print "call_counter: ", Dumper(@routines), "\n";
my @lines = ();
my $ascii = "";
if (-B $file and $opt_unicode) {
# was binary so must be unicode
$/ = undef;
my $IN = new IO::File $file, "r";
my $bin_text = <$IN>;
$IN->close;
$/ = "\n";
$ascii = unicode_to_ascii( $bin_text );
@lines = split("\n", $ascii );
foreach (@lines) { $_ = "$_\n"; }
} else {
# regular text file
@lines = read_file($file);
$ascii = join('', @lines);
}
my @original_lines = @lines;
</html>
';
} # 1}}}
sub die_unknown_lang { # {{{1
    # Abort the program with a message that names the unrecognized
    # language and the command line option it was supplied to, and that
    # points the user at --show-lang for the list of valid names.
    #   $lang        - the language name that was not recognized
    #   $option_name - the option the name was given with
    # Never returns.
    my ($lang, $option_name) = @_;

    my @message = (
        "Unknown language '$lang' used with $option_name option. ",
        "The command\n $script --show-lang\n",
        "will print all recognized languages. Language names are ",
        "case sensitive.\n",
    );
    die join('', @message);
} # 1}}}
sub unicode_file { # {{{1
    # Decide whether $file holds text that unicode_to_ascii() can convert.
    # Returns 1 when unicode_to_ascii() gives back a true value for the
    # file's contents, 0 otherwise.  Also returns 0, without reading the
    # file, when it is larger than 2_000_000 bytes, and returns 0 with a
    # warning when the file cannot be opened.
    my ($file) = @_;

    print "-> unicode_file($file)\n" if $opt_v > 2;

    # don't bother trying to test binary files bigger than 2 MB
    return 0 if (-s $file > 2_000_000);

    my $IN = IO::File->new($file, "r");
    unless (defined $IN) {
        warn "Unable to read $file; ignoring.\n";
        return 0;
    }
    my $contents = join('', <$IN>);
    $IN->close;

    # Collapse the converted text to a plain 1/0 flag before tracing the
    # exit so there is a single return path for both outcomes.
    my $is_unicode = unicode_to_ascii($contents) ? 1 : 0;
    print "<- unicode_file()\n" if $opt_v > 2;
    return $is_unicode;
} # 1}}}
sub unicode_to_ascii { # {{{1
    # A trivial attempt to convert UTF-16 little or big endian text that
    # contains only ASCII characters into a plain ASCII string.
    #
    #   $string - raw contents of the file
    #
    # Returns the extracted ASCII text, or '' when the input is 3 bytes
    # or shorter or does not look like UTF-16 encoded ASCII.
    #
    # The first two bytes are skipped without being inspected (a byte
    # order mark is expected there).  The remaining bytes are expected
    # to follow one of these layouts:
    #   little endian:  byte 3+i: ord of ASCII character
    #                   byte 4+i: 0
    #   big endian   :  byte 3+i: 0
    #                   byte 4+i: ord of ASCII character
    my $string = shift @_;

    my $length = length $string;
    #print "length=$length\n";
    return '' if $length <= 3;

    my @unicode = split(//, $string);

    # Only the first 200 bytes (at most 99 byte pairs after the two
    # leading bytes) are examined to decide on the encoding.
    my $max_peek = $length < 200 ? $length : 200;

    # True for printable ASCII (32..127) plus tab, line feed and
    # carriage return.
    my $is_text = sub {
        my ($code) = @_;
        return (   (32 <= $code and $code <= 127)
                or $code == 13
                or $code == 10
                or $code ==  9) ? 1 : 0;
    };

    # $points[0] scores the even positions 2,4,6,... (little endian lane),
    # $points[1] scores the odd  positions 3,5,7,... (big endian lane).
    # Both lanes have to be scored; otherwise big endian input can never
    # be recognized.
    my @points = (0, 0);
    for my $i (2 .. $max_peek - 1) {
        ++$points[$i % 2] if $is_text->(ord $unicode[$i]);
    }
    #print "points 1: $points[0]\n";
    #print "points 2: $points[1]\n";

    # More than 90 text-like bytes are required in a lane, so inputs
    # shorter than about 184 bytes can never reach the threshold.
    my $offset = undef;
    if    ($points[0] > 90) { $offset = 2; }
    elsif ($points[1] > 90) { $offset = 3; }
    else { return '' }  # neither big or little endian UTF-16

    # Keep every second byte, starting at the lane that holds the text.
    my @ascii = ();
    for (my $i = $offset; $i < $length; $i += 2) { push @ascii, $unicode[$i]; }
    return join("", @ascii);
} # 1}}}
sub uncompress_archive_cmd { # {{{1
my ($archive_file, ) = @_;
# Wrap $archive_file in single or double quotes in the system
# commands below to avoid filename chicanery (including
# spaces in the names).
print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2;
Alternatively VCS may be any system command
that generates a list of files.
Note: cloc must be run from a directory from which
the files, as listed by VCS, can be read. cloc will
not download files from remote repositories.
'svn list -R' may refer to a remote repository
to obtain file names (and therefore may require
authentication to the remote repository), but
the files themselves must be local.
=item B<--unicode>
Check binary files to see if they contain Unicode expanded ASCII text.
This causes performance to drop noticeably.
=back
=head2 Processing Options
=over 4
=item B<--use-sloccount>
If SLOCCount is installed, use its compiled
executables c_count, java_count, pascal_count,
php_count, and xml_count instead of cloc's
counters. SLOCCount's compiled counters are
substantially faster than cloc's and may give
a performance improvement when counting projects
with large files. However, these cloc-specific
features will not be available: B<--diff>,
B<--count-and-diff>, B<--strip-comments>, B<--unicode>.
=item B<--windows>
Override the operating system detection logic and run in
Microsoft Windows mode. See also B<--unix>, B<--show-os>.
=back
=head2 Filter Options
( run in 1.097 second using v1.01-cache-2.11-cpan-88abd93f124 )