Unicode-Tussle
view release on metacpan or search on metacpan
script/uniwc view on Meta::CPAN
#!/usr/bin/env perl
#
# unicode wc: XXX redo this to do progressive matching
# so that it doesn't slurp in the whole file!!
# use 5.10.0;
use strict;
use warnings FATAL => "all";
use sigtrap qw[ die untrapped normal-signals ];
use Carp;
$SIG{__WARN__} = sub {
confess("FATALIZED WARNING: @_") unless $^S;
};
$SIG{__DIE__} = sub {
confess("UNCAUGHT EXCEPTION: @_") unless $^S;
};
$| = 1;
my $Errors = 0;
my $Headers = 0;
sub yuck($) {
my $errmsg = $_[0];
$errmsg =~ s/(?<=[^\n])\z/\n/;
print STDERR "$0: $errmsg";
}
process_input(\&countem);
sub countem {
local $_ = shift;
my ($file) = @_;
my (
@paras, @lines, @words,
$paracount, $linecount, $wordcount,
$grafcount, $charcount, $bytecount,
);
if ($charcount = length($_)) {
#$wordcount = 0;
$wordcount++ while /\P{Space}+/g;
#$wordcount = eval { @words = split m{ \p{Space}+ }x };
#yuck "error splitting words: $@" if $@;
#$linecount = 0;
$linecount++ while /\R/g;
#$linecount = eval { @lines = split m{ \R }x };
#yuck "error splitting lines: $@" if $@;
#$grafcount = 0;
$grafcount++ while /\X/g;
#$grafcount = eval { @lines = split m{ \R }x };
#yuck "error splitting lines: $@" if $@;
$paracount = 0;
$paracount++ while /\R{2,}/g;
yuck "error splitting paras: $@" if $@;
if ($linecount && !/\R\z/) {
yuck("missing linebreak at end of corrupted textfile $file");
$linecount .= "*";
$paracount .= "*";
}
}
$bytecount = tell;
if (-e $file) {
$bytecount = -s $file;
if ($bytecount != -s $file) {
yuck "filesize of $file differs from bytecount\n";
$Errors++;
}
}
my $mask = "%8s " x 6 . "%s\n";
printf $mask => qw{ Paras Lines Words Graphs Chars Bytes File } unless $Headers++;
printf $mask => map( { show_undef($_) }
$paracount, $linecount,
$wordcount, $grafcount,
$charcount, $bytecount,
), $file;
}
sub show_undef {
my $value = shift;
return defined($value)
? $value
: "undef";
}
END {
close(STDOUT) || die "$0: can't close STDOUT: $!";
exit($Errors != 0);
}
sub process_input {
my $function = shift();
my $enc;
if (@ARGV == 0 && -t) {
warn "$0: reading from stdin, type ^D to end or ^C to kill.\n";
}
unshift(@ARGV, "-") if @ARGV == 0;
FILE:
for my $file (@ARGV) {
( run in 1.003 second using v1.01-cache-2.11-cpan-71847e10f99 )