Unicode-Tussle

 view release on metacpan or  search on metacpan

script/uniwc  view on Meta::CPAN

#!/usr/bin/env perl
#
# unicode wc: XXX redo this to do progressive matching
#             so that it doesn't slurp in the whole file!!

# use 5.10.0;


use strict;
use warnings FATAL => "all";
use sigtrap qw[ die untrapped normal-signals ];

use Carp;

$SIG{__WARN__}  = sub {
    confess("FATALIZED WARNING: @_")  unless $^S;
};

$SIG{__DIE__}  = sub {
    confess("UNCAUGHT EXCEPTION: @_")  unless $^S;
};


$| = 1;

my $Errors = 0;
my $Headers = 0;

sub yuck($) {
    my $errmsg = $_[0];
    $errmsg =~ s/(?<=[^\n])\z/\n/;
    print STDERR "$0: $errmsg";
}

process_input(\&countem);

sub countem {
	local $_ = shift;
    my ($file) = @_;

    my (
	@paras, @lines, @words,
	$paracount, $linecount, $wordcount, 
	$grafcount, $charcount, $bytecount,
    );


    if ($charcount = length($_)) {
	#$wordcount = 0;
	$wordcount++ while /\P{Space}+/g;

	#$wordcount = eval { @words = split m{ \p{Space}+  }x }; 
	#yuck "error splitting words: $@" if $@;

	#$linecount = 0;
	$linecount++ while /\R/g;

	#$linecount = eval { @lines = split m{ \R     }x }; 
	#yuck "error splitting lines: $@" if $@;

	#$grafcount = 0;
	$grafcount++ while /\X/g;

	#$grafcount = eval { @lines = split m{ \R     }x }; 
	#yuck "error splitting lines: $@" if $@;

	$paracount = 0;
	$paracount++ while /\R{2,}/g;

	yuck "error splitting paras: $@" if $@;

	if ($linecount && !/\R\z/) {
	    yuck("missing linebreak at end of corrupted textfile $file");
	    $linecount .= "*";
	    $paracount .= "*";
	} 

    }

    $bytecount = tell;
    if (-e $file) {
	$bytecount = -s $file;
	if ($bytecount != -s $file) {
	    yuck "filesize of $file differs from bytecount\n";
	    $Errors++;
	}
    } 
    my $mask = "%8s " x 6 . "%s\n";
    printf  $mask => qw{ Paras Lines Words Graphs Chars Bytes File } unless $Headers++;

    printf $mask => map( { show_undef($_) } 
				$paracount, $linecount, 
				$wordcount, $grafcount, 
				$charcount, $bytecount,
		       ), $file;
} 

sub show_undef {
    my $value = shift;
    return defined($value)
	     ? $value
	     : "undef";
} 

END { 
    close(STDOUT) || die "$0: can't close STDOUT: $!";
    exit($Errors != 0);
}

sub process_input {

    my $function = shift();

    my $enc;

    if (@ARGV == 0 && -t) {
	warn "$0: reading from stdin, type ^D to end or ^C to kill.\n";
    }

    unshift(@ARGV, "-") if @ARGV == 0;

FILE:

    for my $file (@ARGV) {



( run in 1.003 second using v1.01-cache-2.11-cpan-71847e10f99 )