App-Bin4TSV
view release on metacpan or search on metacpan
scripts/colsummary view on Meta::CPAN
#!/usr/bin/perl
# colsummary : TSVã¾ãã¯CSVãã¡ã¤ã«ã®ååã®å¤ã®æ§åã表示ãããã¨ã¦ã便å©ã
# 2015/05/11 - 2016/07/05 , 2018-03-28 . Shimono Toshiyuki
# 2019/10/24 ããã«å¤§å¹
ã«æ¸ãæ¿ã
use 5.014 ; use warnings ; # also confirmed on 5.011 5.014 5.018
use strict ;
use Time::HiRes qw [ gettimeofday tv_interval ] ; my ${ dt_start } = [ gettimeofday ] ;
my $time0 = time ;
use autodie qw [ open ] ;
use Getopt::Std ; getopts 'g:i:jl:m:suwz=!@:#:0:2:' => \my %o ;
use List::Util qw/max min maxstr minstr/ ;
use POSIX qw/strtod/;
use Scalar::Util qw/looks_like_number/;
use Term::ANSIColor qw/:constants color/ ; $Term::ANSIColor::AUTORESET = 1 ;
use Encode qw[ decode_utf8 encode_utf8 ] ;
use FindBin qw [ $Script ] ;
my $sdt = sprintf '%04d-%02d-%02d %02d:%02d:%02d', do{my @t= @{[localtime]}[5,4,3,2,1,0]; $t[0]+=1900; $t[1]++; @t } ;
eval "use PerlIO::gzip;1" or die "PerlIO::gzip cannot be loaded, so -z does not work. ($Script, $sdt)\n" if $o{z} ;
sub AlignOut ( @ ) ; # åºå ; eachFileã§ãColstatã§ã使ãã
sub ColStat ( $$ ) ; # $colvals->[åçª] 㨠ååã æ¸¡ããããã¦ããã®ä¸èº«ã表示ãããã; eachFileã§ãColstatã§ã使ãã
sub d3 ($) { $_[0] =~ s/(?<=\d)(?=(\d\d\d)+($|\D))/,/gr } ; # æ°ã3æ¡åºåãã«å¤æããã
#sub hhmmss () { sprintf '%02d:%02d:%02d' , @{[localtime]}[2,1,0] } ; # ç¾å¨æå»ã hh:mm:ss ã®å½¢å¼ã§åãåºãã
$/ = "\r\n" if $o{w} ; # -ræå®ã§ æ¹è¡æåãWindowså½¢å¼ã«å¤æ´ã
my $L = ',' ; # åºåã«ããç¾ããåºåãæåå
my $isep = $o{i} // "\t" ; # å
¥åã®åºåãæå $o{','} = do { $o{','} //= "\t" ; eval qq[qq[$o{','}]] } ;
my $nc = 0 ; # è¨æ°å¯¾è±¡ã¨ããªãã£ãã»ã«ã®æ°ãã«ã¦ã³ãã
my $sec = $o{'@'} // 15 ; # ä½ç§ãã¨ã«ã¬ãã¼ãã表示ãããã
my $rl ; # åãã¡ã¤ã«ã®èªãã è¡æ°ãæ ¼ç´ã
$SIG{ ALRM } = sub { say STDERR GREEN + (d3 $rl) . " lines read. " , scalar localtime ; alarm $sec } ;
my ${ INT1 } = sub {
&{ $SIG{ALRM} } ;
print STDERR BRIGHT_RED
'Do you want to get the halfway result? Then type Ctrl + C again within 2 seconds. '. "\n" .
'Really want to Quit? Then press Ctrl + "\" or Ctrl + Yen-Mark. (Ctrl+Z may be what you want.) ' . RESET "\n" ;
$SIG{INT} = sub { select *STDERR ; & ColStat ; select *STDOUT ; return } ;
sleep 2 ;
return ;
} ;
$SIG{ INT } = ${ INT1 } ;
$o{g} = 6 if ( ! defined $o{g} ) ; # åãåºãæ°
$| = 1 if $o{'!'} ;
* decode = $o{u} ? * decode_utf8 : sub ($){ $_[0] } ; #* encode = $o{u} ? * encode_utf8 : sub ($){ $_[0] } ;
$o{'#'} = decode ( $o{'#'} ) if defined $o{'#'} ;
my %fOut = (
j => [ map {UNDERLINE decode($_)} qw[åçªå· å¤ã®ç°ãªã æ°å¤åå¹³å åå å¤ã®ç¯å² æé »å¤ æé »å¤ã®åº¦æ° ..ãã¼ã«ã®åº¦æ°(éãªã) æ¡æ°ç¯å² ] ] ,
e => [ map {UNDERLINE $_ } qw[ cpos diff ave. name range frequent frequency ..lower(x_mul) digits] ] ) ;
binmode *STDOUT , ':utf8' if $o{u} ;
alarm $sec ;
push @ARGV , '-' unless @ARGV ; # æ¨æºå
¥åã®è¿½å
& eachFile ( $_ ) for @ARGV ;
exit 0 ;
sub eachFile ( $ ) {
sub colnames( $ ) ; # -=ã®æã«å
é è¡ã®æ
å ±ãåãåºã
sub filePinfo ; # ãã¡ã¤ã«æ¯ã®2次æ
å ±(ä¸è¡ãµããª)
sub ColFreq ( $$ ) ; # 第ï¼å¤æ°ã¯ãã¡ã¤ã«ãã³ã㫠第ï¼å¤æ°ã¯åç
§ ; ååã®å¤ã®åå¸ãåãåºã
my $FH = do { my $t = *STDIN if $_[0] eq '-' ; open $t, '<', $_[0] if!$t ; binmode $t , ':gzip(gzip)' if $o{z} ; $t } ; # ãã¡ã¤ã«ãã³ãã«ã®åå¾
$rl = 0 ;
my @colnames = colnames $FH if $o{'='} ;
my $maxCols = ColFreq $FH, my $colvals ; #my $colvals ; ååã®åãã¼ã¿å¤ã®åº¦æ°ãéè¨;$colvals->[åçª-1]{ãã¼ã¿å¤}=度æ°
close $FH ;
AlignOut @{ $fOut{$o{j}?'j':'e'} } if 0 ne ($o{0}//'') ;
defined $colvals->[$_] and ColStat $colvals->[ $_ ] , $colnames[$_] for 0 .. $maxCols - 1 ; # ãªãã·ã§ã³ -0 ã«ããå
¨ã¦ã®å¤ãé¤å¤ããããã¨ã¯èµ·ããããã
filePinfo ;
}
# ãããããååãåå¾ããã -= ãæå®ãããå ´åã®ã¿
sub colnames ( $ ) { my @F = split /$isep/, do { my $FH = $_[0] ; my $t = <$FH> ; $rl++ if defined $t ; $t //= '' ; chomp $t ; decode ($t) } , -1 }
sub filePinfo {
exit if ($o{2}//'') eq 0 ;
$rl = d3 ($rl // 0) ; # read lines
my $procsec = tv_interval ${ dt_start } ;
my $out = "$rl line(s) read; ";
$out .= "$nc cells are not counted; " if $nc ;
$out .= sprintf '%0.6f seconds (colsummary)', $procsec ; # ãã¾ã«ãã¤ã¯ãç§åä½ã®$procsecã15æ¡ãããã§è¡¨ç¤ºãããã®ã§sprintfã
say STDERR BOLD DARK ITALIC CYAN $out ;
}
# ååã®å¤ã®åå¸ãåãåºã
sub ColFreq ( $$ ) { # 第ï¼å¤æ°ã¯ãã¡ã¤ã«ãã³ã㫠第ï¼å¤æ°ã¯åç
§
#my %zstr ; # é¤å¤ãããæååã®åºç¾é »åº¦ã(ç¹æ¤ç¨ã§ãããã) #my $intflg ; #$SIG{INT} = sub { $intflg = 1 } ;
my $maxCols = 0 ;
my $col = undef ; # 0ãªãªã¸ã³ã®ã«ã©ã çªå·
* lenlim = defined $o{l} ? sub { grep { $_ = substr $_, 0, $o{l} } @_ } : sub {} ; # -l ã§é·ãå¶é
* tailspacetrim = defined $o{s} ? sub { grep { s/\s+$// } @_ } : sub {} ;
* negcell = defined $o{'#'} ? sub { if (m/$o{'#'}/ ) { $col ++ ; $nc ++ ; goto EACH_CELL } } : sub {} ; # o{'0'} ãããã
for ( my $FH = $_[0] ; <$FH> ; $rl ++ ) {
#$rl ++ ;
chomp ;
my @F = map { decode ( $_ ) } split /$isep/ , $_ , -1 ;
& lenlim ( @F ) ; # åã»ã«ã®é·ãå¶é
& tailspacetrim ( @F ) ;
$col = 0 ;
EACH_CELL :
while ( defined ($_ = shift @F) ) {
#do { $zstr { $F[$_] } ++ ; next } if exists $o{'0'} && $F[$_] =~ m/$o{'0'}/ ;
& negcell ; #next if exists $o{'0'} && $F[$_] =~ m/$o{'0'}/ ;
++ $_[1] -> [ $col ] { $_ } ; # ååã®åãã¼ã¿å¤ã®åº¦æ°ãéè¨
$col ++ ;
}
$maxCols = $col if $maxCols < $col ;
}
# é¤å»ãããå¤ã®é »åº¦ä¸è¦§ã
#if ( $o{'0'} ) {
# print ON_WHITE BLACK "\t Suppressed cell value : " if keys %zstr;
# print ON_WHITE BLACK "\t $zstr{$_} : $_ " for keys %zstr
#} ;
return $maxCols ;
}
# $colvals->[åçª] 㨠ååã æ¸¡ããããã¦ããã®ä¸èº«ã表示ãããã
sub ColStat ( $$ ) {
sub aveft ( $$ ) ; # ååã®å¹³åå¤ãè¨ç®ããå¦çãããã
sub MultSpec ( $$ ) ; # 度æ°(é »åºä¸ä½ã®åæ°åã³ãã¼ã«ã®æ§å) ã«ã¤ãã¦è¡¨ç¤ºæååãæºåãã(..ã®åå¾ã§2åå¼ã³åºããã)
sub minmaxstr ( $ ) ; # é
ååç
§ãããæå°å¤æå¤§å¤ãåãåºã
( run in 1.404 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )