App-colsummary
view release on metacpan or search on metacpan
#!/usr/bin/perl
# colsummary : Shows what the values in each column of a TSV or CSV file look like. Very handy.
# 2015/05/11 - 2016/07/05 , 2018-03-28 . Shimono Toshiyuki
# 2019/10/24, 2021/06/08, 2021/06/11 Further substantial rewrites.
use 5.014 ;
use strict ;
use warnings ; # also confirmed on 5.011 5.014 5.018
use autodie qw [ open ] ; # open() throws on failure instead of returning false
use Encode qw[ decode_utf8 encode_utf8 ] ;
use FindBin qw [ $Script ] ; # $Script : this program's file name, used in messages
# Command-line switches are parsed into %o.
#   switches taking a value : -R -c -g -i -m -r -u -v -@ -# -0 -2
#   plain flags             : -j -s -z -= -!
use Getopt::Std ; getopts 'R:c:g:i:jm:r:su:v:z=!@:#:0:2:' => \my %o ;
use List::Util qw/max min maxstr minstr/ ;
use POSIX qw/strtod/ ; # strtod (str to double) is used when computing the mean.
use Scalar::Util qw/looks_like_number/;
use Term::ANSIColor qw/:constants color/ ; $Term::ANSIColor::AUTORESET = 1 ;
# $dt_start : [seconds, microseconds] at program start, as returned by gettimeofday.
use Time::HiRes qw [ gettimeofday tv_interval ] ; my ${ dt_start } = [ gettimeofday ] ;
# $sdt : local start time formatted as 'YYYY-MM-DD hh:mm:ss' (localtime's year is offset by 1900, month is 0-based).
my $sdt = sprintf '%04d-%02d-%02d %02d:%02d:%02d', do{my @t= @{[localtime]}[5,4,3,2,1,0]; $t[0]+=1900; $t[1]++; @t } ;
# -z needs PerlIO::gzip ; it is loaded only on demand and the program aborts when it is unavailable.
eval "use PerlIO::gzip;1" or die "PerlIO::gzip cannot be loaded, so -z does not work. ($Script, $sdt)\n" if $o{z} ;
sub AlignOut ( @ ) ; # output routine ; used by both eachFile and ColStat.
sub ColStat ( $$ ) ; # takes $colvals->[column number] and the column names, then displays the contents ; used by both eachFile and ColStat.
# d3 : return a copy of the argument with "," inserted every three digits.
#   - The argument itself is not modified.
#   - Only digit runs that are NOT directly preceded by a digit or by "." are
#     grouped, so the fractional part of a decimal is left alone :
#     1234.56789 gives "1,234.56789" (not "1,234.56,789").
#   - Any surrounding non-digit text (sign, units, labels) passes through unchanged.
#   - The ($) prototype is kept so that the unary call form "d3 $n" keeps working.
sub d3 ($) {
    my ( $text ) = @_ ;
    # Groups one pure digit string from the right in blocks of three.
    my $group = sub { $_[0] =~ s/(?<=\d)(?=(?:\d\d\d)+\z)/,/gr } ;
    # "$1" is copied before the helper runs its own regex.
    return $text =~ s/(?<![\d.])(\d+)/$group->("$1")/ger ;
}
sub eachFile ( $ ) ; # forward declaration ; takes one scalar argument
# The following three are called from eachFile.
sub colnames( $ ) ; # with -= : extracts the information held in the header (first) line
sub filePinfo ; # per-file secondary information (one-line summary)
sub ColFreq ( $$ ) ; # 1st argument : file handle, 2nd argument : a reference ; extracts the distribution of the values of each column
## Variables used throughout the script
# $optu0 is true only when "-u 0" was given ; in that mode no UTF-8 decoding is done
# and STDOUT is left without the :utf8 layer.
my $optu0 = exists $o{u} && $o{u} eq 0 ;
* decode = * decode_utf8 ; # default : decode() is an alias of decode_utf8
# With -u 0, decode() returns its argument unchanged.
# NOTE(review): *decode was aliased to the whole *decode_utf8 glob on the previous line, so this
# assignment presumably replaces main::decode_utf8 as well -- confirm that this is intended.
* decode = sub ( $ ) { $_[0] } if $optu0 ;
#* decode = (! $optu0) ? * decode_utf8 : sub ($){ $_[0] } ; #* encode = $o{u} ? * encode_utf8 : sub ($){ $_[0] } ;
my $optR0 = defined $o{R} && $o{R} eq 0 ; # true only when "-R 0" was given
# R0proc : strips one trailing "\r" from $_ ; with -R 0 it does nothing.
* R0proc = $optR0 ? sub {} : sub { s/\r$// } ;
# This line is arguably not a "variable used throughout", but the option handling
# lives in the first half of this section, so it is kept here.
binmode *STDOUT , ':utf8' unless $optu0 ;
$| = 1 if $o{'!'} ; # -! : unbuffered output
# Defaults for options that were not given on the command line, plus script-wide counters.
$o{g} //= 6 ; # if ( ! defined $o{g} ) ; # how many values to pick out
$o{r} //= "~" ; # symbol that denotes a range (used in the output)
$o{c} //= '|' ; # symbol that joins several range displays (used in the output)
$o{'#'} = decode ( $o{'#'} ) if defined $o{'#'} ; # regular expression for cells to exclude
push @ARGV , '-' unless @ARGV ; # no file named : add '-' so that standard input is read
my $isep = $o{i} // "\t" ; # input field separator (default : tab)
# (disabled code kept from the original) $o{','} = do { $o{','} //= "\t" ; eval qq[qq[$o{','}]] } ;
my $sec = $o{'@'} // 15 ; # interval in seconds between progress reports
my $nc = 0 ; # number of cells that were excluded from counting
my $rl ; # number of lines read from each file
# %fOut : header labels of the output table ; key 'j' holds the Japanese labels and
# key 'e' the English ones (presumably selected by the -j flag -- confirm at the place of use).
# Every label, including the empty one, is wrapped with UNDERLINE ; the Japanese labels are
# byte strings in this file (no "use utf8") and therefore go through decode() first.
# The Japanese labels were restored from encoding damage so that they match the English row.
my %fOut = (
    j => [map{UNDERLINE decode($_)}map{eval"\"$_\""}qw[列番 異なる値 数値化平均 列名 値の範囲 最頻値 頻度(重複)], q[], '桁数'],
    e => [ map {UNDERLINE $_ } qw[ cpos diff ave. name range frequent frequency(multi) ] , "", "digits" ] ) ;
my $col = undef ; # 0-origin column number ## used inside sub ColFreq.
# negcell : filter for cells excluded by the -# regular expression.
#   With -# given : when $_ matches the pattern, the column index $col and the
#   excluded-cell counter $nc are both advanced and control jumps to the label
#   EACH_CELL (defined outside this part of the file).
#   Without -# : a no-op.
# (The original trailing note mentioning o{'0'} is unreadable because of encoding damage.)
* negcell = defined $o{'#'} ? sub { if (m/$o{'#'}/ ) { $col ++ ; $nc ++ ; goto EACH_CELL } } : sub {} ;
## Signal handling setup
my ${ INT1 } = sub {
&{ $SIG{ALRM} } ;
print STDERR BRIGHT_RED
'Do you want to get the halfway result? Then type Ctrl + C again within 2 seconds. '. "\n" .
'Really want to Quit? Then press Ctrl + "\" or Ctrl + Yen-Mark. (Ctrl+Z may be what you want.) ' . RESET "\n" ;
$SIG{INT} = sub { select *STDERR ; & ColStat ; select *STDOUT ; return } ;
sleep 2 ;
( run in 2.068 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )