App-colsummary
view release on metacpan or search on metacpan
#!/usr/bin/perl
# colsummary : TSVã¾ãã¯CSVãã¡ã¤ã«ã®ååã®å¤ã®æ§åã表示ãããã¨ã¦ã便å©ã
# 2015/05/11 - 2016/07/05 , 2018-03-28 . Shimono Toshiyuki
# 2019/10/24, 2021/06/08, 2021/06/11 ããã«å¤§å¹
ã«æ¸ãæ¿ã
use 5.014 ;
use strict ;
use warnings ; # also confirmed on 5.011 5.014 5.018
use autodie qw [ open ] ;
use Encode qw[ decode_utf8 encode_utf8 ] ;
use FindBin qw [ $Script ] ;
use Getopt::Std ; getopts 'R:c:g:i:jm:r:su:v:z=!@:#:0:2:' => \my %o ;
use List::Util qw/max min maxstr minstr/ ;
use POSIX qw/strtod/ ; # # å¹³åå¤ã®è¨ç®ã«ç¨ãã str to double.
use Scalar::Util qw/looks_like_number/;
use Term::ANSIColor qw/:constants color/ ; $Term::ANSIColor::AUTORESET = 1 ;
use Time::HiRes qw [ gettimeofday tv_interval ] ; my ${ dt_start } = [ gettimeofday ] ;
my $sdt = sprintf '%04d-%02d-%02d %02d:%02d:%02d', do{my @t= @{[localtime]}[5,4,3,2,1,0]; $t[0]+=1900; $t[1]++; @t } ;
eval "use PerlIO::gzip;1" or die "PerlIO::gzip cannot be loaded, so -z does not work. ($Script, $sdt)\n" if $o{z} ;
sub AlignOut ( @ ) ; # åºå ; eachFileã§ãColstatã§ã使ãã
sub ColStat ( $$ ) ; # $colvals->[åçª] 㨠ååã æ¸¡ããããã¦ããã®ä¸èº«ã表示ãããã; eachFileã§ãColstatã§ã使ãã
sub d3 ($) { $_[0] =~ s/(?<=\d)(?=(\d\d\d)+($|\D))/,/gr } ; # æ°ã3æ¡åºåãã«å¤æããã
sub eachFile ( $ ) ;
# 以ä¸3åã¯eachFileããå¼ã°ããã
sub colnames( $ ) ; # -=ã®æã«å
é è¡ã®æ
å ±ãåãåºã
sub filePinfo ; # ãã¡ã¤ã«æ¯ã®2次æ
å ±(ä¸è¡ãµããª)
sub ColFreq ( $$ ) ; # 第ï¼å¤æ°ã¯ãã¡ã¤ã«ãã³ã㫠第ï¼å¤æ°ã¯åç
§ ; ååã®å¤ã®åå¸ãåãåºã
## å
¨ä½ã§ä½¿ã夿°
my $optu0 = exists $o{u} && $o{u} eq 0 ;
* decode = * decode_utf8 ;
* decode = sub ( $ ) { $_[0] } if $optu0 ;
#* decode = (! $optu0) ? * decode_utf8 : sub ($){ $_[0] } ; #* encode = $o{u} ? * encode_utf8 : sub ($){ $_[0] } ;
my $optR0 = defined $o{R} && $o{R} eq 0 ;
* R0proc = $optR0 ? sub {} : sub { s/\r$// } ; # -R0 ãæå®ãããæã®å¦ç
binmode *STDOUT , ':utf8' unless $optu0 ; # ããã ãå
¨ä½ã§ä½¿ã夿°ã¨ã¯è¨ãããããããããªããããªãã·ã§ã³ã®å¦çã¯ãã®ç¯ã®ååã«ããã®ã§ã
$| = 1 if $o{'!'} ;
$o{g} //= 6 ; # if ( ! defined $o{g} ) ; # åãåºãæ°
$o{r} //= "~" ; # ç¯å²ã表ãè¨å·(åºåã§ä½¿ã)
$o{c} //= '|' ; # ç¯å²ã示ã表示ãè¤æ°ç¹ããè¨å·(åºåã§ä½¿ã)
$o{'#'} = decode ( $o{'#'} ) if defined $o{'#'} ; # é¤å¤ããæ£è¦è¡¨ç¾
push @ARGV , '-' unless @ARGV ; # æ¨æºå
¥åã®è¿½å
my $isep = $o{i} // "\t" ; # å
¥åã®åºåãæå $o{','} = do { $o{','} //= "\t" ; eval qq[qq[$o{','}]] } ;
my $sec = $o{'@'} // 15 ; # ä½ç§ãã¨ã«ã¬ãã¼ãã表示ãããã
my $nc = 0 ; # è¨æ°å¯¾è±¡ã¨ããªãã£ãã»ã«ã®æ°ãã«ã¦ã³ãã
my $rl ; # åãã¡ã¤ã«ã®èªãã è¡æ°ãæ ¼ç´ã
my %fOut = (
j => [map{UNDERLINE decode($_)}map{eval"\"$_\""}qw[åçª ç°ãªãå¤ æ°å¤åå¹³å åå å¤ã®ç¯å² æé »å¤ é »åº¦(éè¤)], q[], 'æ¡æ°'],
e => [ map {UNDERLINE $_ } qw[ cpos diff ave. name range frequent frequency(multi) ] , "", "digits" ] ) ;
my $col = undef ; # 0ãªãªã¸ã³ã®ã«ã©ã çªå· ## sub ColFreq å
ã§ä½¿ãã
* negcell = defined $o{'#'} ? sub { if (m/$o{'#'}/ ) { $col ++ ; $nc ++ ; goto EACH_CELL } } : sub {} ; # o{'0'} ãããã
## ã·ã°ãã«ã«å¯¾ããè¨å®
my ${ INT1 } = sub {
&{ $SIG{ALRM} } ;
print STDERR BRIGHT_RED
'Do you want to get the halfway result? Then type Ctrl + C again within 2 seconds. '. "\n" .
'Really want to Quit? Then press Ctrl + "\" or Ctrl + Yen-Mark. (Ctrl+Z may be what you want.) ' . RESET "\n" ;
$SIG{INT} = sub { select *STDERR ; & ColStat ; select *STDOUT ; return } ;
sleep 2 ;
return ;
} ;
$SIG{ INT } = ${ INT1 } ;
$SIG{ ALRM } = sub { say STDERR GREEN + (d3 $rl) . " lines read. " , scalar localtime ; alarm $sec } ;
alarm $sec ;
eachFile $_ for @ARGV ;
exit 0 ;
## 1åãã¤ãã¡ã¤ã«ãèªã¿åãã
sub eachFile ( $ ) {
my @kcct = sort {$b <=> $a} keys %cct ;
my @kcct1 = splice @kcct , 0, min( $o{g} ,$#kcct+1 ) ; # <- tricky!
my @kcct2 = splice @kcct , - min( $o{g} ,$#kcct+1 ) ; # <- tricky!
push @out , join $o{c}, minmaxstr \@vals ; # åºç¾å¤ã®ç¯å²
push @out , join $o{c}, @skeys ; # æé »å¤ããã¤ã
push @out , join $o{c}, MultSpec \@kcct1, \%cct ; # é »åº¦ã®é«ãæ¹
push @out , (@kcct2? @kcct? $o{r} : $o{c} : '' ) . (join $o{c} , MultSpec \@kcct2, \%cct) ; # é »åº¦ã®ä½ãæ¹
push @out , $digitRg ; # æ¡æ°ç¯å²
AlignOut @out; # <-- å
ã®å
¥åã®1åã®æ
å ±ããåºå1è¡ã«ç¸å½ããã
return ;
}
## åºå
sub AlignOut ( @ ) {
my @p = @_ ;
my @P ;
push @P , $p[0] ; ## (1) åçªå·ã®è¡¨ç¤º1ãã
push @P , GREEN BOLD $p[1] ; ## (2) ä½éãã®å¤ãåºç¾ãããã表示
push @P , BRIGHT_BLUE $p[2] if ($o{m}//'') ne 0 ; ## (3) å¹³åå¤ã®è¡¨ç¤º (å ç®ã¨æ¸ç®ã®é¢ä¿ãææ¡ããç®çãããã®ã§ãå¤ãç¡ãã¨ããã¯0ã¨è¦ãªã)
push @P , BRIGHT_YELLOW $p[3] if $o{'='} ;## (4) åã®åå(åå)ã表示
push @P , BOLD BRIGHT_WHITE $p[4] ; ## (5) å¤ã®æå¤§ã¨æå°ãåãåºãã
push @P , $p[5] ;## (6) å
·ä½çãªå¤ã®è¡¨ç¤º (åºç¾åº¦æ°ã®å¤ãé ã« $o{g} å )
push @P , BRIGHT_GREEN $p[6] . GREEN $p[7] ;## ## (7) æé »åº¦æ°ã®åå¸## (7) ä¸ç¹(ãªãã¦ã)ã®å¦ç (7) ãã¼ã«åº¦æ°ã®åå¸
push @P , BRIGHT_BLUE $p[8] ; ## (8) å¤ã®æååé·ã®ç¯å²ã®è¡¨ç¤º
say join "\t" , @P ;
}
# å¹³åå¤ãè¨ç®ããå¦çãããã
sub aveft ( $$ ) {
my ($rHash,$rKeys) = @_ ;
my ($tval, $freq, $asum, $afreq ) ;
for( @{$rKeys} ) {
( my $num = $_ ) =~ s/(\d),/$1/g ; #s/,//g ; # 3æ¡åºåãã«ç¾ããåºåãã³ã³ããæ¶å»ãã
$tval = POSIX::strtod ( $num ) ; # å¹³åå¤ã®è¨ç®ã«ç¨ãã
$freq = $rHash->{ $_ } ;
$asum += $tval * $freq ;
$afreq += $freq ;
}
return sprintf '%5.3f',$asum/$afreq;
}
# 度æ°(é »åºä¸ä½ã®åæ°åã³ãã¼ã«ã®æ§å) ã«ã¤ãã¦è¡¨ç¤ºæååãæºåãã(..ã®åå¾ã§2åå¼ã³åºããã)
sub MultSpec ( $$ ) {
my ( $p_kc , $p_ccount ) = @_;
my @ostr ;
my $c=0 ;
while ( my $t = shift @$p_kc ) {
$c++ ;
push @ostr , $t if ( $p_ccount->{$t} == 1 ) ;
push @ostr , $t.'('.$p_ccount->{$t} .')' if ( $p_ccount->{$t} >= 2 ) ; # æ¬å¼§å
ã«å¤é度
last if ( $c >= $o{g} ) ;
}
return @ostr ;
} ;
# é
ååç
§ãããæå°å¤æå¤§å¤ãåãåºã
sub minmaxstr ( $ ) {
* uniq = sub (@) {my %hh ;map { $hh{$_}++ != 0 ? () : $_ } @_ } ;
sub part ( &@ ) ;
sub RangeStr ( $$ ) ;
my @gps = part {/^0*$/ ? 0 : looks_like_number $_ ? 1 : 2} @{ $_[0] } ;
my @ostr ;
push @ostr, join $o{r}, sort & uniq ( @{$gps[0]} ) if $gps[0] ; # 空æååãããã¨ãã®å¦ç
push @ostr, RangeStr( min(@{$gps[1]}), max(@{$gps[1]}) ) if $gps[1] ; # æ°ã«è¦ããå¤ãããã¨ãã®å¦ç
push @ostr, RangeStr( minstr(@{$gps[2]}), maxstr(@{$gps[2]}) ) if $gps[2] ; # æ°ã«è¦ããªãå¤ãããã¨ãã®å¦ç
return @ostr;
} ;
sub part ( &@ ) { my ($cd, @l) = @_ ; my @p ; push @{ $p[ $cd->($_) ] } , $_ for @l ; @p } ; # ãã®é¢æ°ã¯ List::MoreUtils
sub RangeStr ( $$ ) { $_[0] eq $_[1] ? "$_[0]" : "$_[0]$o{r}$_[1]" } # 2åã®æ°oræååãã 1..2ã®ãããªæååãçæ
## ãã«ãã®æ±ã
sub VERSION_MESSAGE {}
sub HELP_MESSAGE {
use FindBin qw[ $Script ] ;
$ARGV[1] //= '' ;
open my $FH , '<' , $0 ;
while(<$FH>){
s/\$0/$Script/g ;
print $_ if s/^=head1// .. s/^=cut// and $ARGV[1] =~ /^o(p(t(i(o(ns?)?)?)?)?)?$/i ? m/^\s+\-/ : 1;
}
close $FH ;
exit 0 ;
}
=encoding utf8
=head1 $0
ãã¼ã¿ãã¡ã¤ã«(TSVå½¢å¼ãä¸è¡ç®ã¯ããã)ã«ã¤ãã¦ãååã®æç¨ãªæ
å ±ãåºåããã
åºåé
ç®:
1. åçªå· ; ç½
2. ç°ãªãæ°(ååã«ç°ãªãå¤ãä½ååºç¾ããã) ; æããç·
3. å¹³åå¤ ( -m ã§å¹³åå¤ã®åºåã¯æå¶å¯è½) ; é
4. åå (ãããããåãåºã) ; é»è²
5. å¤ã®ç¯å² ; æããç½
6. å¤ã®é »åºã©ã³ãã³ã° ; æãç½
7. é »åºä¸ä½ã¨ä¸ä½ã«ã¤ãã¦ã®åºç¾åæ° ; æããç·
8. å¤ã®æååé·ã®ç¯å² ; é
[ãªãã·ã§ã³] :
(å
¥åãªãã·ã§ã³)
-= ; å
¥åã®æåã®è¡ãååã®ä¸¦ã³ã¨ä»®å®ããã®æå®ãããªãå ´åã¯ååã¯é£çªã«ãªãã
-i STR : åºåãæåãã¿ãæåã§ã¯ãªãã¦ã str ã«å¤æ´ã
-v N ; åã»ã«ã®å¤ã®é·ããæå®æåæ°ã«å¶éããã(ååã«ã¯é©ç¨ãããªãã)
-s ; åã»ã«ã®æ«å°¾ã®ç©ºç½ãé¤å»ã-u0ã¨ããªãéãåè§ç©ºç½ã ãã§ç¡ãã¦å
¨è§ç©ºç½ãé¤å»ã
-u 0 ; utf-8 ã¨ãã¦å¦çããé常ã®å¦çãããããã¤ãåä½ã®å¦çã¨ãªãã
-z ; å
¥å㯠gzip å§ç¸®ããã¦ãããã¨ãä»®å®ã
-\# REGEX ; é¤å¤ããå¤ã®æ£è¦è¡¨ç¾ã®æå®ã '^é¨åæ£è¦è¡¨ç¾$' ã®ãããªæå®ã®ä»æ¹ããã使ããã¨ã«ãªãã ããã
-@ N : N ç§ãã¨ã«ï¼ä½è¡ãèªãã ããå ±åããã Report how many have read every N seconds.
-R 0 ; æ¹è¡åºåãã\r\nã§ãã£ã¦ãä½ããªãã·ã§ã³æå®ããã«å¯¾å¦ãã¦ããããä¸å
·åãããã°ä½¿ãã
(åºåãªãã·ã§ã³)
-0 0 : åºåã®å¤æ°ã®ååã®ä¸¦ã³ãåºåããªãã
-g N ; å
·ä½çãªå¤ãä½å表示ããããæå®ãããæªæå®ãªã6ã
-j : åºåã®ååã®ååãæ¥æ¬èªã§åºåããã
-m 0 ; å¹³åå¤ã表示ããªãã(å¹³åå¤ã¯ strtod ã使ã£ã¦ããã)
-r str : ç¯å²ã表ãè¨å·(æªæå®ãªã"~"ã®1æå)ã-r ".." ãªã©ã¨æå®ããã(rã¯Rangeã®ã¤ããã)
-c str : ç¯å²ã示ã表示ãè¤æ°ãæããã¨ãã«ä½¿ãè¨å·ãæªæå®ãªã"|"ã-c ','ãªã©ã¨ããã
( run in 3.063 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )