App-Bin4TSV
view release on metacpan or search on metacpan
scripts/colsummary view on Meta::CPAN
#!/usr/bin/perl
# colsummary : TSVã¾ãã¯CSVãã¡ã¤ã«ã®ååã®å¤ã®æ§åã表示ãããã¨ã¦ã便å©ã
# 2015/05/11 - 2016/07/05 , 2018-03-28 . Shimono Toshiyuki
# 2019/10/24 ããã«å¤§å¹
ã«æ¸ãæ¿ã
use 5.014 ; use warnings ; # also confirmed on 5.011 5.014 5.018
use strict ;
use Time::HiRes qw [ gettimeofday tv_interval ] ; my ${ dt_start } = [ gettimeofday ] ;
my $time0 = time ;
use autodie qw [ open ] ;
use Getopt::Std ; getopts 'g:i:jl:m:suwz=!@:#:0:2:' => \my %o ;
use List::Util qw/max min maxstr minstr/ ;
use POSIX qw/strtod/;
use Scalar::Util qw/looks_like_number/;
use Term::ANSIColor qw/:constants color/ ; $Term::ANSIColor::AUTORESET = 1 ;
use Encode qw[ decode_utf8 encode_utf8 ] ;
use FindBin qw [ $Script ] ;
my $sdt = sprintf '%04d-%02d-%02d %02d:%02d:%02d', do{my @t= @{[localtime]}[5,4,3,2,1,0]; $t[0]+=1900; $t[1]++; @t } ;
eval "use PerlIO::gzip;1" or die "PerlIO::gzip cannot be loaded, so -z does not work. ($Script, $sdt)\n" if $o{z} ;
sub AlignOut ( @ ) ; # åºå ; eachFileã§ãColstatã§ã使ãã
sub ColStat ( $$ ) ; # $colvals->[åçª] 㨠ååã æ¸¡ããããã¦ããã®ä¸èº«ã表示ãããã; eachFileã§ãColstatã§ã使ãã
sub d3 ($) { $_[0] =~ s/(?<=\d)(?=(\d\d\d)+($|\D))/,/gr } ; # æ°ã3æ¡åºåãã«å¤æããã
#sub hhmmss () { sprintf '%02d:%02d:%02d' , @{[localtime]}[2,1,0] } ; # ç¾å¨æå»ã hh:mm:ss ã®å½¢å¼ã§åãåºãã
$/ = "\r\n" if $o{w} ; # -ræå®ã§ æ¹è¡æåãWindowså½¢å¼ã«å¤æ´ã
my $L = ',' ; # åºåã«ããç¾ããåºåãæåå
my $isep = $o{i} // "\t" ; # å
¥åã®åºåãæå $o{','} = do { $o{','} //= "\t" ; eval qq[qq[$o{','}]] } ;
my $nc = 0 ; # è¨æ°å¯¾è±¡ã¨ããªãã£ãã»ã«ã®æ°ãã«ã¦ã³ãã
my $sec = $o{'@'} // 15 ; # ä½ç§ãã¨ã«ã¬ãã¼ãã表示ãããã
my $rl ; # åãã¡ã¤ã«ã®èªãã è¡æ°ãæ ¼ç´ã
$SIG{ ALRM } = sub { say STDERR GREEN + (d3 $rl) . " lines read. " , scalar localtime ; alarm $sec } ;
my ${ INT1 } = sub {
&{ $SIG{ALRM} } ;
print STDERR BRIGHT_RED
'Do you want to get the halfway result? Then type Ctrl + C again within 2 seconds. '. "\n" .
'Really want to Quit? Then press Ctrl + "\" or Ctrl + Yen-Mark. (Ctrl+Z may be what you want.) ' . RESET "\n" ;
$SIG{INT} = sub { select *STDERR ; & ColStat ; select *STDOUT ; return } ;
sleep 2 ;
return ;
} ;
$SIG{ INT } = ${ INT1 } ;
$o{g} = 6 if ( ! defined $o{g} ) ; # åãåºãæ°
$| = 1 if $o{'!'} ;
* decode = $o{u} ? * decode_utf8 : sub ($){ $_[0] } ; #* encode = $o{u} ? * encode_utf8 : sub ($){ $_[0] } ;
$o{'#'} = decode ( $o{'#'} ) if defined $o{'#'} ;
my %fOut = (
j => [ map {UNDERLINE decode($_)} qw[åçªå· å¤ã®ç°ãªã æ°å¤åå¹³å åå å¤ã®ç¯å² æé »å¤ æé »å¤ã®åº¦æ° ..ãã¼ã«ã®åº¦æ°(éãªã) æ¡æ°ç¯å² ] ] ,
e => [ map {UNDERLINE $_ } qw[ cpos diff ave. name range frequent frequency ..lower(x_mul) digits] ] ) ;
binmode *STDOUT , ':utf8' if $o{u} ;
alarm $sec ;
push @ARGV , '-' unless @ARGV ; # æ¨æºå
¥åã®è¿½å
& eachFile ( $_ ) for @ARGV ;
exit 0 ;
sub eachFile ( $ ) {
sub colnames( $ ) ; # -=ã®æã«å
é è¡ã®æ
å ±ãåãåºã
sub filePinfo ; # ãã¡ã¤ã«æ¯ã®2次æ
å ±(ä¸è¡ãµããª)
sub ColFreq ( $$ ) ; # 第ï¼å¤æ°ã¯ãã¡ã¤ã«ãã³ã㫠第ï¼å¤æ°ã¯åç
§ ; ååã®å¤ã®åå¸ãåãåºã
my $FH = do { my $t = *STDIN if $_[0] eq '-' ; open $t, '<', $_[0] if!$t ; binmode $t , ':gzip(gzip)' if $o{z} ; $t } ; # ãã¡ã¤ã«ãã³ãã«ã®åå¾
$rl = 0 ;
my @colnames = colnames $FH if $o{'='} ;
my $maxCols = ColFreq $FH, my $colvals ; #my $colvals ; ååã®åãã¼ã¿å¤ã®åº¦æ°ãéè¨;$colvals->[åçª-1]{ãã¼ã¿å¤}=度æ°
close $FH ;
AlignOut @{ $fOut{$o{j}?'j':'e'} } if 0 ne ($o{0}//'') ;
defined $colvals->[$_] and ColStat $colvals->[ $_ ] , $colnames[$_] for 0 .. $maxCols - 1 ; # ãªãã·ã§ã³ -0 ã«ããå
¨ã¦ã®å¤ãé¤å¤ããããã¨ã¯èµ·ããããã
filePinfo ;
scripts/colsummary view on Meta::CPAN
my %cct ; $cct{$_} ++ foreach values %thash ; # 度æ°ã®ãã®ã¾ã度æ°ãæ ¼ç´ããããã®å¤æ°
my @kcct = sort {$b <=> $a} keys %cct ;
my @kcct1 = splice @kcct , 0, min( $o{g} ,$#kcct+1 ) ; # <- tricky!
my @kcct2 = splice @kcct , - min( $o{g} ,$#kcct+1 ) ; # <- tricky!
my @out ;
push @out, $_ + 1, scalar @vals, aveft ( \%thash , \@vals ), $_[1]//($_+1) ;
push @out , (join $L, minmaxstr \@vals) , (join$L, @skeys) ;
push @out , (join $L, MultSpec \@kcct1, \%cct) , (@kcct2? @kcct? '..' : $L : '' ) . (join $L , MultSpec \@kcct2, \%cct) ;
push @out , minmaxstr( \@{[map { length decode ($_) } @vals ]} ) ;
AlignOut @out ; # <-- ãã®1è¡ã§ãå
ã®å
¥åã®1åã®éç ²ãåºå
return ;
}
# åºå
sub AlignOut ( @ ) {
my @p = @_ ;
my @P ;
push @P , $p[0] ; ## (1) åçªå·ã®è¡¨ç¤º1ãã
push @P , GREEN BOLD $p[1] ; ## (2) ä½éãã®å¤ãåºç¾ãããã表示
push @P , BRIGHT_BLUE $p[2] if ($o{m}//'') ne 0 ; ## (3) å¹³åå¤ã®è¡¨ç¤º (å ç®ã¨æ¸ç®ã®é¢ä¿ãææ¡ããç®çãããã®ã§ãå¤ãç¡ãã¨ããã¯0ã¨è¦ãªã)
push @P , BRIGHT_YELLOW $p[3] ;## (4) åã®åå(åå)ã表示
push @P , BRIGHT_WHITE $p[4] ; ## (5) å¤ã®æå¤§ã¨æå°ãåãåºãã
push @P , $p[5] ;## (6) å
·ä½çãªå¤ã®è¡¨ç¤º (åºç¾åº¦æ°ã®å¤ãé ã« $o{g} å )
push @P , BRIGHT_GREEN $p[6] . GREEN $p[7] ;## ## (7) æé »åº¦æ°ã®åå¸## (7) ä¸ç¹(ãªãã¦ã)ã®å¦ç (7) ãã¼ã«åº¦æ°ã®åå¸
push @P , BRIGHT_BLUE $p[8] ; ## (8) å¤ã®æååé·ã®ç¯å²ã®è¡¨ç¤º
say join "\t" , @P ;
}
# å¹³åå¤ãè¨ç®ããå¦çãããã
sub aveft ( $$ ) {
my ($rHash,$rKeys) = @_ ;
my ($tval, $freq, $asum, $afreq ) ;
for( @{$rKeys} ) {
( my $num = $_ ) =~ s/(\d),/$1/g ; #s/,//g ; # 3æ¡åºåãã«ç¾ããåºåãã³ã³ããæ¶å»ãã
$tval = POSIX::strtod ( $num ) ;
$freq = $rHash->{ $_ } ;
$asum += $tval * $freq ;
$afreq += $freq ;
}
return sprintf '%5.3f',$asum/$afreq;
}
# 度æ°(é »åºä¸ä½ã®åæ°åã³ãã¼ã«ã®æ§å) ã«ã¤ãã¦è¡¨ç¤ºæååãæºåãã(..ã®åå¾ã§2åå¼ã³åºããã)
sub MultSpec ( $$ ) {
my ( $p_kc , $p_ccount ) = @_;
my @ostr ;
my $c=0 ;
while ( my $t = shift @$p_kc ) {
$c++ ;
push @ostr , $t if ( $p_ccount->{$t} == 1 ) ;
push @ostr , $t.'(x'.$p_ccount->{$t} .')' if ( $p_ccount->{$t} >= 2 ) ;
last if ( $c >= $o{g} ) ;
}
return @ostr ;
} ;
# é
ååç
§ãããæå°å¤æå¤§å¤ãåãåºã
sub minmaxstr ( $ ) {
sub part ( &@ ) ;
sub RangeStr ( $$ ) ;
my @gps = part {$_ eq '' ? 0 : looks_like_number $_ ? 1 : 2} @{ $_[0] } ;
my @ostr ;
push @ostr, '' if $gps[0] ; # 空æååãããã¨ãã®å¦ç
push @ostr, RangeStr( min(@{$gps[1]}), max(@{$gps[1]}) ) if $gps[1] ; # æ°ã«è¦ããå¤ãããã¨ãã®å¦ç
push @ostr, RangeStr( minstr(@{$gps[2]}), maxstr(@{$gps[2]}) ) if $gps[2] ; # æ°ã«è¦ããªãå¤ãããã¨ãã®å¦ç
return @ostr;
} ;
sub part ( &@ ) { my ($cd, @l) = @_ ; my @p ; push @{ $p[ $cd->($_) ] } , $_ for @l ; @p } ; # ãã®é¢æ°ã¯ List::MoreUtils
sub RangeStr ( $$ ) { $_[0] eq $_[1] ? "$_[0]" : "$_[0]..$_[1]" } # 2åã®æ°oræååãã 1..2ã®ãããªæååãçæ
## ãã«ãã®æ±ã
sub VERSION_MESSAGE {}
sub HELP_MESSAGE {
use FindBin qw[ $Script ] ;
$ARGV[1] //= '' ;
open my $FH , '<' , $0 ;
while(<$FH>){
s/\$0/$Script/g ;
print $_ if s/^=head1// .. s/^=cut// and $ARGV[1] =~ /^o(p(t(i(o(ns?)?)?)?)?)?$/i ? m/^\s+\-/ : 1;
}
close $FH ;
exit 0 ;
}
=encoding utf8
=head1 $0
ãã¼ã¿ãã¡ã¤ã«(TSVå½¢å¼ãä¸è¡ç®ã¯ããã)ã«ã¤ãã¦ãååã®æç¨ãªæ
å ±ãåºåããã
åºåé
ç®:
1. åçªå· ; ç½
2. ç°ãªãæ°(ååã«ç°ãªãå¤ãä½ååºç¾ããã) ; æããç·
3. å¹³åå¤ ( -m ã§å¹³åå¤ã®åºåã¯æå¶å¯è½) ; é
4. åå (ãããããåãåºã) ; é»è²
5. å¤ã®ç¯å² ; æããç½
6. å¤ã®é »åºã©ã³ãã³ã° ; æãç½
7. é »åºä¸ä½ã¨ä¸ä½ã«ã¤ãã¦ã®åºç¾åæ° ; æããç·
8. å¤ã®æååé·ã®ç¯å² ; é
[ãªãã·ã§ã³] :ã
(å
¥åãªãã·ã§ã³)
-= ; å
¥åã®æåã®è¡ãååã®ä¸¦ã³ã¨ä»®å®ããã®æå®ãããªãå ´åã¯ååã¯é£çªã«ãªãã
-i STR : åºåãæåãã¿ãæåã§ã¯ãªãã¦ã str ã«å¤æ´ã
-l 10 ; åã»ã«ã®å¤ã®é·ããæå®æåæ°ã«å¶éããã(ååã«ã¯é©ç¨ãããªãã)
-s ; åã»ã«ã®æ«å°¾ã®ç©ºç½ãé¤å»ã-uãããã¨åè§ç©ºç½ã ãã§ç¡ãã¦å
¨è§ç©ºç½ãé¤å»ã
-u ; utf-8 ã¨ãã¦å¦çãããã¨ã¨ããã -u ãæå®ãããªãã¨ããã¤ãåä½ã®å¦çã¨ãªãã
-w ; å
¥åã®æ¹è¡æåã³ã¼ãã(Windowså½¢å¼ã§ãã)"\r\n"ã¨è¦ãªãã
-z ; å
¥å㯠gzip å§ç¸®ããã¦ãããã¨ãä»®å®ã
-\# REGEX ; é¤å¤ããå¤ã®æ£è¦è¡¨ç¾ã®æå®ã '^é¨åæ£è¦è¡¨ç¾$' ã®ãããªæå®ã®ä»æ¹ããã使ããã¨ã«ãªãã ããã
-@ N : N ç§ãã¨ã«ï¼ä½è¡ãèªãã ããå ±åããã Report how many have read every N seconds.
(åºåãªãã·ã§ã³)
-0 0 : åºåã®å¤æ°ã®ååã®ä¸¦ã³ãåºåããªãã
-g N ; å
·ä½çãªå¤ãä½å表示ããããæå®ãããæªæå®ãªã6ã
-j : åºåã®ååã®ååãæ¥æ¬èªã§åºåããã
-m 0 ; å¹³åå¤ã表示ããªãã(å¹³åå¤ã¯ strtod ã使ã£ã¦ããã)
--help : ãã® $0 ã®ãã«ãã¡ãã»ã¼ã¸ãåºãã perldoc -t $0 | cat ã§ãã»ã¼åãã
--help opt : ãªãã·ã§ã³ã®ã¿ã®ãã«ããåºããopt以å¤ã§ã options ã¨å
é ã1æå以ä¸ä¸è´ããã°è¯ãã
( run in 0.555 second using v1.01-cache-2.11-cpan-39bf76dae61 )