App-Bin4TSV
view release on metacpan or search on metacpan
scripts/chars2code view on Meta::CPAN
#!/usr/bin/perl
use 5.014 ; use warnings ;
# Recognised flags: -0 -1 -b -n -u -w and "-:" . The spec lists ':' like any
# other letter, and the script reads that flag below as $o{':'} .
use Getopt::Std ; getopts ":01bnuw" , \my%o ;
use Term::ANSIColor qw[ :constants color ] ; $Term::ANSIColor::AUTORESET = 1 ;
use utf8 ;
# The :utf8 output layer is applied in every mode, including -b
# (the "if ! $o{b}" condition was disabled by the original author).
binmode STDOUT , ":utf8" ; #if ! $o{b} ;
use Encode qw [ decode_utf8 encode_utf8 ] ;
use FindBin qw [ $Script ] ;
# Start time as "YYYY-MM-DD hh:mm:ss"; only used in the -w load-failure message below.
my $sdt = sprintf '%04d-%02d-%02d %02d:%02d:%02d', do{my @t= @{[localtime]}[5,4,3,2,1,0]; $t[0]+=1900; $t[1]++; @t } ;
# -w needs the cp932 tables from Encode::JP; fail early with a clear message if they are missing.
eval { require Encode::JP ; 1 } or die "Encode::JP cannot be loaded, so -w does not work. ($Script, $sdt)\n" if $o{w} ;
# decode / encode are chosen once from the flags and installed under fixed names:
#   -b : identity (every input byte is treated as one character)
#   -w : cp932, through Encode::decode / Encode::encode
#   otherwise : UTF-8
# Both selectors test -b first, so giving -b and -w together consistently means "bytes".
sub decode ($) ;
sub encode ($) ;
* decode = $o{b} ? sub ($) { $_[0] } : $o{w} ? sub ($) { Encode::decode ( 'cp932' , $_[0] ) } : * decode_utf8 ;
* encode = $o{b} ? sub ($) { $_[0] } : $o{w} ? sub ($) { Encode::encode ( 'cp932' , $_[0] ) } : * encode_utf8 ;
my $h = $o{b} ? '0x' : $o{w}? '0x' : 'u+' ; # prefix printed in front of each character code
my $base = $o{0} ? 0 : 1 ; # -0 : positions and line numbers start at 0 instead of 1
# Mode dispatch: -n frequency table, -1 one character per output line, default one output line per input line.
if ( $o{n} ) { CountChars () }
elsif ( $o{1} ) { OneLineOneChar () }
else { LinePreserve () } ;
exit 0 ;
# Return a display form of one character: NUL, BEL, BS, TAB, LF, VT, FF, CR and
# ESC are replaced by their two-character backslash notation (e.g. a tab
# becomes the text '\t'); any other character is returned unchanged.
sub norChar($){
    state $escape_for = {
        0  => '\0' , 7  => '\a' , 8  => '\b' ,
        9  => '\t' , 10 => '\n' , 11 => '\v' ,
        12 => '\f' , 13 => '\r' , 27 => '\e' ,
    } ;
    my $code = ord $_[0] ;
    return $escape_for->{ $code } // $_[0] ;
}
# Default mode: one output line per input line.
# Each output line holds the optional line number (only with -:), the number of
# decoded characters, the length of the undecoded line in parentheses, a tab,
# and then every character (control characters escaped by norChar) followed by
# its code in hex with the $h prefix.
sub LinePreserve {
    my $with_linum = $o{':'} ;
    # Header row describing the columns.
    say CYAN UNDERLINE +( $with_linum ? 'lin#:' : '' ) . '#char', DARK '(#bytes)' , RESET UNDERLINE "\tchar ", DARK "u+code .." ;
    while ( defined ( my $raw = <> ) ) {
        my $text  = decode ( $raw ) ;
        # $. counts from 1; shift it to the origin selected by -0 .
        my $linum = $with_linum ? ( $. + $base - 1 ) . ":" : '' ;
        print CYAN +( $linum ), length $text , DARK '(' , length $raw , ')' , RESET "\t" ;
        for my $ch ( split //, $text , 0 ) {
            # The colour constants swallow the whole list, so this is a single string.
            my $cell = BOLD sprintf ( '%s' , norChar $ch ) , RESET DARK sprintf ( " $h%02x " , ord $ch ) ;
            print $cell ;
        }
        say '' ;
    }
}
# -1 mode: every output line corresponds to one input character.
# Columns (tab separated): with -: a "charpos:line-column" locator, then the
# character code, then the character in brackets (control characters escaped by
# norChar). After the last line the number of lines read is reported on STDERR.
sub OneLineOneChar {
    my ( $posC , $posL ) = ( $base ) x 2 ; # position of the character from the start of input ; line number
    # -u : Unicode code point. Otherwise the encoded bytes as hex ('H12' = at most 6 bytes).
    # Both variants work on their argument only, not on the global $_ .
    my $codify = $o{u}
        ? sub { sprintf 'u+%04x' , ord $_[0] }
        : sub { sprintf '0x%s' , unpack 'H12' , encode ( $_[0] ) } ;
    while ( defined ( my $line = <> ) ) {
        my $posC0 = $posC ; # position of the first character of this line
        for my $ch ( split // , decode ( $line ) , 0 ) {
            my @out = ( $codify->( $ch ) , sprintf ( '[%s]' , norChar $ch ) ) ;
            unshift @out , sprintf ( "%d:%d-%d" , $posC , $posL , $posC - $posC0 + $base ) if $o{':'} ;
            say join "\t" , @out ;
            $posC ++ ;
        }
        $posL ++ ;
    }
    say STDERR CYAN ITALIC "Lines: " , $. , RESET '' ;
}
# -n mode: frequency table of the characters that occur in the input.
# Prints a header and then one row per distinct character, most frequent first;
# characters with the same frequency are ordered by code point so that the
# output is the same on every run. With -: the first and last line on which the
# character occurs are appended. The number of lines read is reported on STDERR.
sub CountChars {
    my %chars ; # frequency of each character
    my %f0l ; # line index (counted from 0) of the first occurrence of each character
    my %f1l ; # line index (counted from 0) of the last occurrence of each character
    my $line = 0 ; # total number of lines read
    while ( defined ( my $raw = <> ) ) {
        for my $ch ( split // , decode ( $raw ) , 0 ) {
            $chars{ $ch } ++ ;
            $f0l{ $ch } //= $line ;
            $f1l{ $ch } = $line ;
        }
        $line ++ ;
    }
    my @header = ( "freq", "code_point", "char" ) ;
    push @header , "linum_first" , "linum_last" if $o{':'} ;
    say UNDERLINE join "\t" , @header ;
    # Hash key order is not stable between runs, hence the explicit tie-break.
    for my $ch ( sort { $chars{$b} <=> $chars{$a} || $a cmp $b } keys %chars ) {
        my @row = ( $chars{ $ch } , sprintf( "U+%02X" ,ord $ch) , sprintf ('[%s]' , norChar $ch ) ) ;
        push @row , $f0l{$ch} + $base , $f1l{$ch} + $base if $o{':'} ; # shift to the origin selected by -0
        say join "\t" , @row ;
    }
    say STDERR CYAN ITALIC "Lines: " , $line , RESET '' ;
}
# Hook that Getopt::Std looks up in package main when it handles --version or
# --help (see the Getopt::Std documentation). The body is empty, so this script
# prints no version text of its own.
sub VERSION_MESSAGE {}
# Help hook: print the manual stored after __END__ and exit with status 0.
# $0 is reduced to the bare script name and every literal "$0" in the manual is
# replaced by it. Output starts at the first line beginning with "=head1" and
# ends with the line beginning with "=cut"; both markers are stripped.
sub HELP_MESSAGE {
    $0 =~ s|.*/|| ;
    my $in_manual = 0 ;
    while ( my $line = <DATA> ) {
        $line =~ s/\$0/$0/g ;
        if ( ! $in_manual ) {
            next unless $line =~ s/^=head1// ;
            $in_manual = 1 ;
        }
        # The end marker is also tested on the opening line, as the range operator ".." does.
        my $is_last = $line =~ s/^=cut// ;
        print $line ;
        $in_manual = 0 if $is_last ;
    }
    exit 0 ;
}
no utf8 ;
__END__
=encoding utf8
=head1 $0
(utf8ã¨ã¿ãªãã¦) å
¥åãã¼ã¿ãã²ã¨ã¤ã²ã¨ã¤ã®æåã«ã°ããã¦ã符å·ä½ç½®ã表ãè¶³ããé »åº¦ãæ°ãããããã
[ãªãã·ã§ã³]
-b : utf8ã§ã¯ãªãã¦ãã¤ãåä½ã§å¦çããã
-u : 0x 表è¨ã§ã¯ãªã u+ ã§è¡¨ç¤ºããã¤ããªã§ç¡ãã¦ãã¦ãã³ã¼ãã®åºç¹çªå·ã¨ãªãã
-1 : å
¥åã1æåãã¤åºå1è¡ã«åæ ã
-: ; æåã®ä½ç½®ã表示
-0 : ãããããªä½ç½®ã表ãã®ã«ã1å§ã¾ãã§ã¯ãªãã¦ã0å§ã¾ãã«ããã
-n : åæåã®é »åº¦ãæ°ããã
[ç¨é]
* é »åºããæåãç¥ãã
* ããä¼¼ã2è¡ãã©ãã«éããããã®ãè¦ã¤ãããã¨ãã«ãsdiff ã§ä¸¦ã¹ãæä½ã®åã«ä½¿ãã
[éçºä¸ã®ã¡ã¢]
* grep -o . ã¨åãç¨åº¦ã®å¦çé度ãããã ãããã
* é »åº¦ã®å¤ãé ã«è¡¨ç¤ºããã¦ãããã表示é ãé¸ã¹ãããã«ãªãã·ã§ã³ãä½ãããã
* æåã®åºç¾é ã§ã¾ãã¯æ ¼ç´ãããããã
* freq, code-point, [normalized-char] çãå
é ã«åºåãã¹ãã
* æåã®åºç¾ä½å¤, æå¾ã®åºç¾ä½å¤ãåºåãããã
* é常ã«é·ãè¡ãèªãã¨ãã«ãéä¸ã§çµæã表示ããããã«ãããã
* ãã®ããã°ã©ã ã®ååã®åè£ã¨ãã¦ã¯ordcharsã¨ã ordutf8ã¨ã utf8ord ãèãããã2019-10-16ã«chars2code ã¨ããã
=cut
( run in 0.434 second using v1.01-cache-2.11-cpan-5511b514fd6 )