Unicode-Tussle
view release on metacpan or search on metacpan
script/unichars view on Meta::CPAN
################################################################
#
# This is an sh wrapper to run the script under
# whichever perl occurs first in your path. See
# CHOICEs 1 and 2 below for alternate strategies.
# The -x will throw off your line numbers otherwise.
#
######################################################################
#
# The next line is legal in both shell and perl,
# but perl sees the if 0 so doesn't execute it.
#
eval 'exec perl -x -S $0 ${1+"$@"}'
if 0;
### CHOICE 1:
######################################################################
### MAKE FOLLOWING #! line THE TOP LINE, REPLACING /usr/local/bin ###
### with wherever you have a late enough version of Perl is ###
### installed. Will run under 5.10, but prefers 5.12 or better. ###
######################################################################
#!/usr/local/bin/perl
# ^^^^^^^^^^^^^^ <=== CHANGE ME ###
######################################################################
### CHOICE 2:
######################################################################
### ALTERNATELY, the following #! line does the same thing as ###
### the tricksy sh eval exec line: it finds whichever Perl is ###
### first in your path. However, it works only on BSD systems ###
### (including MacOS), but breaks under Solaris and Linux. ###
######################################################################
#!/usr/bin/env perl -CLA
######################################################################
use strict;
use warnings; # qw[ FATAL all ];
use charnames qw[ :full :short latin greek ];
use 5.10.1;
use File::Basename qw[ basename ];
use Getopt::Long qw[ GetOptions ];
use File::Spec;
use Carp;
use Pod::Usage qw[ pod2usage ];
use Encode qw[ decode ];
use Unicode::UCD qw(charinfo casefold);
use if $^V >= v5.11.3, qw[ feature unicode_strings ];
# don't need to import this
sub utf::is_utf8($);
################################################################
sub ARGCOUNT;
sub CF();
sub IT();
sub NAME();
sub NOT_REACHED;
sub NUM();
sub am_running_perldb;
sub check_options();
sub compile_filter();
sub deQ($);
sub deQQ($);
sub debug($);
sub dequeue($$);
sub display;
sub fork_pager;
sub genfuncs;
sub is_runnable;
sub locate_program;
sub main();
sub panic;
sub run_filter();
sub start_pager;
sub stupid_evil_and_wrong;
sub titlecase;
sub underscore;
################################################################
our $VERSION = "1.4 (2011-04-11)";
$| = 1; # command buffering quick-feeds piped stdout
$0 = basename($0); # shorten up warnings/errors
our %Opt;
our $CF;
our $CI;
our $Shown_Count = 0;
main();
exit;
################################################################
sub IT() { $_ }
sub NAME() { charnames::viacode(ord $_) || "" }
sub genfuncs {
for my $nf ( qw< NFD NFC NFKD NFKC FCD FCC > ) {
no strict "refs";
*$nf = sub(_) {
require Unicode::Normalize;
"Unicode::Normalize::$nf"->($_);
};
}
for my $check ( qw< checkNFD checkNFC checkNFKD checkNFKC checkFCD checkFCC > ) {
no strict "refs";
*$check = sub(_) {
require Unicode::Normalize;
my $stat = "Unicode::Normalize::$check"->($_);
if (defined $stat) {
return $stat || "0 but true";
} else {
# trick to quiet zero-conversion under -w
return 0 == 1;
}
}
}
for my $nf ( qw< Singleton Exclusion NonStDecomp Comp_Ex
NFD_NO NFC_NO NFC_MAYBE
NFKD_NO NFKC_NO NFKC_MAYBE >
)
{
no strict "refs";
*$nf = sub() {
require Unicode::Normalize;
"Unicode::Normalize::is$nf"->(ord);
};
}
for my $nl ( 1 .. 4 ) {
no strict "refs";
*{ "UCA$nl" } = sub(_) {
require Unicode::Collate;
my $class = Unicode::Collate:: ;
my @args = (level => $nl, variable => "Non-Ignorable");
if ($Opt{locale}) {
require Unicode::Collate::Locale;
$class = Unicode::Collate::Locale:: ;
push @args, locale => $Opt{locale};
}
state $coll = $class->new(@args);
return $coll->getSortKey($_[0]);
};
}
no warnings "once";
*UCA = \&UCA1;
}
sub CF() {
$CF = casefold(ord);
return ($CF && $CF->{status}) || "";
}
sub NUM() {
require Unicode::UCD;
Unicode::UCD->VERSION(0.32);
my $n = Unicode::UCD::num($_);
if (defined $n) {
return $n || "0 but true";
} else {
# trick to quiet zero-conversion under -w
return 0 == 1;
}
}
################################################################
sub main() {
for my $fh ( qw[STDOUT STDERR] ) {
binmode($fh, ":utf8")
|| die "can't binmode($fh) to :utf8 encoding: $!";
}
check_options();
genfuncs();
compile_filter();
$SIG{PIPE} = sub {exit 0};
run_filter();
if ($Opt{verbose}) {
print STDERR "$0: $Shown_Count code points matched.\n";
}
close(STDOUT) || warn "$0: close stdout failed: $!\n";
if ($Shown_Count) {
exit 0;
} else {
exit 1;
}
}
################################################################
sub debug($) {
return unless $Opt{debug};
my $msg = shift();
print STDERR "$msg\n";
}
sub check_options() {
Getopt::Long::Configure qw[ bundling auto_version ];
if (@ARGV == 0) {
@ARGV = qw{
( run in 1.485 second using v1.01-cache-2.11-cpan-524268b4103 )