Unicode-Tussle

 view release on metacpan or  search on metacpan

script/unichars  view on Meta::CPAN

################################################################
#
#   This is an sh wrapper to run the script under
#   whichever perl occurs first in your path.  See
#   CHOICEs 1 and 2 below for alternate strategies.
#   The -x will throw off your line numbers otherwise.
#
######################################################################
#
#   The next line is legal in both shell and perl,
#   but perl sees the if 0 so doesn't execute it.
#

eval 'exec perl -x -S $0 ${1+"$@"}'
      if 0;

### CHOICE 1:
######################################################################
### MAKE FOLLOWING #! line THE TOP LINE, REPLACING /usr/local/bin  ###
###   with wherever you have a late enough version of Perl is      ###
###   installed.  Will run under 5.10, but prefers 5.12 or better. ###
######################################################################
#!/usr/local/bin/perl
# ^^^^^^^^^^^^^^  <=== CHANGE ME                                   ###
######################################################################

### CHOICE 2:
######################################################################
### ALTERNATELY, the following #! line does the same thing as      ###
###   the tricksy sh eval exec line:  it finds whichever Perl is   ###
###   first in your path.  However, it works only on BSD systems   ###
###   (including MacOS), but breaks under Solaris and Linux.       ###
######################################################################
#!/usr/bin/env perl -CLA
######################################################################

use strict;
use warnings;     # qw[ FATAL all ];
use charnames qw[ :full :short latin greek ];

use 5.10.1;

use File::Basename  qw[ basename    ];
use Getopt::Long    qw[ GetOptions  ];
use File::Spec;
use Carp;

use Pod::Usage      qw[ pod2usage   ];
use Encode          qw[ decode      ];

use Unicode::UCD qw(charinfo casefold);

use if $^V >= v5.11.3, qw[ feature unicode_strings ];

# don't need to import this
sub utf::is_utf8($);

################################################################

sub ARGCOUNT;
sub CF();
sub IT();
sub NAME();
sub NOT_REACHED;
sub NUM();

sub am_running_perldb;
sub check_options();
sub compile_filter();
sub deQ($);
sub deQQ($);
sub debug($);
sub dequeue($$);
sub display;
sub fork_pager;
sub genfuncs;
sub is_runnable;
sub locate_program;
sub main();
sub panic;
sub run_filter();
sub start_pager;
sub stupid_evil_and_wrong;
sub titlecase;
sub underscore;

################################################################

our $VERSION = "1.4 (2011-04-11)";

$| = 1;             # command buffering quick-feeds piped stdout
$0 = basename($0);  # shorten up warnings/errors

our %Opt;
our $CF;
our $CI;
our $Shown_Count = 0;

main();
exit;

################################################################

sub   IT() { $_ }
sub NAME() { charnames::viacode(ord $_) || "" }

sub genfuncs  {

    for my $nf ( qw< NFD NFC NFKD NFKC FCD FCC > ) {
	no strict "refs";
	*$nf = sub(_) {
	    require Unicode::Normalize;
	    "Unicode::Normalize::$nf"->($_);
	};
    }

    for my $check ( qw< checkNFD checkNFC checkNFKD checkNFKC checkFCD checkFCC > ) {
	no strict "refs";
	*$check = sub(_) {
	    require Unicode::Normalize;
	    my $stat = "Unicode::Normalize::$check"->($_);
	    if (defined $stat) {
		return $stat || "0 but true";
	    } else {
		# trick to quiet zero-conversion under -w
		return 0 == 1;
	    }
	}
    }

    for my $nf ( qw< Singleton Exclusion NonStDecomp Comp_Ex
		     NFD_NO  NFC_NO  NFC_MAYBE
		    NFKD_NO NFKC_NO NFKC_MAYBE >
		)
    {
	no strict "refs";
	*$nf = sub() {
	    require Unicode::Normalize;
	    "Unicode::Normalize::is$nf"->(ord);
	};
    }


    for my $nl ( 1 .. 4 ) {
	no strict "refs";
	*{ "UCA$nl" } = sub(_) {
	    require Unicode::Collate;
	    my $class = Unicode::Collate:: ;
	    my @args = (level => $nl, variable => "Non-Ignorable");
	    if ($Opt{locale}) {
		require Unicode::Collate::Locale;
		$class = Unicode::Collate::Locale:: ;
		push @args, locale => $Opt{locale};
	    }
	    state $coll = $class->new(@args);
	    return $coll->getSortKey($_[0]);
	};
    }

    no warnings "once";
    *UCA = \&UCA1;

}

sub CF() {
    $CF = casefold(ord);
    return ($CF && $CF->{status}) || "";
}

sub NUM() {
    require Unicode::UCD;
    Unicode::UCD->VERSION(0.32);
    my $n = Unicode::UCD::num($_);
    if (defined $n) {
	return $n || "0 but true";
    } else {
	# trick to quiet zero-conversion under -w
	return 0 == 1;
    }
}

################################################################

sub main() {

    for my $fh ( qw[STDOUT STDERR] ) {
        binmode($fh, ":utf8")
            || die "can't binmode($fh) to :utf8 encoding: $!";
    }

    check_options();
    genfuncs();
    compile_filter();

    $SIG{PIPE} = sub {exit 0};

    run_filter();

    if ($Opt{verbose}) {
	print STDERR "$0: $Shown_Count code points matched.\n";
    }

    close(STDOUT) || warn "$0: close stdout failed: $!\n";

    if ($Shown_Count) {
	exit 0;
    } else {
	exit 1;
    }
}

################################################################

sub debug($) {
    return unless $Opt{debug};
    my $msg = shift();
    print STDERR "$msg\n";
}

sub check_options() {

    Getopt::Long::Configure qw[ bundling auto_version ];

    if (@ARGV == 0) {
	@ARGV = qw{



( run in 1.485 second using v1.01-cache-2.11-cpan-524268b4103 )