Big5
view release on metacpan or search on metacpan
lib/Ebig5.pm view on Meta::CPAN
package Ebig5;
use strict;
BEGIN { $INC{'warnings.pm'} = '' if $] < 5.006 } use warnings;
######################################################################
#
# Ebig5 - Run-time routines for Big5.pm
#
# http://search.cpan.org/dist/Char-Big5/
#
# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2018, 2019 INABA Hitoshi <ina@cpan.org>
######################################################################
use 5.00503; # Galapagos Consensus 1998 for primetools
# use 5.008001; # Lancaster Consensus 2013 for toolchains
# 12.3. Delaying use Until Runtime
# in Chapter 12. Packages, Libraries, and Modules
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
# (and so on)
# Version numbers should be boring
# http://www.dagolden.com/index.php/369/version-numbers-should-be-boring/
# For the impatient, the disinterested or those who just want to follow
# a recipe, my advice for all modules is this:
# our $VERSION = "0.001"; # or "0.001_001" for a dev release
# $VERSION = eval $VERSION; # No!! because '1.10' makes '1.1'
use vars qw($VERSION);
$VERSION = '1.22';
$VERSION = $VERSION;
BEGIN {
if ($^X =~ / jperl /oxmsi) {
die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)\n";
}
if (CORE::ord('A') == 193) {
die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).\n";
}
if (CORE::ord('A') != 0x41) {
die __FILE__, ": is not US-ASCII script (must be US-ASCII script).\n";
}
}
BEGIN {
# instead of utf8.pm
CORE::eval q{
no warnings qw(redefine);
*utf8::upgrade = sub { CORE::length $_[0] };
*utf8::downgrade = sub { 1 };
*utf8::encode = sub { };
*utf8::decode = sub { 1 };
*utf8::is_utf8 = sub { };
*utf8::valid = sub { 1 };
};
if ($@) {
*utf8::upgrade = sub { CORE::length $_[0] };
*utf8::downgrade = sub { 1 };
*utf8::encode = sub { };
*utf8::decode = sub { 1 };
*utf8::is_utf8 = sub { };
*utf8::valid = sub { 1 };
}
}
# instead of Symbol.pm
BEGIN {
sub gensym () {
if ($] < 5.006) {
return \do { local *_ };
}
else {
return undef;
}
}
sub qualify ($$) {
my($name) = @_;
lib/Ebig5.pm view on Meta::CPAN
# special character, "^xyz"
elsif ($name =~ /\A \^ [ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_]+ \z/x) {
# RGS 2001-11-05 : translate leading ^X to control-char
$name =~ s{\A \^ ([ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_]) }{'qq(\c'.$1.')'}xee;
return 'main::' . $name;
}
# Global names
elsif ($name =~ /\A (?: ARGV | ARGVOUT | ENV | INC | SIG | STDERR | STDIN | STDOUT ) \z/x) {
return 'main::' . $name;
}
# or other
elsif ($name =~ /\A [^ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz] \z/x) {
return 'main::' . $name;
}
elsif (defined $_[1]) {
return $_[1] . '::' . $name;
}
else {
return (caller)[0] . '::' . $name;
}
}
sub qualify_to_ref ($;$) {
if (defined $_[1]) {
no strict qw(refs);
return \*{ qualify $_[0], $_[1] };
}
else {
no strict qw(refs);
return \*{ qualify $_[0], (caller)[0] };
}
}
}
# P.714 29.2.39. flock
# in Chapter 29: Functions
# of ISBN 0-596-00027-8 Programming Perl Third Edition.
# P.863 flock
# in Chapter 27: Functions
# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
sub LOCK_SH() {1}
sub LOCK_EX() {2}
sub LOCK_UN() {8}
sub LOCK_NB() {4}
# instead of Carp.pm
sub carp;
sub croak;
sub cluck;
sub confess;
# 6.18. Matching Multiple-Byte Characters
# in Chapter 6. Pattern Matching
# of ISBN 978-1-56592-243-3 Perl Perl Cookbook.
# (and so on)
# regexp of character
my $your_char = q{[\x81-\xFE][\x00-\xFF]|[\x00-\xFF]};
use vars qw($qq_char); $qq_char = qr/\\c[\x40-\x5F]|\\?(?:$your_char)/oxms;
use vars qw($q_char); $q_char = qr/$your_char/oxms;
#
# Big5 character range per length
#
my %range_tr = ();
#
# Big5 case conversion
#
my %lc = ();
@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
my %uc = ();
@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
my %fc = ();
@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
if (0) {
}
elsif (__PACKAGE__ =~ / \b Ebig5 \z/oxms) {
%range_tr = (
1 => [ [0x00..0x80],
[0xFF..0xFF],
],
2 => [ [0x81..0xFE],[0x40..0x7E],
[0x81..0xFE],[0xA1..0xFE],
],
);
}
else {
croak "Don't know my package name '@{[__PACKAGE__]}'";
}
#
# @ARGV wildcard globbing
#
sub import {
if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
my @argv = ();
for (@ARGV) {
# has space
if (/\A (?:$q_char)*? [ ] /oxms) {
if (my @glob = Ebig5::glob(qq{"$_"})) {
push @argv, @glob;
}
else {
push @argv, $_;
}
lib/Ebig5.pm view on Meta::CPAN
}
#
# Big5 upper case with parameter
#
sub Ebig5::uc(@) {
if (@_) {
my $s = shift @_;
if (@_ and wantarray) {
return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
}
else {
return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
}
}
else {
return Ebig5::uc_();
}
}
#
# Big5 upper case without parameter
#
sub Ebig5::uc_() {
my $s = $_;
return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
}
#
# Big5 fold case with parameter
#
sub Ebig5::fc(@) {
if (@_) {
my $s = shift @_;
if (@_ and wantarray) {
return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
}
else {
return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
}
}
else {
return Ebig5::fc_();
}
}
#
# Big5 fold case without parameter
#
sub Ebig5::fc_() {
my $s = $_;
return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
}
#
# Big5 regexp capture
#
{
# 10.3. Creating Persistent Private Variables
# in Chapter 10. Subroutines
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
my $last_s_matched = 0;
sub Ebig5::capture {
if ($last_s_matched and ($_[0] =~ /\A (?>[1-9][0-9]*) \z/oxms)) {
return $_[0] + 1;
}
return $_[0];
}
# Big5 mark last regexp matched
sub Ebig5::matched() {
$last_s_matched = 0;
}
# Big5 mark last s/// matched
sub Ebig5::s_matched() {
$last_s_matched = 1;
}
# P.854 31.17. use re
# in Chapter 31. Pragmatic Modules
# of ISBN 0-596-00027-8 Programming Perl Third Edition.
# P.1026 re
# in Chapter 29. Pragmatic Modules
# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
$Ebig5::matched = qr/(?{Ebig5::matched})/;
}
#
# Big5 regexp ignore case modifier
#
sub Ebig5::ignorecase {
my @string = @_;
my $metachar = qr/[\@\\|[\]{]/oxms;
# ignore case of $scalar or @array
for my $string (@string) {
# split regexp
my @char = $string =~ /\G (?>\[\^|\\$q_char|$q_char) /oxmsg;
# unescape character
for (my $i=0; $i <= $#char; $i++) {
next if not defined $char[$i];
# open character class [...]
if ($char[$i] eq '[') {
my $left = $i;
# [] make die "unmatched [] in regexp ...\n"
if ($char[$i+1] eq ']') {
$i++;
}
while (1) {
lib/Ebig5.pm view on Meta::CPAN
}
elsif (_MSWin32_5Cended_path($_)) {
if (-d "$_/.") {
return wantarray ? (-C _,@_) : -C _;
}
else {
my $fh = gensym();
if (_open_r($fh, $_)) {
my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks) = CORE::stat $fh;
close($fh) or die "Can't close file: $_: $!";
my $C = ($^T - $ctime) / (24*60*60);
return wantarray ? ($C,@_) : $C;
}
}
}
return wantarray ? (undef,@_) : undef;
}
#
# Big5 stacked file test $_
#
sub Ebig5::filetest_ {
my $filetest = substr(pop @_, 1);
unless (CORE::eval qq{Ebig5::${filetest}_}) {
return '';
}
for my $filetest (CORE::reverse @_) {
unless (CORE::eval qq{ $filetest _ }) {
return '';
}
}
return 1;
}
#
# Big5 file test -r $_
#
sub Ebig5::r_() {
if (-e $_) {
return -r _ ? 1 : '';
}
elsif (_MSWin32_5Cended_path($_)) {
if (-d "$_/.") {
return -r _ ? 1 : '';
}
else {
my $fh = gensym();
if (_open_r($fh, $_)) {
my $r = -r $fh;
close($fh) or die "Can't close file: $_: $!";
return $r ? 1 : '';
}
}
}
# 10.10. Returning Failure
# in Chapter 10. Subroutines
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
# (and so on)
# 2010-01-26 The difference of "return;" and "return undef;"
# http://d.hatena.ne.jp/gfx/20100126/1264474754
#
# "Perl Best Practices" recommends to use "return;"*1 to return nothing, but
# it might be wrong in some cases. If you use this idiom for those functions
# which are expected to return a scalar value, e.g. searching functions, the
# user of those functions will be surprised at what they return in list
# context, an empty list - note that many functions and all the methods
# evaluate their arguments in list context. You'd better to use "return undef;"
# for such scalar functions.
#
# sub search_something {
# my($arg) = @_;
# # search_something...
# if(defined $found){
# return $found;
# }
# return; # XXX: you'd better to "return undef;"
# }
#
# # ...
#
# # you'll get what you want, but ...
# my $something = search_something($source);
#
# # you won't get what you want here.
# # @_ for doit() is (-foo => $opt), not (undef, -foo => $opt).
# $obj->doit(search_something($source), -option=> $optval);
#
# # you have to use the "scalar" operator in such a case.
# $obj->doit(scalar search_something($source), ...);
#
# *1: it returns an empty list in list context, or returns undef in scalar
# context
#
# (and so on)
return undef;
}
#
# Big5 file test -w $_
#
sub Ebig5::w_() {
if (-e $_) {
return -w _ ? 1 : '';
}
elsif (_MSWin32_5Cended_path($_)) {
if (-d "$_/.") {
return -w _ ? 1 : '';
}
else {
my $fh = gensym();
if (_open_a($fh, $_)) {
my $w = -w $fh;
close($fh) or die "Can't close file: $_: $!";
return $w ? 1 : '';
lib/Ebig5.pm view on Meta::CPAN
}
#
# Big5 path globbing (with parameter)
#
sub Ebig5::glob($) {
if (wantarray) {
my @glob = _DOS_like_glob(@_);
for my $glob (@glob) {
$glob =~ s{ \A (?:\./)+ }{}oxms;
}
return @glob;
}
else {
my $glob = _DOS_like_glob(@_);
$glob =~ s{ \A (?:\./)+ }{}oxms;
return $glob;
}
}
#
# Big5 path globbing (without parameter)
#
sub Ebig5::glob_() {
if (wantarray) {
my @glob = _DOS_like_glob();
for my $glob (@glob) {
$glob =~ s{ \A (?:\./)+ }{}oxms;
}
return @glob;
}
else {
my $glob = _DOS_like_glob();
$glob =~ s{ \A (?:\./)+ }{}oxms;
return $glob;
}
}
#
# Big5 path globbing via File::DosGlob 1.10
#
# Often I confuse "_dosglob" and "_doglob".
# So, I renamed "_dosglob" to "_DOS_like_glob".
#
my %iter;
my %entries;
sub _DOS_like_glob {
# context (keyed by second cxix argument provided by core)
my($expr,$cxix) = @_;
# glob without args defaults to $_
$expr = $_ if not defined $expr;
# represents the current user's home directory
#
# 7.3. Expanding Tildes in Filenames
# in Chapter 7. File Access
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
#
# and File::HomeDir, File::HomeDir::Windows module
# DOS-like system
if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
$expr =~ s{ \A ~ (?= [^/\\] ) }
{ my_home_MSWin32() }oxmse;
}
# UNIX-like system
else {
$expr =~ s{ \A ~ ( (?:[^\x81-\xFE/]|[\x81-\xFE][\x00-\xFF])* ) }
{ $1 ? (CORE::eval(q{(getpwnam($1))[7]})||my_home()) : my_home() }oxmse;
}
# assume global context if not provided one
$cxix = '_G_' if not defined $cxix;
$iter{$cxix} = 0 if not exists $iter{$cxix};
# if we're just beginning, do it all first
if ($iter{$cxix} == 0) {
$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
}
# chuck it all out, quick or slow
if (wantarray) {
delete $iter{$cxix};
return @{delete $entries{$cxix}};
}
else {
if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
return shift @{$entries{$cxix}};
}
else {
# return undef for EOL
delete $iter{$cxix};
delete $entries{$cxix};
return undef;
}
}
}
#
# Big5 path globbing subroutine
#
sub _do_glob {
my($cond,@expr) = @_;
my @glob = ();
my $fix_drive_relative_paths = 0;
OUTER:
for my $expr (@expr) {
next OUTER if not defined $expr;
next OUTER if $expr eq '';
my @matched = ();
my @globdir = ();
my $head = '.';
my $pathsep = '/';
lib/Ebig5.pm view on Meta::CPAN
if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^\x81-\xFE/\\]|[\x81-\xFE][\x00-\xFF]) #$1./$2#oxms) {
$fix_drive_relative_paths = 1;
}
}
if (($head, $tail) = _parse_path($expr,$pathsep)) {
if ($tail eq '') {
push @glob, $expr;
next OUTER;
}
if ($head =~ / \A (?:$q_char)*? [*?] /oxms) {
if (@globdir = _do_glob('d', $head)) {
push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
next OUTER;
}
}
if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
$head .= $pathsep;
}
$expr = $tail;
}
# If file component has no wildcards, we can avoid opendir
if ($expr !~ / \A (?:$q_char)*? [*?] /oxms) {
if ($head eq '.') {
$head = '';
}
if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
$head .= $pathsep;
}
$head .= $expr;
if ($cond eq 'd') {
if (Ebig5::d $head) {
push @glob, $head;
}
}
else {
if (Ebig5::e $head) {
push @glob, $head;
}
}
next OUTER;
}
Ebig5::opendir(*DIR, $head) or next OUTER;
my @leaf = readdir DIR;
closedir DIR;
if ($head eq '.') {
$head = '';
}
if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
$head .= $pathsep;
}
my $pattern = '';
while ($expr =~ / \G ($q_char) /oxgc) {
my $char = $1;
# 6.9. Matching Shell Globs as Regular Expressions
# in Chapter 6. Pattern Matching
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
# (and so on)
if ($char eq '*') {
$pattern .= "(?:$your_char)*",
}
elsif ($char eq '?') {
$pattern .= "(?:$your_char)?", # DOS style
# $pattern .= "(?:$your_char)", # UNIX style
}
elsif ((my $fc = Ebig5::fc($char)) ne $char) {
$pattern .= $fc;
}
else {
$pattern .= quotemeta $char;
}
}
my $matchsub = sub { Ebig5::fc($_[0]) =~ /\A $pattern \z/xms };
# if ($@) {
# print STDERR "$0: $@\n";
# next OUTER;
# }
INNER:
for my $leaf (@leaf) {
if ($leaf eq '.' or $leaf eq '..') {
next INNER;
}
if ($cond eq 'd' and not Ebig5::d "$head$leaf") {
next INNER;
}
if (&$matchsub($leaf)) {
push @matched, "$head$leaf";
next INNER;
}
# [DOS compatibility special case]
# Failed, add a trailing dot and try again, but only...
if (Ebig5::index($leaf,'.') == -1 and # if name does not have a dot in it *and*
CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars *and*
Ebig5::index($pattern,'\\.') != -1 # pattern has a dot.
) {
if (&$matchsub("$leaf.")) {
push @matched, "$head$leaf";
next INNER;
}
}
}
if (@matched) {
push @glob, @matched;
}
}
if ($fix_drive_relative_paths) {
for my $glob (@glob) {
$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
}
}
return @glob;
lib/Ebig5.pm view on Meta::CPAN
[^\x81-\xFE()] |
[\x81-\xFE][\x00-\xFF] |
\( (?{$nest++}) |
\) (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $q_brace = qr{(?{local $nest=0}) (?>(?:
[^\x81-\xFE\{\}] |
[\x81-\xFE][\x00-\xFF] |
\{ (?{$nest++}) |
\} (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $q_bracket = qr{(?{local $nest=0}) (?>(?:
[^\x81-\xFE\[\]] |
[\x81-\xFE][\x00-\xFF] |
\[ (?{$nest++}) |
\] (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $q_angle = qr{(?{local $nest=0}) (?>(?:
[^\x81-\xFE<>] |
[\x81-\xFE][\x00-\xFF] |
\< (?{$nest++}) |
\> (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $matched = '';
my $s_matched = '';
$matched = q{$Ebig5::matched};
$s_matched = q{ Ebig5::s_matched();};
my $tr_variable = ''; # variable of tr///
my $sub_variable = ''; # variable of s///
my $bind_operator = ''; # =~ or !~
my @heredoc = (); # here document
my @heredoc_delimiter = ();
my $here_script = ''; # here script
#
# escape Big5 script
#
sub Big5::escape(;$) {
local($_) = $_[0] if @_;
# P.359 The Study Function
# in Chapter 7: Perl
# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
study $_; # Yes, I studied study yesterday.
# while all script
# 6.14. Matching from Where the Last Pattern Left Off
# in Chapter 6. Pattern Matching
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
# (and so on)
# one member of Tag-team
#
# P.128 Start of match (or end of previous match): \G
# P.130 Advanced Use of \G with Perl
# in Chapter 3: Overview of Regular Expression Features and Flavors
# P.255 Use leading anchors
# P.256 Expose ^ and \G at the front expressions
# in Chapter 6: Crafting an Efficient Expression
# P.315 "Tag-team" matching with /gc
# in Chapter 7: Perl
# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
my $e_script = '';
while (not /\G \z/oxgc) { # member
$e_script .= Big5::escape_token();
}
return $e_script;
}
#
# escape Big5 token of script
#
sub Big5::escape_token {
# \n output here document
my $ignore_modules = join('|', qw(
utf8
bytes
charnames
I18N::Japanese
I18N::Collate
I18N::JExt
File::DosGlob
Wild
Wildcard
Japanese
));
# another member of Tag-team
#
# P.315 "Tag-team" matching with /gc
# in Chapter 7: Perl
# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
if (/\G ( \n ) /oxgc) { # another member (and so on)
my $heredoc = '';
if (scalar(@heredoc_delimiter) >= 1) {
$slash = 'm//';
$heredoc = join '', @heredoc;
@heredoc = ();
# skip here document
for my $heredoc_delimiter (@heredoc_delimiter) {
/\G .*? \n $heredoc_delimiter \n/xmsgc;
}
( run in 0.822 second using v1.01-cache-2.11-cpan-39bf76dae61 )