Arabic
view release on metacpan or search on metacpan
lib/Earabic.pm view on Meta::CPAN
package Earabic;
use strict;
BEGIN { $INC{'warnings.pm'} = '' if $] < 5.006 } use warnings;
######################################################################
#
# Earabic - Run-time routines for Arabic.pm
#
# http://search.cpan.org/dist/Char-Arabic/
#
# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2018, 2019 INABA Hitoshi <ina@cpan.org>
######################################################################
use 5.00503; # Galapagos Consensus 1998 for primetools
# use 5.008001; # Lancaster Consensus 2013 for toolchains
# 12.3. Delaying use Until Runtime
# in Chapter 12. Packages, Libraries, and Modules
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
# (and so on)
# Version numbers should be boring
# http://www.dagolden.com/index.php/369/version-numbers-should-be-boring/
# For the impatient, the disinterested or those who just want to follow
# a recipe, my advice for all modules is this:
# our $VERSION = "0.001"; # or "0.001_001" for a dev release
# $VERSION = eval $VERSION; # No!! because '1.10' makes '1.1'
use vars qw($VERSION);
$VERSION = '1.22';
$VERSION = $VERSION;
BEGIN {
if ($^X =~ / jperl /oxmsi) {
die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)\n";
}
if (CORE::ord('A') == 193) {
die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).\n";
}
if (CORE::ord('A') != 0x41) {
die __FILE__, ": is not US-ASCII script (must be US-ASCII script).\n";
}
}
BEGIN {
# instead of utf8.pm
CORE::eval q{
no warnings qw(redefine);
*utf8::upgrade = sub { CORE::length $_[0] };
*utf8::downgrade = sub { 1 };
*utf8::encode = sub { };
*utf8::decode = sub { 1 };
*utf8::is_utf8 = sub { };
*utf8::valid = sub { 1 };
};
if ($@) {
*utf8::upgrade = sub { CORE::length $_[0] };
*utf8::downgrade = sub { 1 };
*utf8::encode = sub { };
*utf8::decode = sub { 1 };
*utf8::is_utf8 = sub { };
*utf8::valid = sub { 1 };
}
}
# instead of Symbol.pm
BEGIN {
sub gensym () {
if ($] < 5.006) {
return \do { local *_ };
}
else {
return undef;
}
}
sub qualify ($$) {
my($name) = @_;
lib/Earabic.pm view on Meta::CPAN
# special character, "^xyz"
elsif ($name =~ /\A \^ [ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_]+ \z/x) {
# RGS 2001-11-05 : translate leading ^X to control-char
$name =~ s{\A \^ ([ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_]) }{'qq(\c'.$1.')'}xee;
return 'main::' . $name;
}
# Global names
elsif ($name =~ /\A (?: ARGV | ARGVOUT | ENV | INC | SIG | STDERR | STDIN | STDOUT ) \z/x) {
return 'main::' . $name;
}
# or other
elsif ($name =~ /\A [^ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz] \z/x) {
return 'main::' . $name;
}
elsif (defined $_[1]) {
return $_[1] . '::' . $name;
}
else {
return (caller)[0] . '::' . $name;
}
}
sub qualify_to_ref ($;$) {
if (defined $_[1]) {
no strict qw(refs);
return \*{ qualify $_[0], $_[1] };
}
else {
no strict qw(refs);
return \*{ qualify $_[0], (caller)[0] };
}
}
}
# P.714 29.2.39. flock
# in Chapter 29: Functions
# of ISBN 0-596-00027-8 Programming Perl Third Edition.
# P.863 flock
# in Chapter 27: Functions
# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
sub LOCK_SH() {1}
sub LOCK_EX() {2}
sub LOCK_UN() {8}
sub LOCK_NB() {4}
# instead of Carp.pm
sub carp;
sub croak;
sub cluck;
sub confess;
# 6.18. Matching Multiple-Byte Characters
# in Chapter 6. Pattern Matching
# of ISBN 978-1-56592-243-3 Perl Perl Cookbook.
# (and so on)
# regexp of character
my $your_char = q{[\x00-\xFF]};
use vars qw($qq_char); $qq_char = qr/\\c[\x40-\x5F]|\\?(?:$your_char)/oxms;
use vars qw($q_char); $q_char = qr/$your_char/oxms;
#
# Arabic character range per length
#
my %range_tr = ();
#
# Arabic case conversion
#
my %lc = ();
@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
my %uc = ();
@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
my %fc = ();
@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
if (0) {
}
elsif (__PACKAGE__ =~ / \b Earabic \z/oxms) {
%range_tr = (
1 => [ [0x00..0xFF],
],
);
}
else {
croak "Don't know my package name '@{[__PACKAGE__]}'";
}
#
# @ARGV wildcard globbing
#
sub import {
if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
my @argv = ();
for (@ARGV) {
# has space
if (/\A (?:$q_char)*? [ ] /oxms) {
if (my @glob = Earabic::glob(qq{"$_"})) {
push @argv, @glob;
}
else {
push @argv, $_;
}
}
# has wildcard metachar
elsif (/\A (?:$q_char)*? [*?] /oxms) {
lib/Earabic.pm view on Meta::CPAN
}
#
# Arabic path globbing (with parameter)
#
sub Earabic::glob($) {
if (wantarray) {
my @glob = _DOS_like_glob(@_);
for my $glob (@glob) {
$glob =~ s{ \A (?:\./)+ }{}oxms;
}
return @glob;
}
else {
my $glob = _DOS_like_glob(@_);
$glob =~ s{ \A (?:\./)+ }{}oxms;
return $glob;
}
}
#
# Arabic path globbing (without parameter)
#
sub Earabic::glob_() {
if (wantarray) {
my @glob = _DOS_like_glob();
for my $glob (@glob) {
$glob =~ s{ \A (?:\./)+ }{}oxms;
}
return @glob;
}
else {
my $glob = _DOS_like_glob();
$glob =~ s{ \A (?:\./)+ }{}oxms;
return $glob;
}
}
#
# Arabic path globbing via File::DosGlob 1.10
#
# Often I confuse "_dosglob" and "_doglob".
# So, I renamed "_dosglob" to "_DOS_like_glob".
#
my %iter;
my %entries;
sub _DOS_like_glob {
# context (keyed by second cxix argument provided by core)
my($expr,$cxix) = @_;
# glob without args defaults to $_
$expr = $_ if not defined $expr;
# represents the current user's home directory
#
# 7.3. Expanding Tildes in Filenames
# in Chapter 7. File Access
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
#
# and File::HomeDir, File::HomeDir::Windows module
# DOS-like system
if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
$expr =~ s{ \A ~ (?= [^/\\] ) }
{ my_home_MSWin32() }oxmse;
}
# UNIX-like system
else {
$expr =~ s{ \A ~ ( (?:[^/])* ) }
{ $1 ? (CORE::eval(q{(getpwnam($1))[7]})||my_home()) : my_home() }oxmse;
}
# assume global context if not provided one
$cxix = '_G_' if not defined $cxix;
$iter{$cxix} = 0 if not exists $iter{$cxix};
# if we're just beginning, do it all first
if ($iter{$cxix} == 0) {
$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
}
# chuck it all out, quick or slow
if (wantarray) {
delete $iter{$cxix};
return @{delete $entries{$cxix}};
}
else {
if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
return shift @{$entries{$cxix}};
}
else {
# return undef for EOL
delete $iter{$cxix};
delete $entries{$cxix};
return undef;
}
}
}
#
# Arabic path globbing subroutine
#
sub _do_glob {
my($cond,@expr) = @_;
my @glob = ();
my $fix_drive_relative_paths = 0;
OUTER:
for my $expr (@expr) {
next OUTER if not defined $expr;
next OUTER if $expr eq '';
my @matched = ();
my @globdir = ();
my $head = '.';
my $pathsep = '/';
lib/Earabic.pm view on Meta::CPAN
if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
$fix_drive_relative_paths = 1;
}
}
if (($head, $tail) = _parse_path($expr,$pathsep)) {
if ($tail eq '') {
push @glob, $expr;
next OUTER;
}
if ($head =~ / \A (?:$q_char)*? [*?] /oxms) {
if (@globdir = _do_glob('d', $head)) {
push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
next OUTER;
}
}
if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
$head .= $pathsep;
}
$expr = $tail;
}
# If file component has no wildcards, we can avoid opendir
if ($expr !~ / \A (?:$q_char)*? [*?] /oxms) {
if ($head eq '.') {
$head = '';
}
if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
$head .= $pathsep;
}
$head .= $expr;
if ($cond eq 'd') {
if (-d $head) {
push @glob, $head;
}
}
else {
if (-e $head) {
push @glob, $head;
}
}
next OUTER;
}
opendir(*DIR, $head) or next OUTER;
my @leaf = readdir DIR;
closedir DIR;
if ($head eq '.') {
$head = '';
}
if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
$head .= $pathsep;
}
my $pattern = '';
while ($expr =~ / \G ($q_char) /oxgc) {
my $char = $1;
# 6.9. Matching Shell Globs as Regular Expressions
# in Chapter 6. Pattern Matching
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
# (and so on)
if ($char eq '*') {
$pattern .= "(?:$your_char)*",
}
elsif ($char eq '?') {
$pattern .= "(?:$your_char)?", # DOS style
# $pattern .= "(?:$your_char)", # UNIX style
}
elsif ((my $fc = Earabic::fc($char)) ne $char) {
$pattern .= $fc;
}
else {
$pattern .= quotemeta $char;
}
}
my $matchsub = sub { Earabic::fc($_[0]) =~ /\A $pattern \z/xms };
# if ($@) {
# print STDERR "$0: $@\n";
# next OUTER;
# }
INNER:
for my $leaf (@leaf) {
if ($leaf eq '.' or $leaf eq '..') {
next INNER;
}
if ($cond eq 'd' and not -d "$head$leaf") {
next INNER;
}
if (&$matchsub($leaf)) {
push @matched, "$head$leaf";
next INNER;
}
# [DOS compatibility special case]
# Failed, add a trailing dot and try again, but only...
if (Earabic::index($leaf,'.') == -1 and # if name does not have a dot in it *and*
CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars *and*
Earabic::index($pattern,'\\.') != -1 # pattern has a dot.
) {
if (&$matchsub("$leaf.")) {
push @matched, "$head$leaf";
next INNER;
}
}
}
if (@matched) {
push @glob, @matched;
}
}
if ($fix_drive_relative_paths) {
for my $glob (@glob) {
$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
}
}
return @glob;
lib/Earabic.pm view on Meta::CPAN
my $qq_substr = qr{(?> Char::substr | Arabic::substr | CORE::substr | substr ) (?>\s*) \( $qq_paren \)
}xms;
# regexp of nested parens in qXX
my $q_paren = qr{(?{local $nest=0}) (?>(?:
[^()] |
\( (?{$nest++}) |
\) (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $q_brace = qr{(?{local $nest=0}) (?>(?:
[^\{\}] |
\{ (?{$nest++}) |
\} (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $q_bracket = qr{(?{local $nest=0}) (?>(?:
[^\[\]] |
\[ (?{$nest++}) |
\] (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $q_angle = qr{(?{local $nest=0}) (?>(?:
[^<>] |
\< (?{$nest++}) |
\> (?(?{$nest>0})(?{$nest--})|(?!)))*) (?(?{$nest!=0})(?!)) |
[\x00-\xFF]
}xms;
my $matched = '';
my $s_matched = '';
my $tr_variable = ''; # variable of tr///
my $sub_variable = ''; # variable of s///
my $bind_operator = ''; # =~ or !~
my @heredoc = (); # here document
my @heredoc_delimiter = ();
my $here_script = ''; # here script
#
# escape Arabic script
#
sub Arabic::escape(;$) {
local($_) = $_[0] if @_;
# P.359 The Study Function
# in Chapter 7: Perl
# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
study $_; # Yes, I studied study yesterday.
# while all script
# 6.14. Matching from Where the Last Pattern Left Off
# in Chapter 6. Pattern Matching
# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
# (and so on)
# one member of Tag-team
#
# P.128 Start of match (or end of previous match): \G
# P.130 Advanced Use of \G with Perl
# in Chapter 3: Overview of Regular Expression Features and Flavors
# P.255 Use leading anchors
# P.256 Expose ^ and \G at the front expressions
# in Chapter 6: Crafting an Efficient Expression
# P.315 "Tag-team" matching with /gc
# in Chapter 7: Perl
# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
my $e_script = '';
while (not /\G \z/oxgc) { # member
$e_script .= Arabic::escape_token();
}
return $e_script;
}
#
# escape Arabic token of script
#
sub Arabic::escape_token {
# \n output here document
my $ignore_modules = join('|', qw(
utf8
bytes
charnames
I18N::Japanese
I18N::Collate
I18N::JExt
File::DosGlob
Wild
Wildcard
Japanese
));
# another member of Tag-team
#
# P.315 "Tag-team" matching with /gc
# in Chapter 7: Perl
# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
if (/\G ( \n ) /oxgc) { # another member (and so on)
my $heredoc = '';
if (scalar(@heredoc_delimiter) >= 1) {
$slash = 'm//';
$heredoc = join '', @heredoc;
@heredoc = ();
# skip here document
for my $heredoc_delimiter (@heredoc_delimiter) {
/\G .*? \n $heredoc_delimiter \n/xmsgc;
}
( run in 0.902 second using v1.01-cache-2.11-cpan-39bf76dae61 )