view release on metacpan or search on metacpan
- created by INABA Hitoshi
0.39 2009-08-06 00:00:00
- support Sjis::length, Sjis::substr, Sjis::index, and Sjis::rindex
- created by INABA Hitoshi
0.38 2009-08-05 00:00:00
- improve qw
- filetest operator before parenthesis '-X ('
- escape sequence of hexadecimal number of one digit
- fix bugs of function charlist_not_qr
- created by INABA Hitoshi
0.37 2009-07-09 00:00:00
- separate Tk modules, make-*.pl, and test scripts
- remove perl55.bat, and perl56.bat
- created by INABA Hitoshi
0.36 2009-05-17 00:00:00
- do local $@ before eval
- support Tk::getOpenFile and Tk::getSaveFile
lib/Arabic.pm view on Meta::CPAN
[:graph:] [\x21-\x7F]
[:lower:] [\x61-\x7A]
[\x41-\x5A\x61-\x7A] (/i modifier)
[:print:] [\x20-\x7F]
[:punct:] [\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]
[:space:] [\s\x0B]
[:upper:] [\x41-\x5A]
[\x41-\x5A\x61-\x7A] (/i modifier)
[:word:] [\x30-\x39\x41-\x5A\x5F\x61-\x7A]
[:xdigit:] [\x30-\x39\x41-\x46\x61-\x66]
[:^alnum:] ${Earabic::not_alnum}
[:^alpha:] ${Earabic::not_alpha}
[:^ascii:] ${Earabic::not_ascii}
[:^blank:] ${Earabic::not_blank}
[:^cntrl:] ${Earabic::not_cntrl}
[:^digit:] ${Earabic::not_digit}
[:^graph:] ${Earabic::not_graph}
[:^lower:] ${Earabic::not_lower}
${Earabic::not_lower_i} (/i modifier)
[:^print:] ${Earabic::not_print}
[:^punct:] ${Earabic::not_punct}
[:^space:] ${Earabic::not_space}
[:^upper:] ${Earabic::not_upper}
${Earabic::not_upper_i} (/i modifier)
[:^word:] ${Earabic::not_word}
[:^xdigit:] ${Earabic::not_xdigit}
---------------------------------------------------------------
\b and \B are redefined as follows to backward compatibility.
---------------------------------------------------------------
Before After
---------------------------------------------------------------
\b ${Earabic::eb}
\B ${Earabic::eB}
---------------------------------------------------------------
lib/Arabic.pm view on Meta::CPAN
---------------------------------------------------------------------------------------------------------------------------------------------------------
${Earabic::dot} qr{(?>[^\x0A])};
${Earabic::dot_s} qr{(?>[\x00-\xFF])};
${Earabic::eD} qr{(?>[^0-9])};
${Earabic::eS} qr{(?>[^\s])};
${Earabic::eW} qr{(?>[^0-9A-Z_a-z])};
${Earabic::eH} qr{(?>[^\x09\x20])};
${Earabic::eV} qr{(?>[^\x0A\x0B\x0C\x0D])};
${Earabic::eR} qr{(?>\x0D\x0A|[\x0A\x0D])};
${Earabic::eN} qr{(?>[^\x0A])};
${Earabic::not_alnum} qr{(?>[^\x30-\x39\x41-\x5A\x61-\x7A])};
${Earabic::not_alpha} qr{(?>[^\x41-\x5A\x61-\x7A])};
${Earabic::not_ascii} qr{(?>[^\x00-\x7F])};
${Earabic::not_blank} qr{(?>[^\x09\x20])};
${Earabic::not_cntrl} qr{(?>[^\x00-\x1F\x7F])};
${Earabic::not_digit} qr{(?>[^\x30-\x39])};
${Earabic::not_graph} qr{(?>[^\x21-\x7F])};
${Earabic::not_lower} qr{(?>[^\x61-\x7A])};
${Earabic::not_lower_i} qr{(?>[^\x41-\x5A\x61-\x7A])}; # Perl 5.16 compatible
# ${Earabic::not_lower_i} qr{(?>[\x00-\xFF])}; # older Perl compatible
${Earabic::not_print} qr{(?>[^\x20-\x7F])};
${Earabic::not_punct} qr{(?>[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
${Earabic::not_space} qr{(?>[^\s\x0B])};
${Earabic::not_upper} qr{(?>[^\x41-\x5A])};
${Earabic::not_upper_i} qr{(?>[^\x41-\x5A\x61-\x7A])}; # Perl 5.16 compatible
# ${Earabic::not_upper_i} qr{(?>[\x00-\xFF])}; # older Perl compatible
${Earabic::not_word} qr{(?>[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
${Earabic::not_xdigit} qr{(?>[^\x30-\x39\x41-\x46\x61-\x66])};
# This solution is not perfect. I beg better solution from you who are reading this.
${Earabic::eb} qr{(?:\A(?=[0-9A-Z_a-z])|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]|\z))};
${Earabic::eB} qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
---------------------------------------------------------------------------------------------------------------------------------------------------------
=head1 Un-Escaping \ of \b{}, \B{}, \N{}, \p{}, \P{}, and \X (Arabic software provides)
Arabic software removes '\' at head of alphanumeric regexp metasymbols \b{}, \B{},
\N{}, \p{}, \P{} and \X. By this method, you can avoid the trap of the abstraction.
lib/Earabic.pm view on Meta::CPAN
use vars qw(
$dot
$dot_s
$eD
$eS
$eW
$eH
$eV
$eR
$eN
$not_alnum
$not_alpha
$not_ascii
$not_blank
$not_cntrl
$not_digit
$not_graph
$not_lower
$not_lower_i
$not_print
$not_punct
$not_space
$not_upper
$not_upper_i
$not_word
$not_xdigit
$eb
$eB
);
${Earabic::dot} = qr{(?>[^\x0A])};
${Earabic::dot_s} = qr{(?>[\x00-\xFF])};
${Earabic::eD} = qr{(?>[^0-9])};
# Vertical tabs are now whitespace
# \s in a regex now matches a vertical tab in all circumstances.
# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
# ${Earabic::eS} = qr{(?>[^\x09\x0A \x0C\x0D\x20])};
# ${Earabic::eS} = qr{(?>[^\x09\x0A\x0B\x0C\x0D\x20])};
${Earabic::eS} = qr{(?>[^\s])};
${Earabic::eW} = qr{(?>[^0-9A-Z_a-z])};
${Earabic::eH} = qr{(?>[^\x09\x20])};
${Earabic::eV} = qr{(?>[^\x0A\x0B\x0C\x0D])};
${Earabic::eR} = qr{(?>\x0D\x0A|[\x0A\x0D])};
${Earabic::eN} = qr{(?>[^\x0A])};
${Earabic::not_alnum} = qr{(?>[^\x30-\x39\x41-\x5A\x61-\x7A])};
${Earabic::not_alpha} = qr{(?>[^\x41-\x5A\x61-\x7A])};
${Earabic::not_ascii} = qr{(?>[^\x00-\x7F])};
${Earabic::not_blank} = qr{(?>[^\x09\x20])};
${Earabic::not_cntrl} = qr{(?>[^\x00-\x1F\x7F])};
${Earabic::not_digit} = qr{(?>[^\x30-\x39])};
${Earabic::not_graph} = qr{(?>[^\x21-\x7F])};
${Earabic::not_lower} = qr{(?>[^\x61-\x7A])};
${Earabic::not_lower_i} = qr{(?>[^\x41-\x5A\x61-\x7A])}; # Perl 5.16 compatible
# ${Earabic::not_lower_i} = qr{(?>[\x00-\xFF])}; # older Perl compatible
${Earabic::not_print} = qr{(?>[^\x20-\x7F])};
${Earabic::not_punct} = qr{(?>[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
${Earabic::not_space} = qr{(?>[^\s\x0B])};
${Earabic::not_upper} = qr{(?>[^\x41-\x5A])};
${Earabic::not_upper_i} = qr{(?>[^\x41-\x5A\x61-\x7A])}; # Perl 5.16 compatible
# ${Earabic::not_upper_i} = qr{(?>[\x00-\xFF])}; # older Perl compatible
${Earabic::not_word} = qr{(?>[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
${Earabic::not_xdigit} = qr{(?>[^\x30-\x39\x41-\x46\x61-\x66])};
${Earabic::eb} = qr{(?:\A(?=[0-9A-Z_a-z])|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]|\z))};
${Earabic::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
# avoid: Name "Earabic::foo" used only once: possible typo at here.
${Earabic::dot} = ${Earabic::dot};
${Earabic::dot_s} = ${Earabic::dot_s};
${Earabic::eD} = ${Earabic::eD};
${Earabic::eS} = ${Earabic::eS};
${Earabic::eW} = ${Earabic::eW};
${Earabic::eH} = ${Earabic::eH};
${Earabic::eV} = ${Earabic::eV};
${Earabic::eR} = ${Earabic::eR};
${Earabic::eN} = ${Earabic::eN};
${Earabic::not_alnum} = ${Earabic::not_alnum};
${Earabic::not_alpha} = ${Earabic::not_alpha};
${Earabic::not_ascii} = ${Earabic::not_ascii};
${Earabic::not_blank} = ${Earabic::not_blank};
${Earabic::not_cntrl} = ${Earabic::not_cntrl};
${Earabic::not_digit} = ${Earabic::not_digit};
${Earabic::not_graph} = ${Earabic::not_graph};
${Earabic::not_lower} = ${Earabic::not_lower};
${Earabic::not_lower_i} = ${Earabic::not_lower_i};
${Earabic::not_print} = ${Earabic::not_print};
${Earabic::not_punct} = ${Earabic::not_punct};
${Earabic::not_space} = ${Earabic::not_space};
${Earabic::not_upper} = ${Earabic::not_upper};
${Earabic::not_upper_i} = ${Earabic::not_upper_i};
${Earabic::not_word} = ${Earabic::not_word};
${Earabic::not_xdigit} = ${Earabic::not_xdigit};
${Earabic::eb} = ${Earabic::eb};
${Earabic::eB} = ${Earabic::eB};
#
# Arabic split
#
sub Earabic::split(;$$$) {
# P.794 29.2.161. split
# in Chapter 29: Functions
lib/Earabic.pm view on Meta::CPAN
if ($char[$i+1] eq ']') {
$i++;
}
while (1) {
if (++$i > $#char) {
croak "Unmatched [] in regexp";
}
if ($char[$i] eq ']') {
my $right = $i;
my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
# escape character
for my $char (@charlist) {
if (0) {
}
elsif ($char =~ /\A [.|)] \z/oxms) {
$char = '\\' . $char;
}
}
lib/Earabic.pm view on Meta::CPAN
}->{$1};
}
# POSIX-style character classes
elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower|upper) :\] ) \z/oxms)) {
$char[$i] = {
'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
'[:^lower:]' => '${Earabic::not_lower_i}',
'[:^upper:]' => '${Earabic::not_upper_i}',
}->{$1};
}
elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print|punct|space|upper|word|xdigit) :\] ) \z/oxms) {
$char[$i] = {
'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
'[:ascii:]' => '[\x00-\x7F]',
'[:blank:]' => '[\x09\x20]',
lib/Earabic.pm view on Meta::CPAN
# P.210 POSIX-Style Character Classes
# in Chapter 5: Pattern Matching
# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
'[:upper:]' => '[\x41-\x5A]',
'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
'[:^alnum:]' => '${Earabic::not_alnum}',
'[:^alpha:]' => '${Earabic::not_alpha}',
'[:^ascii:]' => '${Earabic::not_ascii}',
'[:^blank:]' => '${Earabic::not_blank}',
'[:^cntrl:]' => '${Earabic::not_cntrl}',
'[:^digit:]' => '${Earabic::not_digit}',
'[:^graph:]' => '${Earabic::not_graph}',
'[:^lower:]' => '${Earabic::not_lower}',
'[:^print:]' => '${Earabic::not_print}',
'[:^punct:]' => '${Earabic::not_punct}',
'[:^space:]' => '${Earabic::not_space}',
'[:^upper:]' => '${Earabic::not_upper}',
'[:^word:]' => '${Earabic::not_word}',
'[:^xdigit:]' => '${Earabic::not_xdigit}',
}->{$1};
}
elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
$char[$i] = $1;
}
}
# open character list
my @singleoctet = ();
lib/Earabic.pm view on Meta::CPAN
elsif (scalar(@{$range}) == 2) {
push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
}
else {
push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
}
}
}
}
my $not_anchor = '';
push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
}
if (scalar(@multipleoctet) >= 2) {
return '(?:' . join('|', @multipleoctet) . ')';
}
else {
return $multipleoctet[0];
}
}
#
# Arabic open character list for not qr
#
sub charlist_not_qr {
my $modifier = pop @_;
my @char = @_;
my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
my @singleoctet = @$singleoctet;
my @multipleoctet = @$multipleoctet;
# with /i modifier
if ($modifier =~ m/i/oxms) {
lib/Earabic.pm view on Meta::CPAN
while (1) {
if (++$i > $#char) {
die __FILE__, ": Unmatched [] in regexp\n";
}
if ($char[$i] eq ']') {
my $right = $i;
# [^...]
if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
splice @char, $left, $right-$left+1, sprintf(q{@{[Earabic::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
}
else {
splice @char, $left, $right-$left+1, Earabic::charlist_not_qr(@char[$left+1..$right-1], $modifier);
}
$i = $left;
last;
}
}
}
# rewrite character class or escape character
elsif (my $char = character_class($char[$i],$modifier)) {
lib/Earabic.pm view on Meta::CPAN
$i++;
}
while (1) {
if (++$i > $#char) {
die __FILE__, ": Unmatched [] in regexp\n";
}
if ($char[$i] eq ']') {
my $right = $i;
# [^...]
splice @char, $left, $right-$left+1, Earabic::charlist_not_qr(@char[$left+1..$right-1], $modifier);
$i = $left;
last;
}
}
}
# escape $ @ / and \
elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
$char[$i] = '\\' . $char[$i];
lib/Earabic.pm view on Meta::CPAN
}
while (1) {
if (++$i > $#char) {
die __FILE__, ": Unmatched [] in regexp\n";
}
if ($char[$i] eq ']') {
my $right = $i;
# [^...]
if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
splice @char, $left, $right-$left+1, sprintf(q{@{[Earabic::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
}
else {
splice @char, $left, $right-$left+1, Earabic::charlist_not_qr(@char[$left+1..$right-1], $modifier);
}
$i = $left;
last;
}
}
}
# rewrite character class or escape character
elsif (my $char = character_class($char[$i],$modifier)) {
lib/Earabic.pm view on Meta::CPAN
$i++;
}
while (1) {
if (++$i > $#char) {
die __FILE__, ": Unmatched [] in regexp\n";
}
if ($char[$i] eq ']') {
my $right = $i;
# [^...]
splice @char, $left, $right-$left+1, Earabic::charlist_not_qr(@char[$left+1..$right-1], $modifier);
$i = $left;
last;
}
}
}
# escape $ @ / and \
elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
$char[$i] = '\\' . $char[$i];
lib/Earabic.pm view on Meta::CPAN
}
while (1) {
if (++$i > $#char) {
die __FILE__, ": Unmatched [] in regexp\n";
}
if ($char[$i] eq ']') {
my $right = $i;
# [^...]
if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
splice @char, $left, $right-$left+1, sprintf(q{@{[Earabic::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
}
else {
splice @char, $left, $right-$left+1, Earabic::charlist_not_qr(@char[$left+1..$right-1], $modifier);
}
$i = $left;
last;
}
}
}
# rewrite character class or escape character
elsif (my $char = character_class($char[$i],$modifier)) {
lib/Earabic.pm view on Meta::CPAN
$i++;
}
while (1) {
if (++$i > $#char) {
die __FILE__, ": Unmatched [] in regexp\n";
}
if ($char[$i] eq ']') {
my $right = $i;
# [^...]
splice @char, $left, $right-$left+1, Earabic::charlist_not_qr(@char[$left+1..$right-1], $modifier);
$i = $left;
last;
}
}
}
# rewrite character class or escape character
elsif (my $char = character_class($char[$i],$modifier)) {
$char[$i] = $char;
lib/Earabic.pm view on Meta::CPAN
to lowercase, use:
$titlecase = Earabic::ucfirst(substr($word,0,1)) . Earabic::lc(substr($word,1));
or
$string =~ s/(\w)((?>\w*))/\u$1\L$2/g;
Do not use:
$do_not_use = Earabic::ucfirst(Earabic::lc($word));
or "\u\L$word", because that can produce a different and incorrect answer with
certain characters. The titlecase of something that's been lowercased doesn't
always produce the same thing titlecasing the original produces.
Because titlecasing only makes sense at the start of a string that's followed
by lowercase characters, we can't think of any reason you might want to titlecase
every character in a string.
See also P.287 A Case of Mistaken Identity
#-----------------------------------------------------------------------------
# $tar->add_files("$tardir/$file");
#-----------------------------------------------------------------------------
open(FH, $file) || die "Can't open file: $file\n"; #'
binmode FH;
local $/ = undef; # slurp mode
my $data = <FH>;
close FH;
#-----------------------------------------------------------------------------
# Kwalitee Indicator: buildtool_not_executable core
# The build tool (Build.PL/Makefile.PL) is executable. This is bad because
# you should specify which perl you want to use while installing.
#
# How to fix
# Change the permissions of Build.PL/Makefile.PL to not-executable.
#-----------------------------------------------------------------------------
my $tar = Archive::Tar->new;
if ($file =~ m/ (?: Build\.PL | Makefile\.PL ) \z/oxmsi) {
$tar->add_data("$tardir/$file", $data, {'mode' => 0664});
}
return $_[0];
}
}
# Test::Harness::runtests cannot work heavy load.
sub _runtests {
my @script = @_;
my @fail_testno = ();
my $ok_script = 0;
my $not_ok_script = 0;
my $total_ok = 0;
my $total_not_ok = 0;
# cygwin warning:
# MS-DOS style path detected: C:/cpan/Char-X.XX
# Preferred POSIX equivalent is: /cygdrive/c/cpan/Char-X.XX
# CYGWIN environment variable option "nodosfilewarning" turns off this warning.
# Consult the user's guide for more details about POSIX paths: #'
# http://cygwin.com/cygwin-ug-net/using.html#using-pathnames
if (exists $ENV{'CYGWIN'}) {
if ($ENV{'CYGWIN'} !~ /\b nodosfilewarning \b/x) {
}
my $scriptno = 0;
for my $script (@script) {
next if not -e $script;
my @result = qx{$^X $script};
my($tests) = shift(@result) =~ /^1..([0-9]+)/;
my $testno = 1;
my $ok = 0;
my $not_ok = 0;
for my $result (@result) {
if ($result =~ /^ok /) {
$ok++;
}
elsif ($result =~ /^not ok /) {
push @{$fail_testno[$scriptno]}, $testno;
$not_ok++;
}
$testno++;
}
if ($ok == $tests) {
printf("$script ok\n");
$ok_script++;
}
else {
printf("$script Failed %d/%d subtests\n", $not_ok, $ok+$not_ok);
$not_ok_script++;
}
$total_ok += $ok;
$total_not_ok += $not_ok;
$scriptno++;
}
if (scalar(@script) == $ok_script) {
printf <<'END', scalar(@script), $total_ok + $total_not_ok;
All tests successful.
Files=%d, Tests=%d
Result: PASS
END
}
else {
print <<'END';
Test Summary Report
-------------------
END
my $scriptno = 0;
for my $fail_testno (@fail_testno) {
if (defined $fail_testno) {
print $script[$scriptno], "\n";
print ' Failed test: ', join(', ', @{$fail_testno[$scriptno]}), "\n";
}
$scriptno++;
}
printf("Files=%d, Tests=%d\n", scalar(@script), $total_ok + $total_not_ok);
printf("Result: FAIL\n");
printf("Failed %d/%d test programs. %d/%d subtests failed.\n", $not_ok_script, scalar(@script), $total_not_ok, $total_ok + $total_not_ok);
}
}
sub check_usascii {
my($file) = @_;
if (open(FILE,$file)) {
while (<FILE>) {
if (not /^[\x0A\x20-\x7E]+$/) {
die "error not US-ASCII: $file, q(;_;)bad!!";
}