UTF8-R2
view release on metacpan or search on metacpan
lib/UTF8/R2.pm view on Meta::CPAN
}
# $^X($EXECUTABLE_NAME) for execute MBCS Perl script
$UTF8::R2::PERL = $^X;
$UTF8::R2::PERL = $UTF8::R2::PERL; # to avoid: Name "UTF8::R2::PERL" used only once: possible typo at ...
# original $0($PROGRAM_NAME)
$UTF8::R2::ORIG_PROGRAM_NAME = $0;
$UTF8::R2::ORIG_PROGRAM_NAME = $UTF8::R2::ORIG_PROGRAM_NAME; # to avoid: Name "UTF8::R2::ORIG_PROGRAM_NAME" used only once: possible typo at ...
}
#---------------------------------------------------------------------
# confess() for this module
sub confess {
my $i = 0;
my @confess = ();
while (my($package, $filename, $line, $subroutine) = caller($i)) {
push @confess, "[$i] $filename($line) $subroutine\n";
$i++;
}
print STDERR "\n", @_, "\n";
print STDERR CORE::reverse @confess;
die;
}
#---------------------------------------------------------------------
# chop() for UTF-8 codepoint string
sub UTF8::R2::chop (@) {
my $chop = '';
for (@_ ? @_ : $_) {
if (my @x = /\G$x/g) {
$chop = pop @x;
$_ = join '', @x;
}
}
return $chop;
}
#---------------------------------------------------------------------
# chr() for UTF-8 codepoint string
sub UTF8::R2::chr (;$) {
my $number = @_ ? $_[0] : $_;
# Negative values give the Unicode replacement character (chr(0xfffd)),
# except under the bytes pragma, where the low eight bits of the value
# (truncated to an integer) are used.
my @octet = ();
CORE::do {
unshift @octet, ($number % 0x100);
$number = int($number / 0x100);
} while ($number > 0);
return pack 'C*', @octet;
}
#---------------------------------------------------------------------
# mb::do() like do(), mb.pm compatible
sub UTF8::R2::do ($) {
# run as Perl script
return CORE::eval sprintf(<<'END', (caller)[0, 2, 1]);
package %s;
#line %s "%s"
CORE::do "$_[0]";
END
}
#---------------------------------------------------------------------
# mb::eval() like eval(), mb.pm compatible
sub UTF8::R2::eval (;$) {
local $_ = @_ ? $_[0] : $_;
# run as Perl script in caller package
return CORE::eval sprintf(<<'END', (caller)[0, 2, 1], $_);
package %s;
#line %s "%s"
%s
END
}
#---------------------------------------------------------------------
# getc() for UTF-8 codepoint string
sub UTF8::R2::getc (;*) {
my $fh = @_ ? Symbol::qualify_to_ref($_[0], caller()) : \*STDIN;
my $getc = CORE::getc $fh;
if ($getc =~ /\A [\x00-\x7F\x80-\xC1\xF5-\xFF] \z/xms) {
}
elsif ($getc =~ /\A [\xC2-\xDF] \z/xms) {
$getc .= CORE::getc $fh;
}
elsif ($getc =~ /\A [\xE0-\xEF] \z/xms) {
$getc .= CORE::getc $fh;
$getc .= CORE::getc $fh;
}
elsif ($getc =~ /\A [\xF0-\xF4] \z/xms) {
$getc .= CORE::getc $fh;
$getc .= CORE::getc $fh;
$getc .= CORE::getc $fh;
}
return $getc;
}
#---------------------------------------------------------------------
# index() for UTF-8 codepoint string
sub UTF8::R2::index ($$;$) {
my $index = 0;
if (@_ == 3) {
$index = CORE::index $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2]));
}
else {
$index = CORE::index $_[0], $_[1];
}
if ($index == -1) {
return -1;
}
else {
return UTF8::R2::length(CORE::substr $_[0], 0, $index);
}
}
#---------------------------------------------------------------------
# JPerl like index() for UTF-8 codepoint string
sub UTF8::R2::index_byte ($$;$) {
if (@_ == 3) {
return CORE::index $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2]));
}
else {
return CORE::index $_[0], $_[1];
}
}
#---------------------------------------------------------------------
# universal lc() for UTF-8 codepoint string
sub UTF8::R2::lc (;$) {
lib/UTF8/R2.pm view on Meta::CPAN
elsif ($after[-1] =~ /\A \\ [\x00-\xFF] \z/x) { } # \) \} \] \" \0 \1 \D \E \F \G \H \K \L \N \Q \R \S \U \V \W \\ \a \d \e \f \h \l \n \r \s \t \u \v \w
elsif ($after[-1] =~ /\A [\x00-\xFF] \z/x) { } # (a) a{1} [a] a . \012 \x12 \o{12} \g{1}
elsif ($after[-1] =~ / [\x00-\xFF] [)}\]] \z/x) { } # (any) any{1} [any]
else { # XBCS
$after[-1] = '(?:' . $after[-1] . ')';
}
push @after, $before;
}
# \x{UTF8hex}
elsif ($before =~ /\A \\x \{ ([0123456789ABCDEFabcdef]+) \} \z/x) {
push @after, UTF8::R2::chr(hex $1);
}
# else
else {
push @after, $before;
}
}
my $after = join '', @after;
return qr/$after/;
}
#---------------------------------------------------------------------
# mb::require() like require(), mb.pm compatible
sub UTF8::R2::require (;$) {
local $_ = @_ ? $_[0] : $_;
# require perl version
if (/^[0-9]/) {
if ($] < $_) {
confess "Perl $_ required--this is only version $], stopped";
}
else {
undef $@;
return 1;
}
}
# require expr
else {
# find expr in @INC
my $file = $_;
if (($file =~ s{::}{/}g) or ($file !~ m{[\./\\]})) {
$file .= '.pm';
}
if (exists $INC{$file}) {
undef $@;
return 1 if $INC{$file};
confess "Compilation failed in require";
}
for my $prefix_file ($file, map { "$_/$file" } @INC) {
if (-f $prefix_file) {
$INC{$_} = $prefix_file;
# run as Perl script
# must use CORE::do to use <DATA>, because CORE::eval cannot do it.
local $@;
my $result = CORE::eval sprintf(<<'END', (caller)[0, 2, 1]);
package %s;
#line %s "%s"
CORE::do "$prefix_file";
END
# return result
if ($@) {
$INC{$_} = undef;
confess $@;
}
elsif (not $result) {
delete $INC{$_};
confess "$_ did not return true value";
}
else {
return $result;
}
}
}
confess "Can't find $_ in \@INC";
}
}
#---------------------------------------------------------------------
# reverse() for UTF-8 codepoint string
sub UTF8::R2::reverse (@) {
# in list context,
if (wantarray) {
# returns a list value consisting of the elements of @_ in the opposite order
return CORE::reverse @_;
}
# in scalar context,
else {
# returns a string value with all characters in the opposite order of
return (join '',
CORE::reverse(
@_ ?
join('', @_) =~ /\G$x/g : # concatenates the elements of @_
/\G$x/g # $_ when without arguments
)
);
}
}
#---------------------------------------------------------------------
# rindex() for UTF-8 codepoint string
sub UTF8::R2::rindex ($$;$) {
my $rindex = 0;
if (@_ == 3) {
$rindex = CORE::rindex $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2]));
}
else {
$rindex = CORE::rindex $_[0], $_[1];
}
if ($rindex == -1) {
return -1;
( run in 1.003 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )