UTF8-R2

 view release on metacpan or  search on metacpan

lib/UTF8/R2.pm  view on Meta::CPAN

    }

    # $^X($EXECUTABLE_NAME) for execute MBCS Perl script
    $UTF8::R2::PERL = $^X;
    $UTF8::R2::PERL = $UTF8::R2::PERL; # to avoid: Name "UTF8::R2::PERL" used only once: possible typo at ...

    # original $0($PROGRAM_NAME)
    $UTF8::R2::ORIG_PROGRAM_NAME = $0;
    $UTF8::R2::ORIG_PROGRAM_NAME = $UTF8::R2::ORIG_PROGRAM_NAME; # to avoid: Name "UTF8::R2::ORIG_PROGRAM_NAME" used only once: possible typo at ...
}

#---------------------------------------------------------------------
# confess() for this module
sub confess {
    my $i = 0;
    my @confess = ();
    while (my($package, $filename, $line, $subroutine) = caller($i)) {
        push @confess, "[$i] $filename($line) $subroutine\n";
        $i++;
    }
    print STDERR "\n", @_, "\n";
    print STDERR CORE::reverse @confess;
    die;
}

#---------------------------------------------------------------------
# chop() for UTF-8 codepoint string
sub UTF8::R2::chop (@) {
    my $chop = '';
    for (@_ ? @_ : $_) {
        if (my @x = /\G$x/g) {
            $chop = pop @x;
            $_ = join '', @x;
        }
    }
    return $chop;
}

#---------------------------------------------------------------------
# chr() for UTF-8 codepoint string
sub UTF8::R2::chr (;$) {
    my $number = @_ ? $_[0] : $_;

# Negative values give the Unicode replacement character (chr(0xfffd)),
# except under the bytes pragma, where the low eight bits of the value
# (truncated to an integer) are used.

    my @octet = ();
    CORE::do {
        unshift @octet, ($number % 0x100);
        $number = int($number / 0x100);
    } while ($number > 0);
    return pack 'C*', @octet;
}

#---------------------------------------------------------------------
# mb::do() like do(), mb.pm compatible
sub UTF8::R2::do ($) {

    # run as Perl script
    return CORE::eval sprintf(<<'END', (caller)[0, 2, 1]);
package %s;
#line %s "%s"
CORE::do "$_[0]";
END
}

#---------------------------------------------------------------------
# mb::eval() like eval(), mb.pm compatible
sub UTF8::R2::eval (;$) {
    local $_ = @_ ? $_[0] : $_;

    # run as Perl script in caller package
    return CORE::eval sprintf(<<'END', (caller)[0, 2, 1], $_);
package %s;
#line %s "%s"
%s
END
}

#---------------------------------------------------------------------
# getc() for UTF-8 codepoint string
sub UTF8::R2::getc (;*) {
    my $fh = @_ ? Symbol::qualify_to_ref($_[0], caller()) : \*STDIN;
    my $getc = CORE::getc $fh;
    if ($getc =~ /\A [\x00-\x7F\x80-\xC1\xF5-\xFF] \z/xms) {
    }
    elsif ($getc =~ /\A [\xC2-\xDF] \z/xms) {
        $getc .= CORE::getc $fh;
    }
    elsif ($getc =~ /\A [\xE0-\xEF] \z/xms) {
        $getc .= CORE::getc $fh;
        $getc .= CORE::getc $fh;
    }
    elsif ($getc =~ /\A [\xF0-\xF4] \z/xms) {
        $getc .= CORE::getc $fh;
        $getc .= CORE::getc $fh;
        $getc .= CORE::getc $fh;
    }
    return $getc;
}

#---------------------------------------------------------------------
# index() for UTF-8 codepoint string
sub UTF8::R2::index ($$;$) {
    my $index = 0;
    if (@_ == 3) {
        $index = CORE::index $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2]));
    }
    else {
        $index = CORE::index $_[0], $_[1];
    }
    if ($index == -1) {
        return -1;
    }
    else {
        return UTF8::R2::length(CORE::substr $_[0], 0, $index);
    }
}

#---------------------------------------------------------------------
# JPerl like index() for UTF-8 codepoint string
sub UTF8::R2::index_byte ($$;$) {
    if (@_ == 3) {
        return CORE::index $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2]));
    }
    else {
        return CORE::index $_[0], $_[1];
    }
}

#---------------------------------------------------------------------
# universal lc() for UTF-8 codepoint string
sub UTF8::R2::lc (;$) {

lib/UTF8/R2.pm  view on Meta::CPAN

            elsif ($after[-1] =~ /\A \\  [\x00-\xFF]        \z/x) { } # \) \} \] \" \0 \1 \D \E \F \G \H \K \L \N \Q \R \S \U \V \W \\ \a \d \e \f \h \l \n \r \s \t \u \v \w
            elsif ($after[-1] =~ /\A     [\x00-\xFF]        \z/x) { } # (a) a{1} [a] a . \012 \x12 \o{12} \g{1}
            elsif ($after[-1] =~ /       [\x00-\xFF] [)}\]] \z/x) { } # (any) any{1} [any]
            else {                                                    # XBCS
                $after[-1] = '(?:' . $after[-1] . ')';
            }
            push @after, $before;
        }

        # \x{UTF8hex}
        elsif ($before =~ /\A \\x \{ ([0123456789ABCDEFabcdef]+) \} \z/x) {
            push @after, UTF8::R2::chr(hex $1);
        }

        # else
        else {
            push @after, $before;
        }
    }

    my $after = join '', @after;
    return qr/$after/;
}

#---------------------------------------------------------------------
# mb::require() like require(), mb.pm compatible
sub UTF8::R2::require (;$) {
    local $_ = @_ ? $_[0] : $_;

    # require perl version
    if (/^[0-9]/) {
        if ($] < $_) {
            confess "Perl $_ required--this is only version $], stopped";
        }
        else {
            undef $@;
            return 1;
        }
    }

    # require expr
    else {

        # find expr in @INC
        my $file = $_;
        if (($file =~ s{::}{/}g) or ($file !~ m{[\./\\]})) {
            $file .= '.pm';
        }
        if (exists $INC{$file}) {
            undef $@;
            return 1 if $INC{$file};
            confess "Compilation failed in require";
        }
        for my $prefix_file ($file, map { "$_/$file" } @INC) {
            if (-f $prefix_file) {
                $INC{$_} = $prefix_file;

                # run as Perl script
                # must use CORE::do to use <DATA>, because CORE::eval cannot do it.
                local $@;
                my $result = CORE::eval sprintf(<<'END', (caller)[0, 2, 1]);
package %s;
#line %s "%s"
CORE::do "$prefix_file";
END

                # return result
                if ($@) {
                    $INC{$_} = undef;
                    confess $@;
                }
                elsif (not $result) {
                    delete $INC{$_};
                    confess "$_ did not return true value";
                }
                else {
                    return $result;
                }
            }
        }
        confess "Can't find $_ in \@INC";
    }
}

#---------------------------------------------------------------------
# reverse() for UTF-8 codepoint string
sub UTF8::R2::reverse (@) {

    # in list context,
    if (wantarray) {

        # returns a list value consisting of the elements of @_ in the opposite order
        return CORE::reverse @_;
    }

    # in scalar context,
    else {

        # returns a string value with all characters in the opposite order of
        return (join '',
            CORE::reverse(
                @_ ?
                join('', @_) =~ /\G$x/g : # concatenates the elements of @_
                /\G$x/g                  # $_ when without arguments
            )
        );
    }
}

#---------------------------------------------------------------------
# rindex() for UTF-8 codepoint string
sub UTF8::R2::rindex ($$;$) {
    my $rindex = 0;
    if (@_ == 3) {
        $rindex = CORE::rindex $_[0], $_[1], CORE::length(UTF8::R2::substr($_[0], 0, $_[2]));
    }
    else {
        $rindex = CORE::rindex $_[0], $_[1];
    }
    if ($rindex == -1) {
        return -1;



( run in 1.003 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )