Char-UTF2
view release on metacpan or search on metacpan
lib/Eutf2.pm view on Meta::CPAN
last;
}
}
die qq{Unsupported modifier "$1" used at line $line.\n};
}
$slash = 'div';
# literal null string pattern
if ($string eq '') {
$modifier =~ tr/bB//d;
$modifier =~ tr/i//d;
return join '', $ope, $delimiter, $end_delimiter, $modifier;
}
# /b /B modifier
elsif ($modifier =~ tr/bB//d) {
# choice again delimiter
if ($delimiter =~ / [\@:] /oxms) {
my @char = $string =~ /\G ([\x00-\xFF]) /oxmsg;
my %octet = map {$_ => 1} @char;
if (not $octet{')'}) {
$delimiter = '(';
$end_delimiter = ')';
}
elsif (not $octet{'}'}) {
$delimiter = '{';
$end_delimiter = '}';
}
elsif (not $octet{']'}) {
$delimiter = '[';
$end_delimiter = ']';
}
elsif (not $octet{'>'}) {
$delimiter = '<';
$end_delimiter = '>';
}
else {
for my $char (qw( ! " $ % & * + - . / = ? ^ ` | ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
if (not $octet{$char}) {
$delimiter = $char;
$end_delimiter = $char;
last;
}
}
}
}
my $prematch = '';
return join '', $ope, $delimiter, $prematch, '(?:', $string, ')', $matched, $end_delimiter, $modifier;
}
my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
my $metachar = qr/[\@\\|[\]{^]/oxms;
# split regexp
my @char = $string =~ /\G((?>
[^\x80-\xFF\\\$\@\[\(]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF...
\\ (?>[1-9][0-9]*) |
\\g (?>\s*) (?>[1-9][0-9]*) |
\\g (?>\s*) \{ (?>\s*) (?>[1-9][0-9]*) (?>\s*) \} |
\\g (?>\s*) \{ (?>\s*) - (?>\s*) (?>[1-9][0-9]*) (?>\s*) \} |
\\x (?>[0-9A-Fa-f]{1,2}) |
\\ (?>[0-7]{2,3}) |
\\c [\x40-\x5F] |
\\x\{ (?>[0-9A-Fa-f]+) \} |
\\o\{ (?>[0-7]+) \} |
\\[bBNpP]\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} |
\\ $q_char |
\$` | \$\{`\} | \$ (?>\s*) PREMATCH | \$ (?>\s*) \{ (?>\s*) PREMATCH (?>\s*) \} | \$ (?>\s*) \{\^PREMATCH\} |
\$& | \$\{&\} | \$ (?>\s*) MATCH | \$ (?>\s*) \{ (?>\s*) MATCH (?>\s*) \} | \$ (?>\s*) \{\^MATCH\} |
\$ (?>\s*) POSTMATCH | \$ (?>\s*) \{ (?>\s*) POSTMATCH (?>\s*) \} | \$ (?>\s*) \{\^POSTMATCH\} |
[\$\@] $qq_variable |
\$ (?>\s* [0-9]+) |
\$ (?>\s*) \{ (?>\s* [0-9]+ \s*) \} |
\$ \$ (?![\w\{]) |
\$ (?>\s*) \$ (?>\s*) $qq_variable |
\[\^ |
\[\: (?>[a-z]+) :\] |
\[\:\^ (?>[a-z]+) :\] |
\(\? |
$q_char
))/oxmsg;
# choice again delimiter
if ($delimiter =~ / [\@:] /oxms) {
my %octet = map {$_ => 1} @char;
if (not $octet{')'}) {
$delimiter = '(';
$end_delimiter = ')';
}
elsif (not $octet{'}'}) {
$delimiter = '{';
$end_delimiter = '}';
}
elsif (not $octet{']'}) {
$delimiter = '[';
$end_delimiter = ']';
}
elsif (not $octet{'>'}) {
$delimiter = '<';
$end_delimiter = '>';
}
else {
for my $char (qw( ! " $ % & * + - . / = ? ^ ` | ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
if (not $octet{$char}) {
$delimiter = $char;
$end_delimiter = $char;
last;
}
}
}
}
# count '('
my $parens = grep { $_ eq '(' } @char;
my $left_e = 0;
my $right_e = 0;
for (my $i=0; $i <= $#char; $i++) {
# "\L\u" --> "\u\L"
lib/Eutf2.pm view on Meta::CPAN
$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '|' . Eutf2::fc($char[$i]) . ')';
}
}
# \u \l \U \L \F \Q \E
elsif ($char[$i] =~ /\A [<>] \z/oxms) {
if ($right_e < $left_e) {
$char[$i] = '\\' . $char[$i];
}
}
elsif ($char[$i] eq '\u') {
$char[$i] = '@{[Eutf2::ucfirst qq<';
$left_e++;
}
elsif ($char[$i] eq '\l') {
$char[$i] = '@{[Eutf2::lcfirst qq<';
$left_e++;
}
elsif ($char[$i] eq '\U') {
$char[$i] = '@{[Eutf2::uc qq<';
$left_e++;
}
elsif ($char[$i] eq '\L') {
$char[$i] = '@{[Eutf2::lc qq<';
$left_e++;
}
elsif ($char[$i] eq '\F') {
$char[$i] = '@{[Eutf2::fc qq<';
$left_e++;
}
elsif ($char[$i] eq '\Q') {
$char[$i] = '@{[CORE::quotemeta qq<';
$left_e++;
}
elsif ($char[$i] eq '\E') {
if ($right_e < $left_e) {
$char[$i] = '>]}';
$right_e++;
}
else {
$char[$i] = '';
}
}
elsif ($char[$i] eq '\Q') {
while (1) {
if (++$i > $#char) {
last;
}
if ($char[$i] eq '\E') {
last;
}
}
}
elsif ($char[$i] eq '\E') {
}
# \0 --> \0
elsif ($char[$i] =~ /\A \\ (?>\s*) 0 \z/oxms) {
}
# \g{N}, \g{-N}
# P.108 Using Simple Patterns
# in Chapter 7: In the World of Regular Expressions
# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
# P.221 Capturing
# in Chapter 5: Pattern Matching
# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
# \g{-1}, \g{-2}, \g{-3} --> \g{-1}, \g{-2}, \g{-3}
elsif ($char[$i] =~ /\A \\g (?>\s*) \{ (?>\s*) - (?>\s*) ((?>[1-9][0-9]*)) (?>\s*) \} \z/oxms) {
}
# \g{1}, \g{2}, \g{3} --> \g{2}, \g{3}, \g{4} (only when multibyte anchoring is enable)
elsif ($char[$i] =~ /\A \\g (?>\s*) \{ (?>\s*) ((?>[1-9][0-9]*)) (?>\s*) \} \z/oxms) {
}
# \g1, \g2, \g3 --> \g2, \g3, \g4 (only when multibyte anchoring is enable)
elsif ($char[$i] =~ /\A \\g (?>\s*) ((?>[1-9][0-9]*)) \z/oxms) {
}
# \1, \2, \3 --> \2, \3, \4 (only when multibyte anchoring is enable)
elsif ($char[$i] =~ /\A \\ (?>\s*) ((?>[1-9][0-9]*)) \z/oxms) {
}
# $0 --> $0
elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
if ($ignorecase) {
$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
}
}
elsif ($char[$i] =~ /\A \$ \{ (?>\s*) 0 (?>\s*) \} \z/oxms) {
if ($ignorecase) {
$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
}
}
# $$ --> $$
elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
}
# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
# $1, $2, $3 --> $1, $2, $3 otherwise
elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
$char[$i] = e_capture($1);
if ($ignorecase) {
$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
}
}
elsif ($char[$i] =~ /\A \$ \{ (?>\s*) ((?>[1-9][0-9]*)) (?>\s*) \} \z/oxms) {
$char[$i] = e_capture($1);
if ($ignorecase) {
$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
}
}
# $$foo[ ... ] --> $ $foo->[ ... ]
elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
$char[$i] = e_capture($1.'->'.$2);
if ($ignorecase) {
$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
}
}
# $$foo{ ... } --> $ $foo->{ ... }
elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
$char[$i] = e_capture($1.'->'.$2);
if ($ignorecase) {
$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
}
}
# $$foo
elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) \z/oxms) {
$char[$i] = e_capture($1);
if ($ignorecase) {
$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
}
}
( run in 0.506 second using v1.01-cache-2.11-cpan-75ffa21a3d4 )