perl
view release on metacpan or search on metacpan
t/re/pat_advanced.t view on Meta::CPAN
# Test named commits and the $REGERROR var
my $message = '$REGERROR';
local $REGERROR;
for my $word (qw (bar baz bop)) {
$REGERROR = "";
"aaaaa$word" =~
/a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/;
is($REGERROR, $word, $message);
}
}
{
#Mindnumbingly simple test of (*THEN)
for ("ABC","BAX") {
ok /A (*THEN) X | B (*THEN) C/x, "Simple (*THEN) test";
}
}
{
my $message = "Relative Recursion";
my $parens = qr/(\((?:[^()]++|(?-1))*+\))/;
local $_ = 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))';
my ($all, $one, $two) = ('', '', '');
ok(m/foo $parens \s* \+ \s* bar $parens/x, $message);
is($1, '((2*3)+4-3)', $message);
is($2, '(2*(3+4)-1*(2-3))', $message);
is($&, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))', $message);
is($&, $_, $message);
}
{
my $spaces=" ";
local $_ = join 'bar', $spaces, $spaces;
our $count = 0;
s/(?>\s+bar)(?{$count++})//g;
is($_, $spaces, "SUSPEND final string");
is($count, 1, "Optimiser should have prevented more than one match");
}
{
# From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus>
my $dow_name = "nada";
my $parser = "(\$dow_name) = \$time_string =~ /(D\x{e9}\\ " .
"C\x{e9}adaoin|D\x{e9}\\ Sathairn|\\w+|\x{100})/";
my $time_string = "D\x{e9} C\x{e9}adaoin";
eval $parser;
ok !$@, "Test Eval worked";
is($dow_name, $time_string, "UTF-8 trie common prefix extraction");
}
{
my $v;
($v = 'bar') =~ /(\w+)/g;
$v = 'foo';
is("$1", 'bar',
'$1 is safe after /g - may fail due to specialized config in pp_hot.c');
}
{
my $message = "http://nntp.perl.org/group/perl.perl5.porters/118663";
my $qr_barR1 = qr/(bar)\g-1/;
like("foobarbarxyz", $qr_barR1, $message);
like("foobarbarxyz", qr/foo${qr_barR1}xyz/, $message);
like("foobarbarxyz", qr/(foo)${qr_barR1}xyz/, $message);
like("foobarbarxyz", qr/(foo)(bar)\g{-1}xyz/, $message);
like("foobarbarxyz", qr/(foo${qr_barR1})xyz/, $message);
like("foobarbarxyz", qr/(foo(bar)\g{-1})xyz/, $message);
}
{
my $message = '$REGMARK';
our @r = ();
local $REGMARK;
local $REGERROR;
like('foofoo', qr/foo (*MARK:foo) (?{push @r,$REGMARK}) /x, $message);
is("@r","foo", $message);
is($REGMARK, "foo", $message);
unlike('foofoo', qr/foo (*MARK:foo) (*FAIL) /x, $message);
is($REGMARK, '', $message);
is($REGERROR, 'foo', $message);
}
{
my $message = '\K test';
my $x;
$x = "abc.def.ghi.jkl";
$x =~ s/.*\K\..*//;
is($x, "abc.def.ghi", $message);
$x = "one two three four";
$x =~ s/o+ \Kthree//g;
is($x, "one two four", $message);
$x = "abcde";
$x =~ s/(.)\K/$1/g;
is($x, "aabbccddee", $message);
}
{
sub kt {
return '4' if $_[0] eq '09028623';
}
# Nested EVAL using PL_curpm (via $1 or friends)
my $re;
our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x;
$re = qr/^ ( (??{ $grabit }) ) $ /x;
my @res = '0902862349' =~ $re;
is(join ("-", @res), "0902862349",
'PL_curpm is set properly on nested eval');
our $qr = qr/ (o) (??{ $1 }) /x;
ok 'boob'=~/( b (??{ $qr }) b )/x && 1, "PL_curpm, nested eval";
}
{
use charnames ":full";
like "\N{ROMAN NUMERAL ONE}", qr/\p{Alphabetic}/, "I =~ Alphabetic";
like "\N{ROMAN NUMERAL ONE}", qr/\p{Uppercase}/, "I =~ Uppercase";
unlike "\N{ROMAN NUMERAL ONE}", qr/\p{Lowercase}/, "I !~ Lowercase";
like "\N{ROMAN NUMERAL ONE}", qr/\p{IDStart}/, "I =~ ID_Start";
like "\N{ROMAN NUMERAL ONE}", qr/\p{IDContinue}/, "I =~ ID_Continue";
like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Alphabetic}/, "i =~ Alphabetic";
unlike "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Uppercase}/, "i !~ Uppercase";
like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Uppercase}/i, "i =~ Uppercase under /i";
unlike "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Titlecase}/, "i !~ Titlecase";
like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Titlecase}/i, "i =~ Titlecase under /i";
like "\N{ROMAN NUMERAL ONE}", qr/\p{Lowercase}/i, "I =~ Lowercase under /i";
t/re/pat_advanced.t view on Meta::CPAN
fresh_perl_like('qr/\p{upper:]}|\337(?|ss)|)(?0/',
qr/Unicode property wildcard not terminated/,
{},
"Assertion failure with single character wildcard");
}
{ # [perl #134034] Previously assertion failure
fresh_perl_is('use utf8; q!Ȧिमíê¸ð£Î¢á§ááá¡áá!=~/(?li)\b{wb}\B(*COMMIT)0/;',
"", {}, "*COMMIT caused positioning beyond EOS");
}
{ # [GH #17486] Previously assertion failure
fresh_perl_is('0=~/(?iaa)ss\337(?0)|/',
"", {}, "EXACTFUP node isn't changed into something else");
}
{ # [GH #17593]
fresh_perl_is('qr/((?+2147483647))/',
"Invalid reference to group in regex; marked by <--"
. " HERE in m/((?+2147483647) <-- HERE )/ at - line 1.",
{}, "integer overflow, undefined behavior in ASAN");
fresh_perl_is('qr/((?-2147483647))/',
"Reference to nonexistent group in regex; marked by <--"
. " HERE in m/((?-2147483647) <-- HERE )/ at - line 1.",
{}, "Large negative relative capture group");
fresh_perl_is('qr/((?+18446744073709551615))/',
"Invalid reference to group in regex; marked by <--"
. " HERE in m/((?+18446744073709551615 <-- HERE ))/ at -"
. " line 1.",
{}, "Too large relative group number");
fresh_perl_is('qr/((?-18446744073709551615))/',
"Invalid reference to group in regex; marked by <--"
. " HERE in m/((?-18446744073709551615 <-- HERE ))/ at -"
. " line 1.",
{}, "Too large negative relative group number");
}
{ # GH #17734, ASAN use after free
fresh_perl_like('no warnings "experimental::uniprop_wildcards";
my $re = q<[[\p{name=/[Y-]+Z/}]]>;
eval { "\N{BYZANTINE MUSICAL SYMBOL PSILI}"
=~ /$re/ }; print $@ if $@; print "Done\n";',
qr/Done/,
{}, "GH #17734");
}
{ # GH $17278 assertion fails
fresh_perl_is('use locale;
my $A_grave = "\N{LATIN CAPITAL LETTER A WITH GRAVE}";
my $a_grave = "\N{LATIN SMALL LETTER A WITH GRAVE}";
my $z="q!$a_grave! =~ m!(?^i)[$A_grave]!";
print eval $z, "\n";',
1,
{}, "GH #17278");
}
for my $try ( 1 .. 10 ) {
# GH $19350 assertion fails - run 10 times as this bug is a heisenbug
# and does not always fail, but should fail at least once in 10 tries.
fresh_perl_is('use re Debug=>"ALL";qr{(?{a})(?<b>\g{c}})',
<<'EOF_DEBUG_OUT',
Assembling pattern from 2 elements
Compiling REx "(?{a})(?<b>\g{c}"
Starting parse and generation
<(?{a})(?<b>>...| 1| reg
| | brnc
| | piec
| | atom
<?{a})(?<b>\>...| | reg
<(?<b>\g{c}> | 4| piec
| | atom
<?<b>\g{c}> | | reg
| | Setting open paren #1 to 4
<\g{c}> | 6| brnc
| | piec
| | atom
<> | 9| tail~ OPEN1 'b' (4) -> REFN
| | Setting close paren #1 to 9
| 11| lsbr~ tying lastbr REFN <1> (6) to ender CLOSE1 'b' (9) offset 3
| | tail~ REFN <1> (6) -> CLOSE
Unmatched ( in regex; marked by <-- HERE in m/(?{a})( <-- HERE ?<b>\g{c}/ at - line 1.
Freeing REx: "(?{a})(?<b>\g{c}"
EOF_DEBUG_OUT
{rtrim_result=>1},
"Github Issue #19350, assert fail in "
. "Debug => 'ALL' from malformed qr// (heisenbug try $try)");
}
{ # Related to GH $19350 but segfaults instead of asserts, and does so reliably, not randomly.
# use re Debug => "PARSE" is similar to "ALL", but does not include the optimize info, so we
# do not need to deal with normlazing memory addresses in the output.
fresh_perl_is(
'use re Debug=>"PARSE";qr{(?<b>\g{c})(?<c>x)(?&b)}',
<<'EOF_DEBUG_OUT',
Assembling pattern from 1 elements
Compiling REx "(?<b>\g{c})(?<c>x)(?&b)"
Starting parse and generation
<(?<b>\g{c})>...| 1| reg
| | brnc
| | piec
| | atom
<?<b>\g{c})(>...| | reg
<\g{c})(?<c>>...| 3| brnc
| | piec
| | atom
<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN
| 8| lsbr~ tying lastbr REFN <1> (3) to ender CLOSE1 'b' (6) offset 3
| | tail~ REFN <1> (3) -> CLOSE
<(?<c>x)(?&b)> | | piec
| | atom
<?<c>x)(?&b)> | | reg
<x)(?&b)> | 10| brnc
| | piec
| | atom
<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT
| 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
| | tail~ EXACT <x> (10) -> CLOSE
<(?&b)> | | tail~ OPEN1 'b' (1)
| | ~ REFN <1> (3)
| | ~ CLOSE1 'b' (6) -> OPEN
| | piec
| | atom
<?&b)> | | reg
<> | 17| tail~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12) -> GOSUB
| 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
| | tail~ OPEN1 'b' (1)
| | ~ REFN <1> (3)
| | ~ CLOSE1 'b' (6)
| | ~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12)
| | ~ GOSUB1[+0:14] 'b' (14) -> END
Need to redo parse
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
Starting parse and generation
<(?<b>\g{c})>...| 1| reg
| | brnc
| | piec
| | atom
<?<b>\g{c})(>...| | reg
<\g{c})(?<c>>...| 3| brnc
| | piec
| | atom
<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN
| 8| lsbr~ tying lastbr REFN2 'c' <1> (3) to ender CLOSE1 'b' (6) offset 3
| | tail~ REFN2 'c' <1> (3) -> CLOSE
<(?<c>x)(?&b)> | | piec
| | atom
<?<c>x)(?&b)> | | reg
<x)(?&b)> | 10| brnc
| | piec
| | atom
<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT
| 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
| | tail~ EXACT <x> (10) -> CLOSE
<(?&b)> | | tail~ OPEN1 'b' (1)
| | ~ REFN2 'c' <1> (3)
| | ~ CLOSE1 'b' (6) -> OPEN
| | piec
| | atom
<?&b)> | | reg
<> | 17| tail~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12) -> GOSUB
| 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
| | tail~ OPEN1 'b' (1)
| | ~ REFN2 'c' <1> (3)
| | ~ CLOSE1 'b' (6)
| | ~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12)
| | ~ GOSUB1[+0:14] 'b' (14) -> END
Required size 17 nodes
first at 3
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
EOF_DEBUG_OUT
{rtrim_result=>1},
"Related to Github Issue #19350, forward \\g{x} pattern segv under use re Debug => 'PARSE'");
}
{ # perl-security#140, read/write past buffer end
fresh_perl_like('qr/\p{utf8::perl x}/',
qr/Illegal user-defined property name "utf8::perl x" in regex/,
{}, "perl-security#140");
fresh_perl_is('qr/\p{utf8::_perl_surrogate}/', "",
{}, "perl-security#140");
}
{ # GH 20009
my $x = "awesome quotes";
utf8::upgrade($x);
$x =~ s/^[\x{0301}\x{030C}]+//;
}
# !!! NOTE that tests that aren't at all likely to crash perl should go
# a ways above, above these last ones. There's a comment there that, like
# this comment, contains the word 'NOTE'
done_testing();
} # End of sub run_tests
1;
( run in 0.578 second using v1.01-cache-2.11-cpan-1dc43b0fbd2 )