perl

 view release on metacpan or  search on metacpan

t/re/pat_advanced.t  view on Meta::CPAN

        # Test named commits and the $REGERROR var
	my $message = '$REGERROR';
        local $REGERROR;
        for my $word (qw (bar baz bop)) {
            $REGERROR = "";
            "aaaaa$word" =~
              /a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/;
            is($REGERROR, $word, $message);
        }
    }

    {
        #Mindnumbingly simple test of (*THEN)
        for ("ABC","BAX") {
            ok /A (*THEN) X | B (*THEN) C/x, "Simple (*THEN) test";
        }
    }

    {
        my $message = "Relative Recursion";
        my $parens = qr/(\((?:[^()]++|(?-1))*+\))/;
        local $_ = 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))';
        my ($all, $one, $two) = ('', '', '');
        ok(m/foo $parens \s* \+ \s* bar $parens/x, $message);
        is($1, '((2*3)+4-3)', $message);
        is($2, '(2*(3+4)-1*(2-3))', $message);
        is($&, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))', $message);
        is($&, $_, $message);
    }

    {
        my $spaces="      ";
        local $_ = join 'bar', $spaces, $spaces;
        our $count = 0;
        s/(?>\s+bar)(?{$count++})//g;
        is($_, $spaces, "SUSPEND final string");
        is($count, 1, "Optimiser should have prevented more than one match");
    }

    {
        # From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus>
        my $dow_name = "nada";
        my $parser = "(\$dow_name) = \$time_string =~ /(D\x{e9}\\ " .
                     "C\x{e9}adaoin|D\x{e9}\\ Sathairn|\\w+|\x{100})/";
        my $time_string = "D\x{e9} C\x{e9}adaoin";
        eval $parser;
        ok !$@, "Test Eval worked";
        is($dow_name, $time_string, "UTF-8 trie common prefix extraction");
    }

    {
        my $v;
        ($v = 'bar') =~ /(\w+)/g;
        $v = 'foo';
        is("$1", 'bar',
	   '$1 is safe after /g - may fail due to specialized config in pp_hot.c');
    }

    {
        my $message = "http://nntp.perl.org/group/perl.perl5.porters/118663";
        my $qr_barR1 = qr/(bar)\g-1/;
        like("foobarbarxyz", $qr_barR1, $message);
        like("foobarbarxyz", qr/foo${qr_barR1}xyz/, $message);
        like("foobarbarxyz", qr/(foo)${qr_barR1}xyz/, $message);
        like("foobarbarxyz", qr/(foo)(bar)\g{-1}xyz/, $message);
        like("foobarbarxyz", qr/(foo${qr_barR1})xyz/, $message);
        like("foobarbarxyz", qr/(foo(bar)\g{-1})xyz/, $message);
    }

    {
        my $message = '$REGMARK';
        our @r = ();
        local $REGMARK;
        local $REGERROR;
        like('foofoo', qr/foo (*MARK:foo) (?{push @r,$REGMARK}) /x, $message);
        is("@r","foo", $message);
        is($REGMARK, "foo", $message);
        unlike('foofoo', qr/foo (*MARK:foo) (*FAIL) /x, $message);
        is($REGMARK, '', $message);
        is($REGERROR, 'foo', $message);
    }

    {
        my $message = '\K test';
        my $x;
        $x = "abc.def.ghi.jkl";
        $x =~ s/.*\K\..*//;
        is($x, "abc.def.ghi", $message);

        $x = "one two three four";
        $x =~ s/o+ \Kthree//g;
        is($x, "one two  four", $message);

        $x = "abcde";
        $x =~ s/(.)\K/$1/g;
        is($x, "aabbccddee", $message);
    }

    {
        sub kt {
            return '4' if $_[0] eq '09028623';
        }
        # Nested EVAL using PL_curpm (via $1 or friends)
        my $re;
        our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x;
        $re = qr/^ ( (??{ $grabit }) ) $ /x;
        my @res = '0902862349' =~ $re;
        is(join ("-", @res), "0902862349",
	   'PL_curpm is set properly on nested eval');

        our $qr = qr/ (o) (??{ $1 }) /x;
        ok 'boob'=~/( b (??{ $qr }) b )/x && 1, "PL_curpm, nested eval";
    }

    {
        use charnames ":full";
        like "\N{ROMAN NUMERAL ONE}", qr/\p{Alphabetic}/, "I =~ Alphabetic";
        like "\N{ROMAN NUMERAL ONE}", qr/\p{Uppercase}/,  "I =~ Uppercase";
        unlike "\N{ROMAN NUMERAL ONE}", qr/\p{Lowercase}/,  "I !~ Lowercase";
        like "\N{ROMAN NUMERAL ONE}", qr/\p{IDStart}/,    "I =~ ID_Start";
        like "\N{ROMAN NUMERAL ONE}", qr/\p{IDContinue}/, "I =~ ID_Continue";
        like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Alphabetic}/, "i =~ Alphabetic";
        unlike "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Uppercase}/,  "i !~ Uppercase";
        like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Uppercase}/i,  "i =~ Uppercase under /i";
        unlike "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Titlecase}/,  "i !~ Titlecase";
        like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Titlecase}/i,  "i =~ Titlecase under /i";
        like "\N{ROMAN NUMERAL ONE}", qr/\p{Lowercase}/i,  "I =~ Lowercase under /i";

t/re/pat_advanced.t  view on Meta::CPAN

        fresh_perl_like('qr/\p{upper:]}|\337(?|ss)|)(?0/',
                        qr/Unicode property wildcard not terminated/,
                        {},
                        "Assertion failure with single character wildcard");
    }

    {   # [perl #134034]    Previously assertion failure
        fresh_perl_is('use utf8; q!Ȧिम한글💣΢ყაოსაა!=~/(?li)\b{wb}\B(*COMMIT)0/;',
                      "", {}, "*COMMIT caused positioning beyond EOS");
    }

    {   # [GH #17486]    Previously assertion failure
        fresh_perl_is('0=~/(?iaa)ss\337(?0)|/',
                      "", {}, "EXACTFUP node isn't changed into something else");
    }

    {   # [GH #17593]
        fresh_perl_is('qr/((?+2147483647))/',
                      "Invalid reference to group in regex; marked by <--"
                    . " HERE in m/((?+2147483647) <-- HERE )/ at - line 1.",
                      {}, "integer overflow, undefined behavior in ASAN");
        fresh_perl_is('qr/((?-2147483647))/',
                      "Reference to nonexistent group in regex; marked by <--"
                    . " HERE in m/((?-2147483647) <-- HERE )/ at - line 1.",
                      {}, "Large negative relative capture group");
        fresh_perl_is('qr/((?+18446744073709551615))/',
                      "Invalid reference to group in regex; marked by <--"
                    . " HERE in m/((?+18446744073709551615 <-- HERE ))/ at -"
                    . " line 1.",
                      {}, "Too large relative group number");
        fresh_perl_is('qr/((?-18446744073709551615))/',
                      "Invalid reference to group in regex; marked by <--"
                    . " HERE in m/((?-18446744073709551615 <-- HERE ))/ at -"
                    . " line 1.",
                      {}, "Too large negative relative group number");
    }

    {   # GH #17734, ASAN use after free
        fresh_perl_like('no warnings "experimental::uniprop_wildcards";
                         my $re = q<[[\p{name=/[Y-]+Z/}]]>;
                         eval { "\N{BYZANTINE MUSICAL SYMBOL PSILI}"
                                =~ /$re/ }; print $@ if $@; print "Done\n";',
                         qr/Done/,
                         {}, "GH #17734");
    }

    {   # GH $17278 assertion fails
        fresh_perl_is('use locale;
                       my $A_grave = "\N{LATIN CAPITAL LETTER A WITH GRAVE}";
                       my $a_grave = "\N{LATIN SMALL LETTER A WITH GRAVE}";

                       my $z="q!$a_grave! =~ m!(?^i)[$A_grave]!";
                       print eval $z, "\n";',
                       1,
                       {}, "GH #17278");
    }
    
    for my $try ( 1 .. 10 ) {
        # GH $19350 assertion fails - run 10 times as this bug is a heisenbug
        # and does not always fail, but should fail at least once in 10 tries.
        fresh_perl_is('use re Debug=>"ALL";qr{(?{a})(?<b>\g{c}})',
                      <<'EOF_DEBUG_OUT',
Assembling pattern from 2 elements
Compiling REx "(?{a})(?<b>\g{c}"
Starting parse and generation
<(?{a})(?<b>>...|   1|  reg    
                |    |    brnc   
                |    |      piec   
                |    |        atom   
<?{a})(?<b>\>...|    |          reg    
<(?<b>\g{c}>    |   4|      piec   
                |    |        atom   
<?<b>\g{c}>     |    |          reg    
                |    |            Setting open paren #1 to 4
<\g{c}>         |   6|            brnc   
                |    |              piec   
                |    |                atom   
<>              |   9|            tail~ OPEN1 'b' (4) -> REFN
                |    |            Setting close paren #1 to 9
                |  11|          lsbr~ tying lastbr REFN <1> (6) to ender CLOSE1 'b' (9) offset 3
                |    |            tail~ REFN <1> (6) -> CLOSE
Unmatched ( in regex; marked by <-- HERE in m/(?{a})( <-- HERE ?<b>\g{c}/ at - line 1.
Freeing REx: "(?{a})(?<b>\g{c}"
EOF_DEBUG_OUT
                      {rtrim_result=>1},
                      "Github Issue #19350, assert fail in "
                          . "Debug => 'ALL' from malformed qr// (heisenbug try $try)");
    }
    {   # Related to GH $19350 but segfaults instead of asserts, and does so reliably, not randomly.
        # use re Debug => "PARSE" is similar to "ALL", but does not include the optimize info, so we
        # do not need to deal with normlazing memory addresses in the output.
        fresh_perl_is(
                      'use re Debug=>"PARSE";qr{(?<b>\g{c})(?<c>x)(?&b)}',
                      <<'EOF_DEBUG_OUT',
Assembling pattern from 1 elements
Compiling REx "(?<b>\g{c})(?<c>x)(?&b)"
Starting parse and generation
<(?<b>\g{c})>...|   1|  reg    
                |    |    brnc   
                |    |      piec   
                |    |        atom   
<?<b>\g{c})(>...|    |          reg    
<\g{c})(?<c>>...|   3|            brnc   
                |    |              piec   
                |    |                atom   
<)(?<c>x)(?&b)> |   6|            tail~ OPEN1 'b' (1) -> REFN
                |   8|          lsbr~ tying lastbr REFN <1> (3) to ender CLOSE1 'b' (6) offset 3
                |    |            tail~ REFN <1> (3) -> CLOSE
<(?<c>x)(?&b)>  |    |      piec   
                |    |        atom   
<?<c>x)(?&b)>   |    |          reg    
<x)(?&b)>       |  10|            brnc
                |    |              piec   
                |    |                atom   
<)(?&b)>        |  12|            tail~ OPEN2 'c' (8) -> EXACT
                |  14|          lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
                |    |            tail~ EXACT <x> (10) -> CLOSE
<(?&b)>         |    |      tail~ OPEN1 'b' (1)  
                |    |          ~ REFN <1> (3)
                |    |          ~ CLOSE1 'b' (6) -> OPEN
                |    |      piec   
                |    |        atom   
<?&b)>          |    |          reg    
<>              |  17|      tail~ OPEN2 'c' (8)
                |    |          ~ EXACT <x> (10)
                |    |          ~ CLOSE2 'c' (12) -> GOSUB
                |  18|  lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
                |    |    tail~ OPEN1 'b' (1)  
                |    |        ~ REFN <1> (3)
                |    |        ~ CLOSE1 'b' (6)
                |    |        ~ OPEN2 'c' (8)
                |    |        ~ EXACT <x> (10)
                |    |        ~ CLOSE2 'c' (12)
                |    |        ~ GOSUB1[+0:14] 'b' (14) -> END
Need to redo parse
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
Starting parse and generation
<(?<b>\g{c})>...|   1|  reg    
                |    |    brnc   
                |    |      piec   
                |    |        atom   
<?<b>\g{c})(>...|    |          reg    
<\g{c})(?<c>>...|   3|            brnc   
                |    |              piec   
                |    |                atom   
<)(?<c>x)(?&b)> |   6|            tail~ OPEN1 'b' (1) -> REFN
                |   8|          lsbr~ tying lastbr REFN2 'c' <1> (3) to ender CLOSE1 'b' (6) offset 3
                |    |            tail~ REFN2 'c' <1> (3) -> CLOSE
<(?<c>x)(?&b)>  |    |      piec   
                |    |        atom   
<?<c>x)(?&b)>   |    |          reg    
<x)(?&b)>       |  10|            brnc
                |    |              piec   
                |    |                atom   
<)(?&b)>        |  12|            tail~ OPEN2 'c' (8) -> EXACT
                |  14|          lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
                |    |            tail~ EXACT <x> (10) -> CLOSE
<(?&b)>         |    |      tail~ OPEN1 'b' (1)  
                |    |          ~ REFN2 'c' <1> (3)
                |    |          ~ CLOSE1 'b' (6) -> OPEN
                |    |      piec   
                |    |        atom   
<?&b)>          |    |          reg    
<>              |  17|      tail~ OPEN2 'c' (8)
                |    |          ~ EXACT <x> (10)
                |    |          ~ CLOSE2 'c' (12) -> GOSUB
                |  18|  lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
                |    |    tail~ OPEN1 'b' (1)  
                |    |        ~ REFN2 'c' <1> (3)
                |    |        ~ CLOSE1 'b' (6)
                |    |        ~ OPEN2 'c' (8)
                |    |        ~ EXACT <x> (10)
                |    |        ~ CLOSE2 'c' (12)
                |    |        ~ GOSUB1[+0:14] 'b' (14) -> END
Required size 17 nodes
first at 3
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
EOF_DEBUG_OUT
                      {rtrim_result=>1},
                      "Related to Github Issue #19350, forward \\g{x} pattern segv under use re Debug => 'PARSE'");
    }

    {   # perl-security#140, read/write past buffer end
        fresh_perl_like('qr/\p{utf8::perl x}/',
                        qr/Illegal user-defined property name "utf8::perl x" in regex/,
                        {}, "perl-security#140");
        fresh_perl_is('qr/\p{utf8::_perl_surrogate}/', "",
                        {}, "perl-security#140");
    }

    {   # GH 20009
        my $x = "awesome quotes";
        utf8::upgrade($x);
        $x =~ s/^[\x{0301}\x{030C}]+//;
    }


    # !!! NOTE that tests that aren't at all likely to crash perl should go
    # a ways above, above these last ones.  There's a comment there that, like
    # this comment, contains the word 'NOTE'

    done_testing();
} # End of sub run_tests

1;



( run in 0.334 second using v1.01-cache-2.11-cpan-b888b73be4d )