perl

 view release on metacpan or  search on metacpan

regcomp.c  view on Meta::CPAN

                                     would be a bug if these returned
                                     non-portables */
                           (bool) RExC_strict,
                           TRUE, /* Allow an optimized regnode result */
                           NULL);
            RETURN_FAIL_ON_RESTART_FLAGP(flagp);
            /* regclass() can only return RESTART_PARSE and NEED_UTF8 if
             * multi-char folds are allowed.  */
            if (!ret)
                FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                      (UV) *flagp);

            RExC_parse--;   /* regclass() leaves this one too far ahead */

          finish_meta_pat:
                   /* The escapes above that don't take a parameter can't be
                    * followed by a '{'.  But 'pX', 'p{foo}' and
                    * correspondingly 'P' can be */
            if (   RExC_parse - atom_parse_start == 1
                && UCHARAT(RExC_parse + 1) == '{'
                && UNLIKELY(! regcurly(RExC_parse + 1, RExC_end, NULL)))
            {
                RExC_parse_inc_by(2);
                vFAIL("Unescaped left brace in regex is illegal here");
            }
            nextchar(pRExC_state);
            break;
        case 'N':
            /* Handle \N, \N{} and \N{NAMED SEQUENCE} (the latter meaning the
             * \N{...} evaluates to a sequence of more than one code point).
             * The function call below returns a regnode, which is our result.
             * The parameters cause it to fail if the \N{} evaluates to a
             * single code point; we handle those like any other literal.  The
             * reason that the multicharacter case is handled here and not as
             * part of the EXACTish code is because of quantifiers.  In
             * /\N{BLAH}+/, the '+' applies to the whole thing, and doing it
             * this way makes that Just Happen. dmq.
             * join_exact() will join this up with adjacent EXACTish nodes
             * later on, if appropriate. */
            ++RExC_parse;
            if (grok_bslash_N(pRExC_state,
                              &ret,     /* Want a regnode returned */
                              NULL,     /* Fail if evaluates to a single code
                                           point */
                              NULL,     /* Don't need a count of how many code
                                           points */
                              flagp,
                              RExC_strict,
                              depth)
            ) {
                break;
            }

            RETURN_FAIL_ON_RESTART_FLAGP(flagp);

            /* Here, evaluates to a single code point.  Go get that */
            RExC_parse_set(atom_parse_start);
            goto defchar;

        case 'k':    /* Handle \k<NAME> and \k'NAME' and \k{NAME} */
      parse_named_seq:  /* Also handle non-numeric \g{...} */
        {
            char ch;
            if (   RExC_parse >= RExC_end - 1
                || ((   ch = RExC_parse[1]) != '<'
                                      && ch != '\''
                                      && ch != '{'))
            {
                RExC_parse_inc_by(1);
                /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
                vFAIL2("Sequence %.2s... not terminated", atom_parse_start);
            } else {
                RExC_parse_inc_by(2);
                if (ch == '{') {
                    while (isBLANK(*RExC_parse)) {
                        RExC_parse_inc_by(1);
                    }
                }
                ret = handle_named_backref(pRExC_state,
                                           flagp,
                                           atom_parse_start,
                                           (ch == '<')
                                           ? '>'
                                           : (ch == '{')
                                             ? '}'
                                             : '\'');
            }
            break;
        }
        case 'g':
        case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            {
                I32 num;
                char * endbrace = NULL;
                char * s = RExC_parse;
                char * e = RExC_end;

                if (*s == 'g') {
                    bool isrel = 0;

                    s++;
                    if (*s == '{') {
                        endbrace = (char *) memchr(s, '}', RExC_end - s);
                        if (! endbrace ) {

                            /* Missing '}'.  Position after the number to give
                             * a better indication to the user of where the
                             * problem is. */
                            s++;
                            if (*s == '-') {
                                s++;
                            }

                            /* If it looks to be a name and not a number, go
                             * handle it there */
                            if (! isDIGIT(*s)) {
                                goto parse_named_seq;
                            }

                            do {
                                s++;
                            } while isDIGIT(*s);

                            RExC_parse_set(s);
                            vFAIL("Unterminated \\g{...} pattern");
                        }

                        s++;    /* Past the '{' */

                        while (isBLANK(*s)) {
                            s++;
                        }

                        /* Ignore trailing blanks */
                        e = endbrace;
                        while (s < e && isBLANK(*(e - 1))) {
                            e--;
                        }
                    }

                    /* Here, have isolated the meat of the construct from any
                     * surrounding braces */

                    if (*s == '-') {
                        isrel = 1;
                        s++;
                    }

                    if (endbrace && !isDIGIT(*s)) {
                        goto parse_named_seq;
                    }

                    RExC_parse_set(s);
                    num = S_backref_value(RExC_parse, RExC_end);
                    if (num == 0)
                        vFAIL("Reference to invalid group 0");
                    else if (num == I32_MAX) {
                         if (isDIGIT(*RExC_parse))
                            vFAIL("Reference to nonexistent group");
                        else
                            vFAIL("Unterminated \\g... pattern");
                    }

                    if (isrel) {
                        num = RExC_npar - num;
                        if (num < 1)
                            vFAIL("Reference to nonexistent or unclosed group");
                    }
                    else
                    if (num < RExC_logical_npar) {
                        num = RExC_logical_to_parno[num];
                    }
                    else
                    if (ALL_PARENS_COUNTED)  {
                        if (num < RExC_logical_total_parens)
                            num = RExC_logical_to_parno[num];
                        else {
                            num = -1;
                        }
                    }
                    else{
                        REQUIRE_PARENS_PASS;
                    }
                }
                else {
                    num = S_backref_value(RExC_parse, RExC_end);
                    /* bare \NNN might be backref or octal - if it is larger
                     * than or equal to RExC_npar then it is assumed to be an
                     * octal escape. Note RExC_npar is +1 from the actual
                     * number of parens. */
                    /* Note we do NOT check if num == I32_MAX here, as that is
                     * handled by the RExC_npar check */

                    if (    /* any numeric escape < 10 is always a backref */
                           num > 9
                            /* any numeric escape < RExC_npar is a backref */
                        && num >= RExC_logical_npar
                            /* cannot be an octal escape if it starts with [89]
                             * */
                        && ! inRANGE(*RExC_parse, '8', '9')
                    ) {
                        /* Probably not meant to be a backref, instead likely
                         * to be an octal character escape, e.g. \35 or \777.
                         * The above logic should make it obvious why using
                         * octal escapes in patterns is problematic. - Yves */
                        RExC_parse_set(atom_parse_start);
                        goto defchar;
                    }
                    if (num < RExC_logical_npar) {
                        num = RExC_logical_to_parno[num];
                    }
                    else
                    if (ALL_PARENS_COUNTED) {
                        if (num < RExC_logical_total_parens) {
                            num = RExC_logical_to_parno[num];
                        } else {
                            num = -1;
                        }
                    } else {
                        REQUIRE_PARENS_PASS;
                    }
                }

                /* At this point RExC_parse points at a numeric escape like
                 * \12 or \88 or the digits in \g{34} or \g34 or something
                 * similar, which we should NOT treat as an octal escape. It
                 * may or may not be a valid backref escape. For instance
                 * \88888888 is unlikely to be a valid backref.
                 *
                 * We've already figured out what value the digits represent.
                 * Now, move the parse to beyond them. */
                if (endbrace) {
                    RExC_parse_set(endbrace + 1);
                }
                else while (isDIGIT(*RExC_parse)) {
                    RExC_parse_inc_by(1);
                }
                if (num < 0)
                    vFAIL("Reference to nonexistent group");

                if (num >= (I32)RExC_npar) {
                    /* It might be a forward reference; we can't fail until we
                     * know, by completing the parse to get all the groups, and
                     * then reparsing */
                    if (ALL_PARENS_COUNTED)  {
                        if (num >= RExC_total_parens)  {
                            vFAIL("Reference to nonexistent group");
                        }
                    }
                    else {
                        REQUIRE_PARENS_PASS;
                    }
                }
                RExC_sawback = 1;
                ret = reg2node(pRExC_state,
                               ((! FOLD)
                                 ? REF
                                 : (ASCII_FOLD_RESTRICTED)
                                   ? REFFA
                                   : (AT_LEAST_UNI_SEMANTICS)
                                     ? REFFU
                                     : (LOC)
                                       ? REFFL
                                       : REFF),
                                num, RExC_nestroot);
                if (RExC_nestroot && num >= RExC_nestroot)
                    FLAGS(REGNODE_p(ret)) = VOLATILE_REF;
                if (OP(REGNODE_p(ret)) == REFF) {
                    RExC_seen_d_op = TRUE;
                }
                *flagp |= HASWIDTH;

                skip_to_be_ignored_text(pRExC_state, &RExC_parse,
                                        FALSE /* Don't force to /x */ );
            }
            break;
        case '\0':
            if (RExC_parse >= RExC_end)
                FAIL("Trailing \\");
            /* FALLTHROUGH */
        default:
            /* Do not generate "unrecognized" warnings here, we fall
               back into the quick-grab loop below */
            RExC_parse_set(atom_parse_start);
            goto defchar;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 0.535 second using v1.00-cache-2.02-grep-82fe00e-cpan-1925d2aa809 )