Affix

 view release on metacpan or  search on metacpan

infix/src/core/signature.c  view on Meta::CPAN

 * SPDX-License-Identifier: (Artistic-2.0 OR MIT)
 *
 * The documentation blocks within this file are licensed under the
 * Creative Commons Attribution 4.0 International License (CC BY 4.0).
 *
 * SPDX-License-Identifier: CC-BY-4.0
 */
/**
 * @file signature.c
 * @brief Implements the `infix` signature string parser and type printer.
 * @ingroup internal_core
 *
 * @details This module is responsible for two key functionalities that form the
 * user-facing API of the library:
 *
 * 1.  **Parsing:** It contains a hand-written recursive descent parser that transforms a
 *     human-readable signature string (e.g., `"({int, *char}) -> void"`) into an
 *     unresolved `infix_type` object graph. This is the **"Parse"** stage of the core
 *     data pipeline. The internal entry point for the "Parse" stage is `_infix_parse_type_internal`.
 *
 * 2.  **Printing:** It provides functions to serialize a fully resolved `infix_type`
 *     graph back into a canonical signature string. This is crucial for introspection,
 *     debugging, and verifying the library's understanding of a type.
 *
 * The public functions `infix_type_from_signature` and `infix_signature_parse`
 * are high-level orchestrators. They manage the entire **"Parse -> Copy -> Resolve -> Layout"**
 * pipeline, providing the user with a fully validated, self-contained, and ready-to-use
 * type object that is safe to use for the lifetime of its returned arena.
 */
#include "common/infix_internals.h"
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/** @internal A thread-local pointer to the full signature string being parsed, used by `error.c` for rich error
 * reporting. */
extern INFIX_TLS const char * g_infix_last_signature_context;
/** @internal A safeguard against stack overflows from malicious or deeply nested signatures (e.g., `{{{{...}}}}`). */
#define MAX_RECURSION_DEPTH 32
static infix_status parse_function_signature_details(parser_state * state,
                                                     infix_type ** out_ret_type,
                                                     infix_function_argument ** out_args,
                                                     size_t * out_num_args,
                                                     size_t * out_num_fixed_args);
// Parser Helper Functions
/**
 * @internal
 * @brief Sets a detailed parser error, capturing the current position in the string.
 * @param[in,out] state The current parser state.
 * @param[in] code The error code to set.
 */
INFIX_INTERNAL void _infix_set_parser_error(parser_state * state, infix_error_code_t code) {
    _infix_set_error(INFIX_CATEGORY_PARSER, code, (size_t)(state->p - state->start));
}
INFIX_INTERNAL void skip_whitespace(parser_state * state);

/**
 * @internal
 * @brief Advances the parser's cursor past any whitespace or C-style line comments.
 * @param[in,out] state The parser state to modify.
 */
INFIX_INTERNAL void skip_whitespace(parser_state * state) {
    while (true) {
        while (isspace((unsigned char)*state->p))
            state->p++;
        if (*state->p == '#')  // C-style line comments
            while (*state->p != '\n' && *state->p != '\0')
                state->p++;
        else
            break;
    }
}
/**
 * @internal
 * @brief Parses an unsigned integer from the string, used for array/vector sizes.
 * @param[in,out] state The parser state.
 * @param[out] out_val A pointer to store the parsed value.
 * @return `true` on success, `false` on failure.
 */
static bool parse_size_t(parser_state * state, size_t * out_val) {
    const char * start = state->p;
    char * end;
    errno = 0;  // Reset errno before call
    unsigned long long val = strtoull(start, &end, 10);

    // Check for no conversion (end==start) OR overflow (ERANGE)
    if (end == start || errno == ERANGE) {
        // Use INTEGER_OVERFLOW code for range errors
        _infix_set_parser_error(state, errno == ERANGE ? INFIX_CODE_INTEGER_OVERFLOW : INFIX_CODE_UNEXPECTED_TOKEN);
        return false;
    }

    // Check for truncation if size_t is smaller than unsigned long long (e.g. 32-bit builds)
    if (val > SIZE_MAX) {
        _infix_set_parser_error(state, INFIX_CODE_INTEGER_OVERFLOW);
        return false;
    }
    *out_val = (size_t)val;
    state->p = end;
    return true;
}
/**
 * @internal
 * @brief Parses a C-style identifier from the string.
 * @details This is used for member names, named types, and function argument names.
 * It handles simple identifiers (`my_var`) and C++-style namespaces (`NS::Name`).
 * @param[in,out] state The parser state.
 * @return An arena-allocated string for the identifier, or `nullptr` on failure.
 */
static const char * parse_identifier(parser_state * state) {
    skip_whitespace(state);
    const char * start = state->p;
    if (!isalpha((unsigned char)*start) && *start != '_')
        return nullptr;
    while (isalnum((unsigned char)*state->p) || *state->p == '_' || *state->p == ':') {
        if (*state->p == ':' && state->p[1] != ':')
            break;  // A single ':' is not part of an identifier.
        if (*state->p == ':')
            state->p++;  // Consume first ':' of '::'
        state->p++;
    }
    size_t len = state->p - start;
    if (len == 0)
        return nullptr;
    char * name = infix_arena_calloc(state->arena, 1, len + 1, 1);
    if (!name) {
        _infix_set_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_OUT_OF_MEMORY, (size_t)(state->p - state->start));
        return nullptr;
    }
    infix_memcpy((void *)name, start, len);
    name[len] = '\0';
    return name;
}
/**
 * @internal
 * @brief Consumes a specific keyword from the string (e.g., "int", "struct").
 * @details This function is careful to match whole words only. For example, it will
 * successfully consume "int" from "int x", but will fail on "integer", preventing
 * false positives.
 * @param[in,out] state The parser state.
 * @param[in] keyword The keyword to consume.
 * @return `true` if the keyword was successfully consumed.
 */
static bool consume_keyword(parser_state * state, const char * keyword) {
    skip_whitespace(state);
    size_t len = strlen(keyword);
    if (strncmp(state->p, keyword, len) == 0) {
        // Ensure it's not a prefix of a longer word (e.g., "int" vs "integer").
        if (isalnum((unsigned char)state->p[len]) || state->p[len] == '_')
            return false;
        state->p += len;
        skip_whitespace(state);
        return true;
    }
    return false;
}
/**
 * @internal
 * @brief Parses an optional named prefix, like `name: type`.
 * @details If a valid identifier is found followed by a colon, the name is returned
 * and the parser's cursor is advanced past the colon. If not, the parser state is
 * rewound to its original position (backtracking) and `nullptr` is returned.
 * @param[in,out] state The parser state.
 * @return An arena-allocated string for the name, or `nullptr` if no `name:` prefix is present.
 */
static const char * parse_optional_name_prefix(parser_state * state) {
    skip_whitespace(state);
    // Save the current position in case we need to backtrack.
    const char * p_before = state->p;
    const char * name = parse_identifier(state);
    if (name) {
        skip_whitespace(state);
        if (*state->p == ':') {  // Found "identifier:", so consume the colon and return the name.
            state->p++;
            return name;
        }
    }
    // If it wasn't a `name:`, backtrack to the original position.
    state->p = p_before;
    return nullptr;
}
/**
 * @internal
 * @brief A lookahead function to disambiguate a grouped type `(type)` from a
 *        function signature `(...) -> type`.
 *
 * @details This is a classic parser "lookahead". When the parser encounters an opening
 * parenthesis `(`, it calls this function to peek ahead in the string without
 * consuming any input. By scanning for a matching `)` and checking if it is
 * followed by a `->` token, it can decide whether to parse the content as a
 * single, parenthesized type or as a full function signature.
 *
 * @param[in] state The current parser state (read-only).
 * @return `true` if a `->` token follows the closing parenthesis.
 */
static bool is_function_signature_ahead(const parser_state * state) {
    const char * p = state->p;
    if (*p != '(')
        return false;
    p++;
    // Find the matching ')' by tracking nesting depth.
    int depth = 1;
    while (*p != '\0' && depth > 0) {
        if (*p == '(')
            depth++;
        else if (*p == ')')
            depth--;
        p++;
    }
    if (depth != 0)
        return false;  // Mismatched parentheses.
    // Skip any whitespace or comments after the ')'
    while (isspace((unsigned char)*p) || *p == '#') {
        if (*p == '#')
            while (*p != '\n' && *p != '\0')
                p++;
        else
            p++;
    }
    // Check for the '->' arrow.
    return (p[0] == '-' && p[1] == '>');



( run in 3.411 seconds using v1.01-cache-2.11-cpan-5837b0d9d2c )