Affix

 view release on metacpan or  search on metacpan

infix/src/core/signature.c  view on Meta::CPAN

 * SPDX-License-Identifier: (Artistic-2.0 OR MIT)
 *
 * The documentation blocks within this file are licensed under the
 * Creative Commons Attribution 4.0 International License (CC BY 4.0).
 *
 * SPDX-License-Identifier: CC-BY-4.0
 */
/**
 * @file signature.c
 * @brief Implements the `infix` signature string parser and type printer.
 * @ingroup internal_core
 *
 * @details This module is responsible for two key functionalities that form the
 * user-facing API of the library:
 *
 * 1.  **Parsing:** It contains a hand-written recursive descent parser that transforms a
 *     human-readable signature string (e.g., `"({int, *char}) -> void"`) into an
 *     unresolved `infix_type` object graph. This is the **"Parse"** stage of the core
 *     data pipeline. The internal entry point for the "Parse" stage is `_infix_parse_type_internal`.
 *
 * 2.  **Printing:** It provides functions to serialize a fully resolved `infix_type`
 *     graph back into a canonical signature string. This is crucial for introspection,
 *     debugging, and verifying the library's understanding of a type.
 *
 * The public functions `infix_type_from_signature` and `infix_signature_parse`
 * are high-level orchestrators. They manage the entire **"Parse -> Copy -> Resolve -> Layout"**
 * pipeline, providing the user with a fully validated, self-contained, and ready-to-use
 * type object that is safe to use for the lifetime of its returned arena.
 */
#include "common/infix_internals.h"
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/** @internal A thread-local pointer to the full signature string being parsed, used by `error.c` for rich error
 * reporting. */
extern INFIX_TLS const char * g_infix_last_signature_context;
/** @internal A safeguard against stack overflows from malicious or deeply nested signatures (e.g., `{{{{...}}}}`). */
#define MAX_RECURSION_DEPTH 32
static infix_status parse_function_signature_details(parser_state * state,
                                                     infix_type ** out_ret_type,
                                                     infix_function_argument ** out_args,
                                                     size_t * out_num_args,
                                                     size_t * out_num_fixed_args);
// Parser Helper Functions
/**
 * @internal
 * @brief Records a parser-category error located at the parser's current cursor.
 * @details The position handed to `_infix_set_error` is the offset of `state->p`
 * from `state->start`, i.e. how far into the signature string the parser had
 * advanced when the problem was detected.
 * @param[in] state The current parser state; only `p` and `start` are read.
 * @param[in] code The error code to record.
 */
INFIX_INTERNAL void _infix_set_parser_error(parser_state * state, infix_error_code_t code) {
    const size_t offset = (size_t)(state->p - state->start);
    _infix_set_error(INFIX_CATEGORY_PARSER, code, offset);
}
INFIX_INTERNAL void skip_whitespace(parser_state * state);

/**
 * @internal
 * @brief Advances the parser's cursor past any run of whitespace and `#` line comments.
 * @details A comment starts at `#` and extends up to (but not including) the next
 * newline or the end of the string. The newline itself is then consumed as ordinary
 * whitespace on the following pass, so consecutive comment lines are skipped too.
 * @param[in,out] state The parser state whose cursor `p` is advanced.
 */
INFIX_INTERNAL void skip_whitespace(parser_state * state) {
    for (;;) {
        // The cast keeps isspace() well-defined for bytes >= 0x80 where plain char is signed.
        while (isspace((unsigned char)*state->p))
            state->p++;
        if (*state->p != '#')
            return;  // Neither whitespace nor a comment: the next token starts here.
        // Discard the comment body; stop on the newline or the terminating NUL.
        while (*state->p != '\n' && *state->p != '\0')
            state->p++;
    }
}
/**
 * @internal
 * @brief Parses an unsigned decimal integer from the string, used for array/vector sizes.
 * @details Leading whitespace is tolerated (as `strtoull` itself would), but the first
 * significant character must be a decimal digit. This rejects signed input such as
 * `-1`, which `strtoull` would otherwise accept and silently convert to a huge
 * positive value without reporting a range error.
 *
 * On failure the cursor is left untouched and a parser error is recorded at that
 * position: `INFIX_CODE_UNEXPECTED_TOKEN` when no number is present, or
 * `INFIX_CODE_INTEGER_OVERFLOW` when the value does not fit in a `size_t`.
 * @param[in,out] state The parser state; `p` is advanced past the digits on success.
 * @param[out] out_val A pointer to store the parsed value (written only on success).
 * @return `true` on success, `false` on failure.
 */
static bool parse_size_t(parser_state * state, size_t * out_val) {
    const char * start = state->p;
    // strtoull() skips leading whitespace on its own; do it here so the first
    // significant character can be inspected before conversion.
    while (isspace((unsigned char)*start))
        start++;

    // strtoull() accepts an optional '+'/'-' and negates in unsigned arithmetic, so
    // "-1" would become ULLONG_MAX with errno left at 0. Sizes must be plain digits.
    if (!isdigit((unsigned char)*start)) {
        _infix_set_parser_error(state, INFIX_CODE_UNEXPECTED_TOKEN);
        return false;
    }

    char * end;
    errno = 0;  // Reset errno before the call so ERANGE can be attributed to it.
    unsigned long long val = strtoull(start, &end, 10);

    // Check for no conversion (end==start) OR overflow (ERANGE)
    if (end == start || errno == ERANGE) {
        // Use INTEGER_OVERFLOW code for range errors
        _infix_set_parser_error(state, errno == ERANGE ? INFIX_CODE_INTEGER_OVERFLOW : INFIX_CODE_UNEXPECTED_TOKEN);
        return false;
    }

    // Check for truncation if size_t is smaller than unsigned long long (e.g. 32-bit builds)
    if (val > SIZE_MAX) {
        _infix_set_parser_error(state, INFIX_CODE_INTEGER_OVERFLOW);
        return false;
    }
    *out_val = (size_t)val;
    state->p = end;
    return true;
}
/**
 * @internal
 * @brief Parses a C-style identifier, optionally qualified with `::`, from the string.
 * @details Leading whitespace/comments are skipped first. The identifier must begin
 * with a letter or underscore and may continue with letters, digits, underscores and
 * `::` separators (e.g. `my_var`, `NS::Name`). A lone `:` ends the identifier and is
 * left unconsumed for the caller.
 *
 * If the text does not start an identifier, `nullptr` is returned without recording
 * an error. If the arena allocation fails, an out-of-memory error is recorded.
 * @param[in,out] state The parser state; `p` is advanced past the identifier.
 * @return An arena-allocated, NUL-terminated copy of the identifier, or `nullptr` on failure.
 */
static const char * parse_identifier(parser_state * state) {
    skip_whitespace(state);
    const char * const first = state->p;
    if (*first != '_' && !isalpha((unsigned char)*first))
        return nullptr;

    for (;;) {
        const char c = *state->p;
        if (c == ':') {
            // Only a double colon is a namespace separator; a single ':' belongs to the caller.
            if (state->p[1] != ':')
                break;
            state->p += 2;
        }
        else if (c == '_' || isalnum((unsigned char)c))
            state->p++;
        else
            break;
    }

    const size_t length = state->p - first;
    if (length == 0)
        return nullptr;

    char * copy = infix_arena_calloc(state->arena, 1, length + 1, 1);
    if (!copy) {
        _infix_set_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_OUT_OF_MEMORY, (size_t)(state->p - state->start));
        return nullptr;
    }
    infix_memcpy((void *)copy, first, length);
    copy[length] = '\0';
    return copy;
}
/**
 * @internal
 * @brief Consumes a specific keyword from the string (e.g., "int", "struct").
 * @details Only whole words match: the character following the keyword must not be
 * alphanumeric or an underscore, so "int" is consumed from "int x" but not from
 * "integer". Whitespace/comments before the keyword are always skipped, even when the
 * keyword does not match; on a match, trailing whitespace/comments are skipped as well.
 * @param[in,out] state The parser state.
 * @param[in] keyword The NUL-terminated keyword to consume.
 * @return `true` if the keyword was consumed, `false` otherwise.
 */
static bool consume_keyword(parser_state * state, const char * keyword) {
    skip_whitespace(state);
    const size_t kw_len = strlen(keyword);
    if (strncmp(state->p, keyword, kw_len) != 0)
        return false;

    // Reject a match that is merely the prefix of a longer word.
    const unsigned char following = (unsigned char)state->p[kw_len];
    if (following == '_' || isalnum(following))
        return false;

    state->p += kw_len;
    skip_whitespace(state);
    return true;
}
/**
 * @internal
 * @brief Parses an optional named prefix, like `name: type`.
 * @details If a valid identifier is found followed by a colon, the name is returned
 * and the parser's cursor is advanced past the colon. If not, the parser state is
 * rewound to its original position (backtracking) and `nullptr` is returned.
 * @param[in,out] state The parser state.
 * @return An arena-allocated string for the name, or `nullptr` if no `name:` prefix is present.
 */
static const char * parse_optional_name_prefix(parser_state * state) {

infix/src/core/signature.c  view on Meta::CPAN

        return nullptr;
    }
    state->p++;
    infix_type * packed_type = nullptr;
    // For packed structs, the total size is simply the sum of member sizes without padding.
    // The user of `infix_type_create_packed_struct` must provide pre-calculated offsets.
    // Since our parser doesn't know the offsets, we pass a preliminary size. The final
    // layout pass will fix this if needed, but for packed structs, the user's offsets
    // are king.
    size_t total_size = 0;
    for (size_t i = 0; i < num_members; ++i)
        total_size += members[i].type->size;
    infix_status status =
        infix_type_create_packed_struct(state->arena, &packed_type, total_size, alignment, members, num_members);
    if (status != INFIX_SUCCESS)
        return nullptr;
    return packed_type;
}
// Main Parser Logic
/**
 * @internal
 * @brief Parses any primitive type keyword from the signature string.
 * @details This function attempts to match and consume a variety of standard and
 *          aliased keywords for primitive types (e.g., `sint32`, `int`, `uint`).
 *          If a match is found, it returns a pointer to the corresponding static
 *          singleton type object.
 * @param[in,out] state The parser state.
 * @return A pointer to the static `infix_type` for the primitive, or `nullptr` if no keyword is matched.
 */
INFIX_INTERNAL infix_type * parse_primitive(parser_state * state) {
    if (consume_keyword(state, "sint8") || consume_keyword(state, "int8"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT8);
    if (consume_keyword(state, "uint8"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT8);
    if (consume_keyword(state, "sint16") || consume_keyword(state, "int16"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT16);
    if (consume_keyword(state, "uint16"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT16);
    if (consume_keyword(state, "sint32") || consume_keyword(state, "int32"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT32);
    if (consume_keyword(state, "uint32"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT32);
    if (consume_keyword(state, "sint64") || consume_keyword(state, "int64"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT64);
    if (consume_keyword(state, "uint64"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT64);
    if (consume_keyword(state, "sint128") || consume_keyword(state, "int128"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT128);
    if (consume_keyword(state, "uint128"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT128);
    if (consume_keyword(state, "float16"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_FLOAT16);
    if (consume_keyword(state, "float32"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_FLOAT);
    if (consume_keyword(state, "float64"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_DOUBLE);
    if (consume_keyword(state, "bool"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_BOOL);
    if (consume_keyword(state, "void"))
        return infix_type_create_void();
    // C-style convenience aliases
    if (consume_keyword(state, "uchar"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT8);
    if (consume_keyword(state, "char"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT8);
    if (consume_keyword(state, "ushort"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT16);
    if (consume_keyword(state, "short"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT16);
    if (consume_keyword(state, "uint"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT32);
    if (consume_keyword(state, "int"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT32);
    if (consume_keyword(state, "ulonglong"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT64);
    if (consume_keyword(state, "longlong"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_SINT64);
    // `long` is platform-dependent, so we use `sizeof` to pick the correct size.
    if (consume_keyword(state, "ulong"))
        return infix_type_create_primitive(sizeof(unsigned long) == 8 ? INFIX_PRIMITIVE_UINT64
                                                                      : INFIX_PRIMITIVE_UINT32);
    if (consume_keyword(state, "long"))
        return infix_type_create_primitive(sizeof(long) == 8 ? INFIX_PRIMITIVE_SINT64 : INFIX_PRIMITIVE_SINT32);
    if (consume_keyword(state, "double"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_DOUBLE);
    if (consume_keyword(state, "float"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_FLOAT);
    if (consume_keyword(state, "longdouble"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_LONG_DOUBLE);
    if (consume_keyword(state, "size_t"))
        return infix_type_create_primitive(sizeof(size_t) == 8 ? INFIX_PRIMITIVE_UINT64 : INFIX_PRIMITIVE_UINT32);
    if (consume_keyword(state, "ssize_t"))
        return infix_type_create_primitive(sizeof(ssize_t) == 8 ? INFIX_PRIMITIVE_SINT64 : INFIX_PRIMITIVE_SINT32);
    // uchar.h types
    if (consume_keyword(state, "char8_t"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT8);
    if (consume_keyword(state, "char16_t"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT16);
    if (consume_keyword(state, "char32_t"))
        return infix_type_create_primitive(INFIX_PRIMITIVE_UINT32);
    // AVX convenience aliases
    if (consume_keyword(state, "m256d")) {
        infix_type * type = nullptr;
        infix_status status =
            infix_type_create_vector(state->arena, &type, infix_type_create_primitive(INFIX_PRIMITIVE_DOUBLE), 4);
        if (status != INFIX_SUCCESS)
            return nullptr;    // Propagate failure
        type->alignment = 32;  // YMM registers require 32-byte alignment
        return type;
    }
    if (consume_keyword(state, "m256")) {
        infix_type * type = nullptr;
        infix_status status =
            infix_type_create_vector(state->arena, &type, infix_type_create_primitive(INFIX_PRIMITIVE_FLOAT), 8);
        if (status != INFIX_SUCCESS)
            return nullptr;    // Propagate failure
        type->alignment = 32;  // YMM registers require 32-byte alignment
        return type;
    }
    if (consume_keyword(state, "m512d")) {
        infix_type * type = nullptr;

infix/src/core/signature.c  view on Meta::CPAN

        return INFIX_ERROR_INVALID_ARGUMENT;
    }

    // Create final arena
    *out_arena = infix_arena_create(8192);
    if (!*out_arena) {
        infix_arena_destroy(parser_arena);
        _infix_set_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_OUT_OF_MEMORY, 0);
        return INFIX_ERROR_ALLOCATION_FAILED;
    }

    // "Copy" stage
    infix_type * final_func_type = _copy_type_graph_to_arena(*out_arena, raw_func_type);
    infix_arena_destroy(parser_arena);
    if (!final_func_type) {
        infix_arena_destroy(*out_arena);
        *out_arena = nullptr;
        _infix_set_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_OUT_OF_MEMORY, 0);
        return INFIX_ERROR_ALLOCATION_FAILED;
    }

    // Resolve and layout stages
    status = _infix_resolve_type_graph_inplace(&final_func_type, registry);
    if (status != INFIX_SUCCESS) {
        infix_arena_destroy(*out_arena);
        *out_arena = nullptr;
        return INFIX_ERROR_INVALID_ARGUMENT;
    }
    _infix_type_recalculate_layout(final_func_type);

    // Unpack the results for the caller from the final, processed function type object.
    *out_ret_type = final_func_type->meta.func_ptr_info.return_type;
    *out_args = final_func_type->meta.func_ptr_info.args;
    *out_num_args = final_func_type->meta.func_ptr_info.num_args;
    *out_num_fixed_args = final_func_type->meta.func_ptr_info.num_fixed_args;
    return INFIX_SUCCESS;
}

// Type Printing Logic
/**
 * @internal
 * @struct printer_state
 * @brief Holds the state for the recursive type-to-string printer.
 * @details The first three fields describe the output buffer and are maintained by
 * `_print`: each successful write advances `p` and shrinks `remaining`, and the first
 * write that does not fit sets `status` to an error, after which `_print` does nothing.
 *
 * The remaining fields are bookkeeping for the name-mangling printers. The Itanium
 * dictionary is searched by pointer equality (`_find_itanium_sub`) and silently stops
 * growing once all of its slots are in use (`_add_itanium_sub`).
 */
typedef struct {
    char * p;            /**< The current write position in the output buffer. */
    size_t remaining;    /**< Bytes left in the buffer, including room for the terminating NUL. */
    infix_status status; /**< `INFIX_SUCCESS` until a write fails or would overflow the buffer. */
    // Itanium mangling state
    const void * itanium_subs[64]; /**< Dictionary of substitutable components, in insertion order. */
    size_t itanium_sub_count;      /**< Number of components currently stored in the dictionary. */
    // MSVC mangling state
    const infix_type * msvc_types[10]; /**< First 10 encountered types for back-referencing. */
    size_t msvc_type_count;            /**< Number of types encountered. */
} printer_state;
/**
 * @internal
 * @brief Appends formatted text to the printer's output buffer via `vsnprintf`.
 * @details Does nothing if the printer is already in an error state. If formatting
 * fails, or the text plus its terminating NUL does not fit in the remaining space,
 * `state->status` is set to `INFIX_ERROR_INVALID_ARGUMENT` and the cursor is left
 * where it was. Otherwise the cursor and remaining count are advanced past the text.
 * @param[in,out] state The printer state.
 * @param[in] fmt The `printf`-style format string.
 * @param[in] ... Arguments for the format string.
 */
static void _print(printer_state * state, const char * fmt, ...) {
    if (state->status != INFIX_SUCCESS)
        return;

    va_list ap;
    va_start(ap, fmt);
    const int needed = vsnprintf(state->p, state->remaining, fmt, ap);
    va_end(ap);

    // A negative result is an encoding error; a result >= remaining means truncation.
    if (needed < 0 || (size_t)needed >= state->remaining) {
        state->status = INFIX_ERROR_INVALID_ARGUMENT;
        return;
    }
    state->p += needed;
    state->remaining -= needed;
}
// Forward declarations for mutual recursion in printers.
static void _infix_type_print_signature_recursive(printer_state * state, const infix_type * type);
static void _infix_type_print_itanium_recursive(printer_state * state, const infix_type * type);
static void _infix_type_print_msvc_recursive(printer_state * state, const infix_type * type);

// Itanium Mangling Helpers
/**
 * @internal
 * @brief Looks up a component in the Itanium substitution dictionary.
 * @details Entries are compared by pointer equality, and the first match wins.
 * @param[in] state The printer state holding the dictionary.
 * @param[in] component The component pointer to search for.
 * @param[out] index Receives the zero-based dictionary slot on a hit; untouched on a miss.
 * @return `true` if the component is present, `false` otherwise.
 */
static bool _find_itanium_sub(printer_state * state, const void * component, size_t * index) {
    const size_t count = state->itanium_sub_count;
    size_t slot = 0;
    while (slot < count && state->itanium_subs[slot] != component)
        slot++;
    if (slot == count)
        return false;
    *index = slot;
    return true;
}

/**
 * @internal
 * @brief Appends a component to the Itanium substitution dictionary.
 * @details When the dictionary is already full the component is silently dropped;
 * no error is recorded on the printer state.
 * @param[in,out] state The printer state holding the dictionary.
 * @param[in] component The component pointer to remember.
 */
static void _add_itanium_sub(printer_state * state, const void * component) {
    const size_t capacity = sizeof(state->itanium_subs) / sizeof(state->itanium_subs[0]);
    if (state->itanium_sub_count >= capacity)
        return;
    state->itanium_subs[state->itanium_sub_count] = component;
    state->itanium_sub_count++;
}

static void _print_itanium_sub(printer_state * state, size_t index) {
    if (index == 0) {
        _print(state, "S_");
    }
    else {
        index--;  // S0_ is index 1
        _print(state, "S");
        if (index == 0) {
            _print(state, "0");
        }
        else {
            char buf[16];
            int pos = 0;
            size_t val = index;
            while (val > 0) {
                int digit = val % 36;
                buf[pos++] = (digit < 10) ? (char)('0' + digit) : (char)('A' + digit - 10);
                val /= 36;
            }
            while (pos > 0)
                _print(state, "%c", buf[--pos]);
        }



( run in 0.745 second using v1.01-cache-2.11-cpan-ceb78f64989 )