Affix
view release on metacpan or search on metacpan
infix/src/core/signature.c view on Meta::CPAN
* SPDX-License-Identifier: (Artistic-2.0 OR MIT)
*
* The documentation blocks within this file are licensed under the
* Creative Commons Attribution 4.0 International License (CC BY 4.0).
*
* SPDX-License-Identifier: CC-BY-4.0
*/
/**
* @file signature.c
* @brief Implements the `infix` signature string parser and type printer.
* @ingroup internal_core
*
* @details This module is responsible for two key functionalities that form the
* user-facing API of the library:
*
* 1. **Parsing:** It contains a hand-written recursive descent parser that transforms a
* human-readable signature string (e.g., `"({int, *char}) -> void"`) into an
* unresolved `infix_type` object graph. This is the **"Parse"** stage of the core
* data pipeline. The internal entry point for the "Parse" stage is `_infix_parse_type_internal`.
*
* 2. **Printing:** It provides functions to serialize a fully resolved `infix_type`
* graph back into a canonical signature string. This is crucial for introspection,
* debugging, and verifying the library's understanding of a type.
*
* The public functions `infix_type_from_signature` and `infix_signature_parse`
* are high-level orchestrators. They manage the entire **"Parse -> Copy -> Resolve -> Layout"**
* pipeline, providing the user with a fully validated, self-contained, and ready-to-use
* type object that is safe to use for the lifetime of its returned arena.
*/
#include "common/infix_internals.h"
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/** @internal A thread-local pointer to the full signature string being parsed, used by `error.c` for rich error
* reporting. */
extern INFIX_TLS const char * g_infix_last_signature_context;
/** @internal A safeguard against stack overflows from malicious or deeply nested signatures (e.g., `{{{{...}}}}`). */
#define MAX_RECURSION_DEPTH 32
/**
* @internal
* @brief Forward declaration of a file-local function, so it can be referenced before its definition.
* @details All results are delivered through the four `out_*` pointer parameters; the return value is an
* `infix_status`.
* NOTE(review): judging by the parameter names this yields the return type, the argument array, the total
* argument count and the count of fixed (non-variadic) arguments of a function signature - confirm against
* the definition.
*/
static infix_status parse_function_signature_details(parser_state * state,
infix_type ** out_ret_type,
infix_function_argument ** out_args,
size_t * out_num_args,
size_t * out_num_fixed_args);
// Parser Helper Functions
/**
 * @internal
 * @brief Records a parser-category error located at the parser's current cursor.
 * @details The position handed to `_infix_set_error` is the distance between the
 * cursor (`state->p`) and the beginning of the input (`state->start`).
 * @param[in] state The current parser state; it is only read, never modified.
 * @param[in] code The error code to record.
 */
INFIX_INTERNAL void _infix_set_parser_error(parser_state * state, infix_error_code_t code) {
    const size_t offset = (size_t)(state->p - state->start);
    _infix_set_error(INFIX_CATEGORY_PARSER, code, offset);
}
INFIX_INTERNAL void skip_whitespace(parser_state * state);
/**
 * @internal
 * @brief Moves the parser's cursor past any run of whitespace and `#` line comments.
 * @details A comment starts at `#` and extends up to, but not including, the next
 * newline or the end of the string. Whitespace and comments may alternate any
 * number of times; the cursor ends up on the first character that is neither.
 * @param[in,out] state The parser state whose cursor is advanced.
 */
INFIX_INTERNAL void skip_whitespace(parser_state * state) {
    const char * cursor = state->p;
    for (;;) {
        while (isspace((unsigned char)*cursor))
            ++cursor;
        if (*cursor != '#')
            break;
        // Drop the comment text. The terminating '\n' (if any) is ordinary
        // whitespace and is consumed by the next pass of the outer loop.
        do
            ++cursor;
        while (*cursor != '\n' && *cursor != '\0');
    }
    state->p = cursor;
}
/**
 * @internal
 * @brief Parses an unsigned decimal integer from the string, used for array/vector sizes.
 * @details The conversion itself is done by `strtoull`. Left to its own devices
 * `strtoull` accepts an optional `+`/`-` sign and converts a negative literal such
 * as `-1` by negating it in the unsigned type, yielding a huge value *without*
 * reporting `ERANGE`. To keep signed input from being accepted as a size, the
 * first non-whitespace character is required to be a decimal digit.
 *
 * Leading whitespace is tolerated exactly as `strtoull` tolerates it. On failure
 * the cursor is left where it was and a parser error is recorded at that position:
 * `INFIX_CODE_UNEXPECTED_TOKEN` when no digit is present, or
 * `INFIX_CODE_INTEGER_OVERFLOW` when the value does not fit in `unsigned long long`
 * or in `size_t`.
 * @param[in,out] state The parser state; on success its cursor is moved past the digits.
 * @param[out] out_val Receives the parsed value. Written only on success.
 * @return `true` on success, `false` on failure.
 */
static bool parse_size_t(parser_state * state, size_t * out_val) {
    // strtoull() skips leading whitespace on its own; mirror that so the digit
    // check below looks at the character the conversion would actually start at.
    const char * digits = state->p;
    while (isspace((unsigned char)*digits))
        digits++;
    // Anything other than a digit here (including '+' and '-') is not a size.
    if (!isdigit((unsigned char)*digits)) {
        _infix_set_parser_error(state, INFIX_CODE_UNEXPECTED_TOKEN);
        return false;
    }
    char * end;
    errno = 0;  // strtoull() only sets errno on failure, so clear any stale value first.
    unsigned long long val = strtoull(digits, &end, 10);
    // ERANGE: the literal exceeds unsigned long long.
    // val > SIZE_MAX: it fits there but would be truncated by a narrower size_t (e.g. 32-bit builds).
    if (errno == ERANGE || val > SIZE_MAX) {
        _infix_set_parser_error(state, INFIX_CODE_INTEGER_OVERFLOW);
        return false;
    }
    *out_val = (size_t)val;
    state->p = end;
    return true;
}
/**
 * @internal
 * @brief Parses an identifier and returns a NUL-terminated copy of it.
 * @details Leading whitespace and comments are skipped first. The identifier must
 * begin with a letter or an underscore and continues over letters, digits and
 * underscores. A double colon (`::`) is accepted as part of the name, which allows
 * namespaced names such as `NS::Name`; a lone `:` ends the identifier and is left
 * in the input.
 *
 * The cursor is advanced past the identifier before the copy is allocated, so it
 * stays advanced even if the allocation fails.
 * @param[in,out] state The parser state.
 * @return A string allocated from `state->arena`, or `nullptr` if no identifier
 * starts at the cursor or the allocation fails (in the latter case an
 * out-of-memory error is recorded).
 */
static const char * parse_identifier(parser_state * state) {
    skip_whitespace(state);
    const char * const first = state->p;
    const unsigned char lead = (unsigned char)*first;
    if (lead != '_' && !isalpha(lead))
        return nullptr;
    const char * cursor = first;
    for (;;) {
        const unsigned char ch = (unsigned char)*cursor;
        if (ch == '_' || isalnum(ch))
            cursor += 1;
        else if (ch == ':' && cursor[1] == ':')
            cursor += 2;  // Take both characters of a '::' separator at once.
        else
            break;  // Includes a lone ':', which is not part of an identifier.
    }
    state->p = cursor;
    // The leading character always matches the loop above, so the length is at least 1.
    const size_t length = (size_t)(cursor - first);
    char * copy = infix_arena_calloc(state->arena, 1, length + 1, 1);
    if (copy == nullptr) {
        _infix_set_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_OUT_OF_MEMORY, (size_t)(state->p - state->start));
        return nullptr;
    }
    infix_memcpy(copy, first, length);
    copy[length] = '\0';
    return copy;
}
/**
 * @internal
 * @brief Consumes `keyword` from the input if it appears next as a whole word.
 * @details Leading whitespace and comments are skipped before matching. The match
 * is rejected when the character right after the keyword is a letter, a digit or
 * an underscore, so `"int"` matches in `"int x"` but not in `"integer"`. On
 * success the cursor is moved past the keyword and past any whitespace/comments
 * that follow it. On failure the keyword is not consumed, although whitespace
 * skipped before the match attempt stays consumed.
 * @param[in,out] state The parser state.
 * @param[in] keyword The NUL-terminated keyword to look for.
 * @return `true` if the keyword was consumed, `false` otherwise.
 */
static bool consume_keyword(parser_state * state, const char * keyword) {
    skip_whitespace(state);
    const size_t kw_len = strlen(keyword);
    if (strncmp(state->p, keyword, kw_len) != 0)
        return false;
    // The keyword must end at a word boundary, otherwise it is only a prefix
    // of a longer word.
    const unsigned char follower = (unsigned char)state->p[kw_len];
    if (follower == '_' || isalnum(follower))
        return false;
    state->p += kw_len;
    skip_whitespace(state);
    return true;
}
/**
 * @internal
 * @brief Parses an optional `name:` prefix, as in `name: type`.
 * @details Leading whitespace and comments are skipped, then an identifier is
 * attempted. If one is found and the next non-whitespace character is a colon,
 * the colon is consumed and the name is returned. In every other case the cursor
 * is put back to where it stood right after the leading whitespace was skipped
 * and `nullptr` is returned.
 *
 * When an identifier is parsed but turns out not to be followed by a colon, the
 * copy made by `parse_identifier` is not released by this function.
 * @param[in,out] state The parser state.
 * @return The name as returned by `parse_identifier`, or `nullptr` if no `name:`
 * prefix is present.
 */
static const char * parse_optional_name_prefix(parser_state * state) {
    skip_whitespace(state);
    // Remember where to return to if this turns out not to be a `name:` prefix.
    const char * const rewind_to = state->p;
    const char * label = parse_identifier(state);
    if (label)
        skip_whitespace(state);
    const bool has_colon = label && *state->p == ':';
    if (!has_colon) {
        state->p = rewind_to;
        return nullptr;
    }
    state->p++;  // Step over the ':'.
    return label;
}
/**
* @internal
* @brief A lookahead function to disambiguate a grouped type `(type)` from a
* function signature `(...) -> type`.
*
* @details This is a classic parser "lookahead". When the parser encounters an opening
* parenthesis `(`, it calls this function to peek ahead in the string without
* consuming any input. By scanning for a matching `)` and checking if it is
* followed by a `->` token, it can decide whether to parse the content as a
* single, parenthesized type or as a full function signature.
*
* @param[in] state The current parser state (read-only).
* @return `true` if a `->` token follows the closing parenthesis.
*/
static bool is_function_signature_ahead(const parser_state * state) {
const char * p = state->p;
if (*p != '(')
return false;
p++;
// Find the matching ')' by tracking nesting depth.
int depth = 1;
while (*p != '\0' && depth > 0) {
if (*p == '(')
depth++;
else if (*p == ')')
depth--;
p++;
}
if (depth != 0)
return false; // Mismatched parentheses.
// Skip any whitespace or comments after the ')'
while (isspace((unsigned char)*p) || *p == '#') {
if (*p == '#')
while (*p != '\n' && *p != '\0')
p++;
else
p++;
}
// Check for the '->' arrow.
return (p[0] == '-' && p[1] == '>');
( run in 3.411 seconds using v1.01-cache-2.11-cpan-5837b0d9d2c )