Affix
view release on metacpan or search on metacpan
infix/src/arch/aarch64/abi_arm64.c view on Meta::CPAN
// Recursive step for structs: check every member.
if (type->category == INFIX_TYPE_STRUCT) {
if (type->meta.aggregate_info.num_members == 0)
return false;
for (size_t i = 0; i < type->meta.aggregate_info.num_members; ++i)
if (!is_hfa_recursive_check(type->meta.aggregate_info.members[i].type, base_type, field_count))
return false;
return true;
}
// If it's not a float, complex, array, or struct, it cannot be part of an HFA.
return false;
}
/**
 * @internal
 * @brief Tests whether a type qualifies as a Homogeneous Floating-point Aggregate (HFA).
 * @details Only struct, array and complex types are considered. The type must be
 *          non-empty and at most 64 bytes, must yield a base element type from
 *          `get_hfa_base_type`, must be exactly 1 to 4 base elements in size, and every
 *          member must pass `is_hfa_recursive_check` against that base type.
 *
 * @param type The `infix_type` to classify.
 * @param[out] out_base_type Optional. Written only when the function returns `true`;
 *             receives the base element type of the HFA.
 * @return `true` if `type` is an HFA, `false` otherwise.
 */
static bool is_hfa(const infix_type * type, const infix_type ** out_base_type) {
    // Only aggregates (and complex numbers) are candidates.
    const bool is_candidate = type->category == INFIX_TYPE_STRUCT || type->category == INFIX_TYPE_ARRAY ||
        type->category == INFIX_TYPE_COMPLEX;
    if (!is_candidate)
        return false;

    // Size gate: empty types and anything above 64 bytes are rejected outright
    // (4 * sizeof(double) = 32 on standard targets, 4 * 16 = 64 on others).
    const size_t total_size = type->size;
    if (total_size == 0 || total_size > 64)
        return false;

    // The first primitive element determines the candidate base type.
    const infix_type * elem_type = get_hfa_base_type(type);
    if (elem_type == nullptr)
        return false;

    // The aggregate must consist of exactly 1..4 base-sized elements, with no remainder.
    const size_t elem_count = total_size / elem_type->size;
    if (elem_count < 1 || elem_count > 4)
        return false;
    if (elem_count * elem_type->size != total_size)
        return false;

    // Every member, at every nesting level, has to agree with the base type.
    size_t seen_fields = 0;
    if (!is_hfa_recursive_check(type, elem_type, &seen_fields))
        return false;

    if (out_base_type != nullptr)
        *out_base_type = elem_type;
    return true;
}
/**
* @internal
* @brief Stage 1 (Forward): Analyzes a signature and creates a call frame layout for AAPCS64.
* @details This function assigns each argument to a location (GPR, VPR, or Stack) according
* to the AAPCS64 rules. It contains extensive conditional logic to handle ABI
* deviations on Apple and Windows platforms, especially for variadic arguments
* and 16-byte aggregate alignment.
*
* @param arena The temporary arena for allocations.
* @param out_layout Receives the created layout blueprint.
* @param ret_type The function's return type.
* @param arg_types Array of argument types.
* @param num_args Total number of arguments.
* @param num_fixed_args Number of non-variadic arguments.
* @param target_fn The target function address.
* @return `INFIX_SUCCESS` on success, or an error code on failure.
*/
static infix_status prepare_forward_call_frame_arm64(infix_arena_t * arena,
infix_call_frame_layout ** out_layout,
infix_type * ret_type,
infix_type ** arg_types,
size_t num_args,
size_t num_fixed_args,
void * target_fn) {
if (out_layout == nullptr)
return INFIX_ERROR_INVALID_ARGUMENT;
infix_call_frame_layout * layout =
infix_arena_calloc(arena, 1, sizeof(infix_call_frame_layout), _Alignof(infix_call_frame_layout));
if (layout == nullptr) {
*out_layout = nullptr;
return INFIX_ERROR_ALLOCATION_FAILED;
}
layout->arg_locations =
infix_arena_calloc(arena, num_args, sizeof(infix_arg_location), _Alignof(infix_arg_location));
if (layout->arg_locations == nullptr && num_args > 0) {
*out_layout = nullptr;
return INFIX_ERROR_ALLOCATION_FAILED;
}
size_t gpr_count = 0, vpr_count = 0, stack_offset = 0;
layout->is_variadic = (num_fixed_args < num_args);
layout->target_fn = target_fn;
layout->num_args = num_args;
layout->num_stack_args = 0;
// Determine if the return value is passed by reference (via hidden pointer in X8).
// This is true for aggregates larger than 16 bytes.
bool ret_is_aggregate = (ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX);
layout->return_value_in_memory = (ret_is_aggregate && ret_type->size > 16);
// Main Argument Classification Loop
for (size_t i = 0; i < num_args; ++i) {
infix_type * type = arg_types[i];
// Security: Reject excessively large types.
if (type->size > INFIX_MAX_ARG_SIZE) {
*out_layout = nullptr;
return INFIX_ERROR_LAYOUT_FAILED;
}
bool placed_in_register = false;
c23_maybe_unused bool is_variadic_arg = (i >= num_fixed_args);
// Arrays decay to pointers. Always treat as a GPR argument (8 bytes).
if (type->category == INFIX_TYPE_ARRAY) {
if (gpr_count < NUM_GPR_ARGS) {
layout->arg_locations[i].type = ARG_LOCATION_GPR;
layout->arg_locations[i].reg_index = (uint8_t)gpr_count++;
placed_in_register = true;
}
else {
layout->arg_locations[i].type = ARG_LOCATION_STACK;
layout->arg_locations[i].stack_offset = (uint32_t)stack_offset;
stack_offset += 8;
layout->num_stack_args++;
placed_in_register = true;
}
continue;
}
infix/src/arch/aarch64/abi_arm64.c view on Meta::CPAN
hfa_base->size,
VPR_ARGS[i],
X20_REG,
(int32_t)(i * hfa_base->size)); // Explicit cast
}
else if (is_float16(ret_type))
emit_arm64_str_vpr(buf, 2, V0_REG, X20_REG, 0); // str h0, [x20]
else if (is_float(ret_type))
emit_arm64_str_vpr(buf, 4, V0_REG, X20_REG, 0); // str s0, [x20]
// Handle standard double OR 8-byte long double (macOS)
else if (is_double(ret_type) || (is_long_double(ret_type) && ret_type->size == 8))
emit_arm64_str_vpr(buf, 8, V0_REG, X20_REG, 0); // str d0, [x20]
else {
// Integer, pointer, or small aggregate return.
switch (ret_type->size) {
case 1:
emit_arm64_strb_imm(buf, X0_REG, X20_REG, 0);
break;
case 2:
emit_arm64_strh_imm(buf, X0_REG, X20_REG, 0);
break;
case 4:
emit_arm64_str_imm(buf, false, X0_REG, X20_REG, 0);
break;
case 8:
emit_arm64_str_imm(buf, true, X0_REG, X20_REG, 0);
break;
case 16: // For __int128_t or small structs
emit_arm64_str_imm(buf, true, X0_REG, X20_REG, 0);
emit_arm64_str_imm(buf, true, X1_REG, X20_REG, 8);
break;
default:
break;
}
}
}
// Deallocate stack space and restore registers.
// X29 was set to SP after all pushes.
// mov sp, x29
emit_arm64_mov_reg(buf, true, SP_REG, X29_FP_REG);
emit_arm64_ldp_post_index(buf, true, X21_REG, X22_REG, SP_REG, 16); // ldp x21, x22, [sp], #16
emit_arm64_ldp_post_index(buf, true, X19_REG, X20_REG, SP_REG, 16); // ldp x19, x20, [sp], #16
emit_arm64_ldp_post_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16); // ldp x29, x30, [sp], #16
emit_arm64_ret(buf, X30_LR_REG); // ret
return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Reverse): Calculates the stack layout for a reverse trampoline stub.
 * @details This function determines the total stack space the JIT-compiled stub will need
 *          for its local variables. This space includes:
 *          1. A buffer to store the return value before it's placed in registers.
 *          2. An array of `void*` pointers (`args_array`) to pass to the C dispatcher.
 *          3. A contiguous data area where the contents of all incoming arguments
 *             (from registers or the caller's stack) will be saved.
 *
 *          Each of the three regions is rounded up to a multiple of 16 bytes. Every
 *          region is bounds-checked *before* it is rounded or summed so that hostile
 *          sizes cannot wrap `size_t` and slip past the limit checks.
 *
 * @param arena The temporary arena for allocations.
 * @param[out] out_layout The resulting reverse call frame layout blueprint, populated with offsets.
 *             Set to `nullptr` on every failure path.
 * @param context The reverse trampoline context with full signature information.
 * @return `INFIX_SUCCESS` on success, `INFIX_ERROR_INVALID_ARGUMENT` if `out_layout` or
 *         `context` is null, `INFIX_ERROR_ALLOCATION_FAILED` if the arena allocation fails,
 *         or `INFIX_ERROR_LAYOUT_FAILED` if any size limit is exceeded.
 */
static infix_status prepare_reverse_call_frame_arm64(infix_arena_t * arena,
                                                     infix_reverse_call_frame_layout ** out_layout,
                                                     infix_reverse_t * context) {
    if (out_layout == nullptr)
        return INFIX_ERROR_INVALID_ARGUMENT;
    // Guarantee the caller never observes a stale pointer on any failure path.
    *out_layout = nullptr;
    if (context == nullptr)
        return INFIX_ERROR_INVALID_ARGUMENT;

    infix_reverse_call_frame_layout * layout = infix_arena_calloc(
        arena, 1, sizeof(infix_reverse_call_frame_layout), _Alignof(infix_reverse_call_frame_layout));
    if (!layout)
        return INFIX_ERROR_ALLOCATION_FAILED;

    // Explicitly size_t-typed mask: rounds a size up to the next multiple of 16.
    const size_t align_mask = (size_t)15;

    // The return buffer must be large enough and aligned for any type.
    // Reject oversized return types first so that `size + 15` cannot wrap.
    if (context->return_type->size > INFIX_MAX_STACK_ALLOC)
        return INFIX_ERROR_LAYOUT_FAILED;
    size_t return_size = (context->return_type->size + align_mask) & ~align_mask;

    // The array of pointers that will be passed to the C dispatcher.
    // Bound the count first so that `num_args * sizeof(void *)` cannot wrap.
    if (context->num_args > (size_t)(INFIX_MAX_STACK_ALLOC) / sizeof(void *))
        return INFIX_ERROR_LAYOUT_FAILED;
    size_t args_array_size = (context->num_args * sizeof(void *) + align_mask) & ~align_mask;

    // The contiguous block where we will save the actual argument data.
    size_t saved_args_data_size = 0;
    for (size_t i = 0; i < context->num_args; ++i) {
        const size_t arg_size = context->arg_types[i]->size;
        // Security: reject any single excessively large argument.
        if (arg_size > INFIX_MAX_ARG_SIZE)
            return INFIX_ERROR_LAYOUT_FAILED;
        // Ensure each saved argument slot is 16-byte aligned for simplicity and correctness.
        saved_args_data_size += (arg_size + align_mask) & ~align_mask;
        // Security: cap the running total on every iteration so the sum can never wrap.
        if (saved_args_data_size > INFIX_MAX_ARG_SIZE)
            return INFIX_ERROR_LAYOUT_FAILED;
    }

    // All three terms are individually bounded above, so this sum cannot overflow.
    size_t total_local_space = return_size + args_array_size + saved_args_data_size;
    if (total_local_space > INFIX_MAX_STACK_ALLOC)
        return INFIX_ERROR_LAYOUT_FAILED;

    // The total stack allocation for the frame must be 16-byte aligned.
    layout->total_stack_alloc = (total_local_space + align_mask) & ~align_mask;

    // Local variables are accessed via positive offsets from the stack pointer (SP)
    // after the initial `sub sp, sp, #alloc` in the prologue.
    // The layout on our local stack will be: [ return_buffer | args_array | saved_args_data ]
    layout->return_buffer_offset = 0;
    layout->args_array_offset = layout->return_buffer_offset + (int32_t)return_size;
    layout->saved_args_offset = layout->args_array_offset + (int32_t)args_array_size;

    *out_layout = layout;
    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 2 (Reverse): Generates the prologue for the reverse trampoline stub.
* @details This function emits the standard AArch64 function entry code. It saves the
* caller's frame pointer (X29) and the link register (X30, the return address)
* to the stack, establishes a new frame by pointing X29 to the current stack
* pointer, and allocates the pre-calculated stack space for local variables.
*
* @param buf The code buffer to write to.
* @param layout The blueprint containing the total stack space to allocate.
* @return `INFIX_SUCCESS` on success.
*/
static infix_status generate_reverse_prologue_arm64(code_buffer * buf, infix_reverse_call_frame_layout * layout) {
// `stp x29, x30, [sp, #-16]!` : Save Frame Pointer and Link Register, pre-decrementing SP.
emit_arm64_stp_pre_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, -16);
// `mov x29, sp` : Establish the new frame pointer.
emit_arm64_mov_reg(buf, true, X29_FP_REG, SP_REG);
( run in 1.965 second using v1.01-cache-2.11-cpan-39bf76dae61 )