Affix
view release on metacpan or search on metacpan
infix/src/arch/aarch64/abi_arm64.c view on Meta::CPAN
*/
static infix_status generate_forward_epilogue_arm64(code_buffer * buf,
                                                    infix_call_frame_layout * layout,
                                                    infix_type * ret_type) {
    // Remember the position in the code buffer at which the epilogue starts.
    layout->epilogue_offset = (uint32_t)buf->size;

    // A value that came back in registers (i.e. not void and not returned through the
    // hidden pointer) has to be copied into the user's return buffer, addressed by X20.
    const bool result_in_registers =
        !(ret_type->category == INFIX_TYPE_VOID || layout->return_value_in_memory);

    if (result_in_registers) {
        const infix_type * elem = nullptr;

        // The order of the checks below matters: most specific cases come first.
        // A 128-bit store is only emitted when the type really is 16 bytes wide
        // (long double is 8 bytes on Apple Silicon).
        const bool uses_full_q0 =
            ret_type->size == 16 && (is_long_double(ret_type) || ret_type->category == INFIX_TYPE_VECTOR);

        if (uses_full_q0) {
            emit_arm64_str_q_imm(buf, V0_REG, X20_REG, 0);  // str q0, [x20]
        }
        else if (is_hfa(ret_type, &elem)) {
            // Homogeneous floating-point aggregate: one vector register per element,
            // stored back to back in the return buffer.
            const size_t lanes = ret_type->size / elem->size;
            for (size_t lane = 0; lane < lanes; ++lane) {
                const int32_t disp = (int32_t)(lane * elem->size);
                emit_arm64_str_vpr(buf, elem->size, VPR_ARGS[lane], X20_REG, disp);
            }
        }
        else if (is_float16(ret_type)) {
            emit_arm64_str_vpr(buf, 2, V0_REG, X20_REG, 0);  // str h0, [x20]
        }
        else if (is_float(ret_type)) {
            emit_arm64_str_vpr(buf, 4, V0_REG, X20_REG, 0);  // str s0, [x20]
        }
        else if (is_double(ret_type) || (is_long_double(ret_type) && ret_type->size == 8)) {
            // Plain double, or the 8-byte long double used on macOS.
            emit_arm64_str_vpr(buf, 8, V0_REG, X20_REG, 0);  // str d0, [x20]
        }
        else {
            // Integer, pointer, or small aggregate: the value lives in X0 (and X1).
            const size_t width = ret_type->size;
            if (width == 1) {
                emit_arm64_strb_imm(buf, X0_REG, X20_REG, 0);
            }
            else if (width == 2) {
                emit_arm64_strh_imm(buf, X0_REG, X20_REG, 0);
            }
            else if (width == 4) {
                emit_arm64_str_imm(buf, false, X0_REG, X20_REG, 0);
            }
            else if (width == 8) {
                emit_arm64_str_imm(buf, true, X0_REG, X20_REG, 0);
            }
            else if (width == 16) {
                // __int128_t or a 16-byte struct: low half from X0, high half from X1.
                emit_arm64_str_imm(buf, true, X0_REG, X20_REG, 0);
                emit_arm64_str_imm(buf, true, X1_REG, X20_REG, 8);
            }
            // Any other width emits no store at all.
            // NOTE(review): small aggregates of 3, 5-7 or 9-15 bytes would therefore never
            // reach the return buffer -- confirm whether such types are rejected earlier.
        }
    }

    // Tear down the frame. X29 was set to SP after all pushes, so restoring SP from it
    // discards the local area; the callee-saved pairs are then popped in reverse order.
    emit_arm64_mov_reg(buf, true, SP_REG, X29_FP_REG);                         // mov sp, x29
    emit_arm64_ldp_post_index(buf, true, X21_REG, X22_REG, SP_REG, 16);        // ldp x21, x22, [sp], #16
    emit_arm64_ldp_post_index(buf, true, X19_REG, X20_REG, SP_REG, 16);        // ldp x19, x20, [sp], #16
    emit_arm64_ldp_post_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16);  // ldp x29, x30, [sp], #16
    emit_arm64_ret(buf, X30_LR_REG);                                           // ret
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Reverse): Calculates the stack layout for a reverse trampoline stub.
 * @details Determines how much local stack space the JIT-compiled stub needs and where
 * each region lives. From the lowest offset upwards the local area holds:
 *   1. A buffer to store the return value before it's placed in registers.
 *   2. An array of `void*` pointers (`args_array`) to pass to the C dispatcher.
 *   3. A contiguous data area where the contents of all incoming arguments
 *      (from registers or the caller's stack) will be saved.
 * Each region, and each saved-argument slot, is rounded up to a multiple of 16 bytes.
 * All sizes are validated before they are rounded or summed so that the arithmetic
 * cannot wrap around and slip past the limit checks.
 *
 * @param arena The temporary arena for allocations.
 * @param[out] out_layout Receives the populated layout on success; set to `nullptr` on every failure.
 * @param context The reverse trampoline context with full signature information.
 * @return `INFIX_SUCCESS` on success, `INFIX_ERROR_ALLOCATION_FAILED` if the arena allocation
 *         fails, or `INFIX_ERROR_LAYOUT_FAILED` if a size limit is exceeded.
 */
static infix_status prepare_reverse_call_frame_arm64(infix_arena_t * arena,
                                                     infix_reverse_call_frame_layout ** out_layout,
                                                     infix_reverse_t * context) {
    infix_reverse_call_frame_layout * layout = infix_arena_calloc(
        arena, 1, sizeof(infix_reverse_call_frame_layout), _Alignof(infix_reverse_call_frame_layout));
    if (!layout) {
        // Keep the out-parameter contract identical on every failure path.
        *out_layout = nullptr;
        return INFIX_ERROR_ALLOCATION_FAILED;
    }
    // Mask used to round a size up to the next multiple of 16.
    const size_t round16_mask = ~(size_t)15;
    // Reject oversized inputs before rounding or multiplying them: a wrapped value could
    // otherwise look small and pass the total-size check below. Anything rejected here
    // would also have exceeded INFIX_MAX_STACK_ALLOC in the final sum.
    if (context->return_type->size > INFIX_MAX_STACK_ALLOC ||
        context->num_args > INFIX_MAX_STACK_ALLOC / sizeof(void *)) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    // The return buffer must be large enough and aligned for any type.
    size_t return_size = (context->return_type->size + 15) & round16_mask;
    // The array of pointers that will be passed to the C dispatcher.
    size_t args_array_size = (context->num_args * sizeof(void *) + 15) & round16_mask;
    // The contiguous block where we will save the actual argument data.
    size_t saved_args_data_size = 0;
    for (size_t i = 0; i < context->num_args; ++i) {
        if (context->arg_types[i]->size > INFIX_MAX_ARG_SIZE) {
            *out_layout = nullptr;
            return INFIX_ERROR_LAYOUT_FAILED;
        }
        // Ensure each saved argument slot is 16-byte aligned for simplicity and correctness.
        saved_args_data_size += (context->arg_types[i]->size + 15) & round16_mask;
        // Security check against excessively large aggregate argument data size. Performed
        // per iteration so the running total stays bounded and cannot overflow.
        if (saved_args_data_size > INFIX_MAX_ARG_SIZE) {
            *out_layout = nullptr;
            return INFIX_ERROR_LAYOUT_FAILED;
        }
    }
    size_t total_local_space = return_size + args_array_size + saved_args_data_size;
    // Refuse frames that would need more local stack than the configured maximum.
    if (total_local_space > INFIX_MAX_STACK_ALLOC) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    // The total stack allocation for the frame must be 16-byte aligned.
    layout->total_stack_alloc = (total_local_space + 15) & round16_mask;
    // Local variables are accessed via positive offsets from the stack pointer (SP)
    // after the initial `sub sp, sp, #alloc` in the prologue.
    // The layout on our local stack will be: [ return_buffer | args_array | saved_args_data ]
    layout->return_buffer_offset = 0;
    layout->args_array_offset = layout->return_buffer_offset + (int32_t)return_size;
    layout->saved_args_offset = layout->args_array_offset + (int32_t)args_array_size;
    *out_layout = layout;
    return INFIX_SUCCESS;
}
infix/src/arch/aarch64/abi_arm64.c view on Meta::CPAN
return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 5 (Reverse): Generates the epilogue for the reverse trampoline stub.
 * @details After the C dispatcher returns, this code retrieves the return value from the
 * return buffer on the stub's local stack and places it into the correct native return
 * registers (X0, X1, V0, etc.) as required by the AAPCS64. It then tears down the
 * stack frame and returns control to the native caller.
 *
 * Register selection, in order of precedence:
 *   - 16-byte long double or 16-byte vector: Q0.
 *   - Homogeneous floating-point aggregate: one vector register per element.
 *   - float16 / float / double / 8-byte long double: H0 / S0 / D0.
 *   - Everything else: X0, plus X1 when the value is larger than 8 bytes.
 * Nothing is loaded for void returns or for aggregates larger than 16 bytes.
 *
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @param context The reverse context.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_epilogue_arm64(code_buffer * buf,
                                                    infix_reverse_call_frame_layout * layout,
                                                    infix_reverse_t * context) {
    bool ret_is_aggregate =
        (context->return_type->category == INFIX_TYPE_STRUCT || context->return_type->category == INFIX_TYPE_UNION ||
         context->return_type->category == INFIX_TYPE_ARRAY || context->return_type->category == INFIX_TYPE_COMPLEX);
    // NOTE(review): this classifies every aggregate above 16 bytes as a memory return, which
    // also covers HFAs of three or four doubles (24/32 bytes). AAPCS64 returns those in
    // V0-V3 -- confirm how the earlier stub stages treat them before changing this.
    bool return_in_memory = ret_is_aggregate && context->return_type->size > 16;
    if (context->return_type->category != INFIX_TYPE_VOID && !return_in_memory) {
        const infix_type * base = nullptr;
        // Explicitly check for 128-bit types.
        // Note: On macOS ARM64, long double is 8 bytes, so is_long_double() is true but size is 8.
        // We only want the 128-bit load if the size matches.
        bool is_128bit = (context->return_type->size == 16);
        if (is_128bit && (is_long_double(context->return_type) || context->return_type->category == INFIX_TYPE_VECTOR))
            emit_arm64_ldr_q_imm(buf, V0_REG, SP_REG, layout->return_buffer_offset);  // ldr q0, [sp, #off]
        else if (is_hfa(context->return_type, &base)) {
            // One vector register per element, read back to back from the return buffer.
            size_t num_elements = context->return_type->size / base->size;
            for (size_t i = 0; i < num_elements; ++i) {
                emit_arm64_ldr_vpr(buf,
                                   base->size,
                                   VPR_ARGS[i],
                                   SP_REG,
                                   (int32_t)(layout->return_buffer_offset + i * base->size));  // Explicit cast
            }
        }
        // No second long double / vector test here: the 16-byte forms were handled above, and an
        // unconditional long double match would turn the 8-byte variant into a 128-bit load and
        // shadow the size-aware scalar branch below.
        else if (is_float16(context->return_type))
            emit_arm64_ldr_vpr(buf, 2, V0_REG, SP_REG, layout->return_buffer_offset);  // ldr h0, [sp, #off]
        else if (is_float(context->return_type) || is_double(context->return_type) ||
                 (is_long_double(context->return_type) && context->return_type->size == 8))
            // float -> S0; double and 8-byte long double -> D0 (width taken from the type size).
            emit_arm64_ldr_vpr(buf, context->return_type->size, V0_REG, SP_REG, layout->return_buffer_offset);
        else {
            // Integer, pointer, or small struct returned in GPRs.
            emit_arm64_ldr_imm(buf, true, X0_REG, SP_REG, layout->return_buffer_offset);
            if (context->return_type->size > 8)
                emit_arm64_ldr_imm(buf, true, X1_REG, SP_REG, layout->return_buffer_offset + 8);
        }
    }
    // Deallocate stack and restore frame.
    if (layout->total_stack_alloc > 0)
        // add sp, sp, #total_stack_alloc
        emit_arm64_add_imm(buf, true, false, SP_REG, SP_REG, (uint32_t)layout->total_stack_alloc);  // Cast size_t
    // Restore Frame Pointer and Link Register, then return.
    emit_arm64_ldp_post_index(
        buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16);  // ldp x29, x30, [sp], #16 (Load pair, post-indexed)
    emit_arm64_ret(buf, X30_LR_REG);                     // ret
    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 1 (Direct): Analyzes a signature and creates a call frame layout for AAPCS64.
*/
static infix_status prepare_direct_forward_call_frame_arm64(infix_arena_t * arena,
infix_direct_call_frame_layout ** out_layout,
infix_type * ret_type,
infix_type ** arg_types,
size_t num_args,
infix_direct_arg_handler_t * handlers,
void * target_fn) {
// Reuse the standard classification logic.
infix_call_frame_layout * standard_layout = nullptr;
infix_status status =
prepare_forward_call_frame_arm64(arena, &standard_layout, ret_type, arg_types, num_args, num_args, target_fn);
if (status != INFIX_SUCCESS)
return status;
// Create the new direct layout and copy basic info.
infix_direct_call_frame_layout * layout =
infix_arena_calloc(arena, 1, sizeof(infix_direct_call_frame_layout), _Alignof(infix_direct_call_frame_layout));
if (!layout)
return INFIX_ERROR_ALLOCATION_FAILED;
layout->args =
infix_arena_calloc(arena, num_args, sizeof(infix_direct_arg_layout), _Alignof(infix_direct_arg_layout));
if (!layout->args && num_args > 0)
return INFIX_ERROR_ALLOCATION_FAILED;
layout->num_args = num_args;
layout->target_fn = target_fn;
layout->return_value_in_memory = standard_layout->return_value_in_memory;
// Calculate scratch space needed on the stack.
// Note: We do NOT store the scratch offset in layout->args[i].location.stack_offset,
// because that field is needed for the *outgoing* ABI stack offset.
// Instead, we just calculate the total size here, and recalculate the offsets
// sequentially during generation.
size_t scratch_space_needed = 0;
for (size_t i = 0; i < num_args; ++i) {
layout->args[i].location = standard_layout->arg_locations[i];
layout->args[i].type = arg_types[i];
layout->args[i].handler = &handlers[i];
if (handlers[i].aggregate_marshaller) {
scratch_space_needed = _infix_align_up(scratch_space_needed, arg_types[i]->alignment);
scratch_space_needed += arg_types[i]->size;
}
else if (handlers[i].scalar_marshaller) {
// Scalars need scratch space to bounce X0 -> Stack -> V0
scratch_space_needed = _infix_align_up(scratch_space_needed, 16);
scratch_space_needed += 16;
}
else if (handlers[i].writeback_handler) {
const infix_type * pointee = (arg_types[i]->category == INFIX_TYPE_POINTER)
? arg_types[i]->meta.pointer_info.pointee_type
infix/src/arch/aarch64/abi_arm64.c view on Meta::CPAN
align = arg_layout->type->alignment;
needs_scratch = true;
}
else if (arg_layout->handler->scalar_marshaller) {
size = 16;
align = 16;
needs_scratch = true;
}
else if (arg_layout->handler->writeback_handler) {
const infix_type * pointee = (arg_layout->type->category == INFIX_TYPE_POINTER)
? arg_layout->type->meta.pointer_info.pointee_type
: arg_layout->type;
size = pointee->size;
align = pointee->alignment;
needs_scratch = true;
}
if (needs_scratch) {
epilogue_scratch_offset = _infix_align_up(epilogue_scratch_offset, align);
my_scratch_offset = (int32_t)(standard_alloc_size + epilogue_scratch_offset);
epilogue_scratch_offset += size;
}
if (arg_layout->handler->writeback_handler) {
// Save C return value (in X0/V0) before calling out.
// Note: Technically should save more registers for HFA returns, but this matches basic needs.
emit_arm64_sub_imm(buf, true, false, SP_REG, SP_REG, 32);
emit_arm64_str_imm(buf, true, X0_REG, SP_REG, 0);
emit_arm64_str_imm(buf, true, X1_REG, SP_REG, 8);
emit_arm64_str_q_imm(buf, V0_REG, SP_REG, 16); // Save V0 (covers float/double/vector)
// Arg 1 (X0): Original language object pointer.
emit_arm64_ldr_imm(buf, true, X0_REG, X21_REG, (int32_t)(i * sizeof(void *)));
// Arg 2 (X1): Pointer to the C data.
// Address = Current SP (which is Original SP - 32) + 32 + offset
int32_t total_offset = 32 + my_scratch_offset;
emit_arm64_add_imm(buf, true, false, X1_REG, SP_REG, total_offset);
// Arg 3 (X2): The infix_type*.
emit_arm64_load_u64_immediate(buf, X2_REG, (uint64_t)arg_layout->type);
// Call the handler.
emit_arm64_load_u64_immediate(buf, X10_REG, (uint64_t)arg_layout->handler->writeback_handler);
emit_arm64_blr_reg(buf, X10_REG);
// Restore C return value.
emit_arm64_ldr_q_imm(buf, V0_REG, SP_REG, 16);
emit_arm64_ldr_imm(buf, true, X1_REG, SP_REG, 8);
emit_arm64_ldr_imm(buf, true, X0_REG, SP_REG, 0);
emit_arm64_add_imm(buf, true, false, SP_REG, SP_REG, 32);
}
}
// Standard Epilogue
// Restore stack pointer to the saved registers area.
// X29 was set to SP after all pushes.
// mov sp, x29
emit_arm64_mov_reg(buf, true, SP_REG, X29_FP_REG);
emit_arm64_ldp_post_index(buf, true, X21_REG, X22_REG, SP_REG, 16);
emit_arm64_ldp_post_index(buf, true, X19_REG, X20_REG, SP_REG, 16);
emit_arm64_ldp_post_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16);
emit_arm64_ret(buf, X30_LR_REG);
return INFIX_SUCCESS;
}
( run in 0.641 second using v1.01-cache-2.11-cpan-5735350b133 )