Affix
view release on metacpan or search on metacpan
infix/src/arch/aarch64/abi_arm64.c view on Meta::CPAN
* allocates the necessary stack space for stack-passed arguments.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_forward_prologue_arm64(code_buffer * buf, infix_call_frame_layout * layout) {
    // Register pairs pushed on entry, listed in emission order. Each pair is stored with
    // `stp <a>, <b>, [sp, #-16]!`, so SP is pre-decremented by 16 bytes per pair.
    //   x29/x30 : frame pointer and link register
    //   x19/x20 : callee-saved registers, reused below for the trampoline's own arguments
    //   x21/x22 : callee-saved registers; only x21 is written by this prologue
    const int pushed_pairs[][2] = {
        {X29_FP_REG, X30_LR_REG},
        {X19_REG, X20_REG},
        {X21_REG, X22_REG},
    };
    const size_t num_pairs = sizeof(pushed_pairs) / sizeof(pushed_pairs[0]);
    for (size_t idx = 0; idx < num_pairs; ++idx) {
        emit_arm64_stp_pre_index(buf, true, pushed_pairs[idx][0], pushed_pairs[idx][1], SP_REG, -16);
    }

    // `mov x29, sp` : the frame pointer is set only after every push has been emitted.
    emit_arm64_mov_reg(buf, true, X29_FP_REG, SP_REG);

    // Record the buffer's current size as the size of the prologue.
    layout->prologue_size = (uint32_t)buf->size;

    // Copy the trampoline's incoming arguments into the registers saved above.
    //   Unbound (layout->target_fn is null): x0 = target_fn, x1 = ret_ptr, x2 = args_ptr
    //   Bound   (layout->target_fn is set) : x0 = ret_ptr,   x1 = args_ptr
    const bool has_bound_target = layout->target_fn != nullptr;
    if (!has_bound_target) {
        emit_arm64_mov_reg(buf, true, X19_REG, X0_REG);  // mov x19, x0 (x19 = target_fn)
    }
    // ret_ptr and args_ptr arrive one register later when target_fn occupies x0.
    emit_arm64_mov_reg(buf, true, X20_REG, has_bound_target ? X0_REG : X1_REG);  // x20 = ret_ptr
    emit_arm64_mov_reg(buf, true, X21_REG, has_bound_target ? X1_REG : X2_REG);  // x21 = args_ptr

    // `sub sp, sp, #total_stack_alloc` : reserve room for arguments passed on the stack.
    // Nothing is emitted when the layout needs no stack space.
    if (layout->total_stack_alloc > 0) {
        emit_arm64_sub_imm(buf, true, false, SP_REG, SP_REG, (uint32_t)layout->total_stack_alloc);
    }
    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 3 (Forward): Generates code to move arguments into their native locations.
* @details This function marshals arguments from the generic `void**` array (pointed to by X21)
* into the correct GPRs, VPRs, or stack slots, respecting HFA rules and platform-specific
* variadic conventions like Apple's stack-only approach.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param arg_types The array of argument types.
* @param num_args The total number of arguments.
* @param num_fixed_args The number of fixed (non-variadic) arguments.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_forward_argument_moves_arm64(code_buffer * buf,
infix_call_frame_layout * layout,
infix_type ** arg_types,
size_t num_args,
c23_maybe_unused size_t num_fixed_args) {
// If returning a large struct, the ABI requires the hidden pointer (our return buffer, in X20)
// to be passed in the indirect result location register, x8.
if (layout->return_value_in_memory)
emit_arm64_mov_reg(buf, true, X8_REG, X20_REG); // mov x8, x20
// Standard AAPCS64 Quirk: For variadic calls, a GPR must contain the number of VPRs used.
// This rule does NOT apply to Apple's ABI, so we exclude it for macOS.
#if !defined(INFIX_OS_MACOS)
else if (layout->is_variadic)
// Since we don't know the types of variadic arguments at compile time, the ABI
// states the safest value is 0. A callee like printf will use this to determine
// how to process its va_list. We use x8 as it's a volatile register.
// A safe default is 0. Callee (like printf) uses this to interpret its va_list.
emit_arm64_load_u64_immediate(buf, X8_REG, 0); // mov x8, #0
#endif
// Main argument marshalling loop.
for (size_t i = 0; i < num_args; ++i) {
infix_arg_location * loc = &layout->arg_locations[i];
infix_type * type = arg_types[i];
// Load the pointer to the current argument's data into scratch register x9.
// x21 holds the base of the void** args_array.
emit_arm64_ldr_imm(buf, true, X9_REG, X21_REG, (int32_t)(i * sizeof(void *))); // ldr x9, [x21, #offset]
switch (loc->type) {
case ARG_LOCATION_GPR:
{
// Arrays passed by pointer. The data at X9 IS the pointer. Move X9 to dest reg.
if (type->category == INFIX_TYPE_ARRAY) {
emit_arm64_mov_reg(buf, true, GPR_ARGS[loc->reg_index], X9_REG);
break;
}
// C requires that signed integer types smaller than a full register be
// sign-extended when passed. We check for this case here.
bool is_signed_lt_64 = type->category == INFIX_TYPE_PRIMITIVE && type->size < 8 &&
(type->meta.primitive_id == INFIX_PRIMITIVE_SINT8 ||
type->meta.primitive_id == INFIX_PRIMITIVE_SINT16 ||
type->meta.primitive_id == INFIX_PRIMITIVE_SINT32);
if (is_signed_lt_64) { // Use Load Register Signed Word to sign-extend a 32-bit value to 64 bits.
if (type->size == 1)
emit_arm64_ldrsb_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
else if (type->size == 2)
emit_arm64_ldrsh_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
else
emit_arm64_ldrsw_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
}
else {
// Unsigned types and small structs
if (type->size == 1)
emit_arm64_ldrb_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
else if (type->size == 2)
emit_arm64_ldrh_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
else
// 4-byte or 8-byte load
emit_arm64_ldr_imm(
buf, type->size == 8, GPR_ARGS[loc->reg_index], X9_REG, 0); // ldr xN/wN, [x9]
}
break;
}
case ARG_LOCATION_GPR_PAIR:
// For types > 8 and <= 16 bytes passed in two GPRs (e.g., __int128_t).
emit_arm64_ldr_imm(buf, true, GPR_ARGS[loc->reg_index], X9_REG, 0); // ldr xN, [x9]
emit_arm64_ldr_imm(buf, true, GPR_ARGS[loc->reg_index + 1], X9_REG, 8); // ldr xN+1, [x9, #8]
break;
case ARG_LOCATION_GPR_REFERENCE:
// For large aggregates passed by reference, the pointer *is* the argument.
// x9 already holds this pointer, so we just move it to the target GPR.
emit_arm64_mov_reg(buf, true, GPR_ARGS[loc->reg_index], X9_REG); // mov xN, x9
break;
case ARG_LOCATION_VPR:
if ((is_long_double(type) && type->size == 16) || (type->category == INFIX_TYPE_VECTOR && type->size == 16))
emit_arm64_ldr_q_imm(buf, VPR_ARGS[loc->reg_index], X9_REG, 0); // ldr qN, [x9] (128-bit load)
else
( run in 0.732 second using v1.01-cache-2.11-cpan-5837b0d9d2c )