Affix
view release on metacpan or search on metacpan
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
else { // Mixed GPR and SSE
if (classes[0] == INTEGER) {
layout->arg_locations[i].type = ARG_LOCATION_INTEGER_SSE_PAIR;
layout->arg_locations[i].reg_index = gpr_count;
layout->arg_locations[i].reg_index2 = xmm_count;
}
else {
layout->arg_locations[i].type = ARG_LOCATION_SSE_INTEGER_PAIR;
layout->arg_locations[i].reg_index = xmm_count;
layout->arg_locations[i].reg_index2 = gpr_count;
}
}
gpr_count += gpr_needed;
xmm_count += xmm_needed;
placed_in_register = true;
}
}
}
// Fallback to stack
if (!placed_in_register) {
layout->arg_locations[i].type = ARG_LOCATION_STACK;
// Align current offset to the argument's natural alignment requirements.
// SysV requires 16-byte alignment for long double, __int128, and __m128 on the stack.
size_t align = type->alignment;
if (align < 8)
align = 8; // Stack slots are at least 8 bytes
current_stack_offset = (current_stack_offset + (align - 1)) & ~(align - 1); // Align up
layout->arg_locations[i].stack_offset = current_stack_offset;
current_stack_offset += (type->size + 7) & ~7; // Align to 8 bytes.
layout->num_stack_args++;
}
}
// Finalize the layout properties.
layout->num_gpr_args = gpr_count;
layout->num_xmm_args = xmm_count;
// The total stack space for arguments must be 16-byte aligned before the call.
layout->total_stack_alloc = (current_stack_offset + 15) & ~15;
// Safety check against excessive stack allocation.
if (layout->total_stack_alloc > INFIX_MAX_STACK_ALLOC) {
*out_layout = nullptr;
return INFIX_ERROR_LAYOUT_FAILED;
}
*out_layout = layout;
return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 2 (Forward): Generates the function prologue for the System V trampoline.
* @details Sets up a standard stack frame, saves registers for the trampoline's context,
* and allocates stack space for arguments.
* @param buf The code buffer.
* @param layout The call frame layout.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_forward_prologue_sysv_x64(code_buffer * buf, infix_call_frame_layout * layout) {
    // Establish the frame pointer.
    emit_push_reg(buf, RBP_REG);              // push rbp
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG);  // mov rbp, rsp

    // r12-r15 are callee-saved. The trampoline keeps its own context in them
    // (target_fn, ret_ptr, args_ptr) across the native call, so the caller's
    // values are preserved on the stack first.
    emit_push_reg(buf, R12_REG);  // push r12
    emit_push_reg(buf, R13_REG);  // push r13
    emit_push_reg(buf, R14_REG);  // push r14
    emit_push_reg(buf, R15_REG);  // push r15

    // Park the trampoline's own incoming arguments in the persistent registers.
    if (layout->target_fn != nullptr) {
        // Bound trampoline: invoked as (ret_ptr, args_ptr) in RDI, RSI.
        emit_mov_reg_reg(buf, R13_REG, RDI_REG);  // r13 = ret_ptr
        emit_mov_reg_reg(buf, R14_REG, RSI_REG);  // r14 = args_ptr
    }
    else {
        // Unbound trampoline: invoked as (target_fn, ret_ptr, args_ptr) in RDI, RSI, RDX.
        emit_mov_reg_reg(buf, R12_REG, RDI_REG);  // r12 = target_fn
        emit_mov_reg_reg(buf, R13_REG, RSI_REG);  // r13 = ret_ptr
        emit_mov_reg_reg(buf, R14_REG, RDX_REG);  // r14 = args_ptr
    }

    // Reserve the outgoing stack-argument area only when the layout asks for one.
    // The ABI requires this area to be 16-byte aligned.
    if (layout->total_stack_alloc > 0) {
        emit_sub_reg_imm32(buf, RSP_REG, layout->total_stack_alloc);  // sub rsp, total_stack_alloc
    }

    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 3 (Forward): Generates code to move arguments from the `void**` array
* into their correct native locations (registers or stack).
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param arg_types The array of argument types.
* @param num_args Total number of arguments.
* @param num_fixed_args Number of fixed arguments.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_forward_argument_moves_sysv_x64(code_buffer * buf,
infix_call_frame_layout * layout,
infix_type ** arg_types,
size_t num_args,
c23_maybe_unused size_t num_fixed_args) {
// If returning a large struct, the hidden pointer (stored in r13) must be moved to RDI.
if (layout->return_value_in_memory)
emit_mov_reg_reg(buf, GPR_ARGS[0], R13_REG); // mov rdi, r13
// Marshall Register Arguments
// Loop over all arguments that are passed in registers.
for (size_t i = 0; i < num_args; ++i) {
infix_arg_location * loc = &layout->arg_locations[i];
if (loc->type == ARG_LOCATION_STACK)
continue; // Handle stack arguments in a separate pass.
// Load the pointer to the argument's data into a scratch register (r15).
// r14 holds the base of the `void** args_array`.
// r15 = args_array[i]
emit_mov_reg_mem(buf, R15_REG, R14_REG, i * sizeof(void *));
switch (loc->type) {
case ARG_LOCATION_GPR:
{
infix_type * current_type = arg_types[i];
// An array parameter decays to a pointer. The `args` array for it
// contains a pointer TO the array data. We must pass this pointer itself,
// not the data it points to. R15 already holds this pointer.
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
}
else if (is_float16(current_type)) {
// movd eax, xmm_reg ; mov [rbp + arg_save_loc], ax
emit_movq_gpr_xmm(buf, RAX_REG, XMM_ARGS[xmm_idx++]);
emit_mov_mem_reg16(buf, RBP_REG, arg_save_loc, RAX_REG);
}
else if (is_float(current_type))
emit_movss_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
}
else
is_from_stack = true;
else if (gpr_idx < NUM_GPR_ARGS)
emit_mov_mem_reg(buf, RBP_REG, arg_save_loc, GPR_ARGS[gpr_idx++]);
else
is_from_stack = true;
}
else if (num_classes == 2) {
size_t gprs_needed = (classes[0] == INTEGER) + (classes[1] == INTEGER);
size_t xmms_needed = (classes[0] == SSE) + (classes[1] == SSE);
if (gpr_idx + gprs_needed <= NUM_GPR_ARGS && xmm_idx + xmms_needed <= NUM_XMM_ARGS) {
if (classes[0] == SSE)
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else
emit_mov_mem_reg(buf, RBP_REG, arg_save_loc, GPR_ARGS[gpr_idx++]);
if (classes[1] == SSE)
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc + 8, XMM_ARGS[xmm_idx++]);
else
emit_mov_mem_reg(buf, RBP_REG, arg_save_loc + 8, GPR_ARGS[gpr_idx++]);
}
else
is_from_stack = true;
}
if (is_from_stack) {
for (size_t offset = 0; offset < current_type->size; offset += 8) {
emit_mov_reg_mem(buf, RAX_REG, RBP_REG, stack_arg_offset + offset);
emit_mov_mem_reg(buf, RBP_REG, arg_save_loc + offset, RAX_REG);
}
stack_arg_offset += (current_type->size + 7) & ~7;
}
emit_lea_reg_mem(buf, RAX_REG, RBP_REG, arg_save_loc);
emit_mov_mem_reg(buf, RBP_REG, layout->args_array_offset + i * sizeof(void *), RAX_REG);
current_saved_data_offset += current_type->size;
}
return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 4 (Reverse): Generates the code to call the high-level C dispatcher function.
* @details Emits code to load the dispatcher's arguments into the correct registers
* according to the System V ABI, then calls the dispatcher.
*
* The C dispatcher's signature is:
* `void fn(infix_reverse_t* context, void* return_value_ptr, void** args_array)`
*
* The generated code performs the following argument setup:
* 1. `RDI` (Arg 1): The `context` pointer (a 64-bit immediate).
* 2. `RSI` (Arg 2): The pointer to the return value buffer. This is either a
* pointer to local stack space, or the original pointer passed by the
* caller in RDI if the function returns a large struct by reference.
* 3. `RDX` (Arg 3): The pointer to the `args_array` on the local stack.
* 4. The address of the dispatcher function itself is loaded into a scratch
* register (`RAX`), which is then called.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param context The reverse context.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_reverse_dispatcher_call_sysv_x64(code_buffer * buf,
                                                              infix_reverse_call_frame_layout * layout,
                                                              infix_reverse_t * context) {
    // RDI <- context pointer, embedded in the generated code as a 64-bit immediate.
    emit_mov_reg_imm64(buf, RDI_REG, (uint64_t)context);  // mov rdi, #context_addr

    // Work out whether the native caller supplied a hidden return pointer.
    // Only aggregate-like categories can be returned through memory here:
    // anything larger than 16 bytes, or anything the classifier marks MEMORY.
    infix_type * ret_type = context->return_type;
    bool uses_hidden_pointer = false;
    switch (ret_type->category) {
    case INFIX_TYPE_STRUCT:
    case INFIX_TYPE_UNION:
    case INFIX_TYPE_ARRAY:
    case INFIX_TYPE_COMPLEX:
        if (ret_type->size > 16) {
            uses_hidden_pointer = true;
        }
        else {
            arg_class_t eightbyte_classes[2];
            size_t eightbyte_count = 0;
            classify_aggregate_sysv(ret_type, eightbyte_classes, &eightbyte_count);
            if (eightbyte_count > 0 && eightbyte_classes[0] == MEMORY) {
                uses_hidden_pointer = true;
            }
        }
        break;
    default:
        break;
    }
    // A long double return value never goes through the hidden pointer.
    if (is_long_double(ret_type)) {
        uses_hidden_pointer = false;
    }

    // RSI <- pointer to the return buffer.
    if (uses_hidden_pointer) {
        // The frame slot holds the caller-provided pointer (saved from RDI); reload it.
        emit_mov_reg_mem(buf, RSI_REG, RBP_REG, layout->return_buffer_offset);  // mov rsi, [rbp + return_buffer_offset]
    }
    else {
        // The frame slot is the buffer itself; pass its address.
        emit_lea_reg_mem(buf, RSI_REG, RBP_REG, layout->return_buffer_offset);  // lea rsi, [rbp + return_buffer_offset]
    }

    // RDX <- address of the args_array in the local frame.
    emit_lea_reg_mem(buf, RDX_REG, RBP_REG, layout->args_array_offset);  // lea rdx, [rbp + args_array_offset]

    // Call the dispatcher indirectly through RAX.
    emit_mov_reg_imm64(buf, RAX_REG, (uint64_t)context->internal_dispatcher);  // mov rax, #dispatcher_addr
    emit_call_reg(buf, RAX_REG);                                               // call rax

    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 5 (Reverse): Generates the epilogue for the reverse trampoline stub.
* @details Retrieves the return value from the local buffer and places it into the
* correct return registers (RAX/RDX, XMM0/XMM1) or the x87 FPU stack. Then,
* it tears down the stack frame and returns to the native caller.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param context The reverse context.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_reverse_epilogue_sysv_x64(code_buffer * buf,
infix_reverse_call_frame_layout * layout,
infix_reverse_t * context) {
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
emit_vmovupd_mem_zmm(buf, R13_REG, 0, XMM0_REG);
else if (ret_type->category == INFIX_TYPE_VECTOR)
emit_movups_mem_xmm(buf, R13_REG, 0, XMM0_REG);
else
emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
}
else { // INTEGER
switch (ret_type->size) {
case 1:
emit_mov_mem_reg8(buf, R13_REG, 0, RAX_REG);
break;
case 2:
emit_mov_mem_reg16(buf, R13_REG, 0, RAX_REG);
break;
case 4:
emit_mov_mem_reg32(buf, R13_REG, 0, RAX_REG);
break;
default:
emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
break;
}
}
}
else if (num_classes == 2) {
if (classes[0] == INTEGER && classes[1] == INTEGER) {
emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
emit_mov_mem_reg(buf, R13_REG, 8, RDX_REG);
}
else if (classes[0] == SSE && classes[1] == SSE) {
emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
emit_movsd_mem_xmm(buf, R13_REG, 8, XMM1_REG);
}
else if (classes[0] == INTEGER && classes[1] == SSE) {
emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
emit_movsd_mem_xmm(buf, R13_REG, 8, XMM0_REG);
}
else { // SSE, INTEGER
emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
emit_mov_mem_reg(buf, R13_REG, 8, RAX_REG);
}
}
}
}
// Call Write-Back Handlers
for (size_t i = 0; i < layout->num_args; ++i) {
const infix_direct_arg_layout * arg = &layout->args[i];
if (arg->handler->writeback_handler) {
// Save return registers before call
emit_push_reg(buf, RAX_REG); // +8
emit_push_reg(buf, RDX_REG); // +8
emit_sub_reg_imm32(buf, RSP_REG, 32); // +32 (space for XMM0/XMM1)
// Total stack shift: +48 bytes
emit_movsd_mem_xmm(buf, RSP_REG, 0, XMM0_REG);
// Set up args for write-back call
emit_mov_reg_mem(buf, RDI_REG, R14_REG, i * sizeof(void *));
// Arg 2 (RSI): Pointer to the C data (in our scratch space)
// Offsets are relative to the *original* RSP of the body.
// Since we just pushed/subbed 48 bytes, we must add 48 to reach the original frame.
int32_t temp_offset = (int32_t)arg->location.num_regs;
emit_lea_reg_mem(buf, RSI_REG, RSP_REG, temp_offset + 48);
emit_mov_reg_imm64(buf, RDX_REG, (uint64_t)arg->type);
emit_mov_reg_imm64(buf, R10_REG, (uint64_t)arg->handler->writeback_handler);
emit_call_reg(buf, R10_REG);
// Restore return registers
emit_movsd_xmm_mem(buf, XMM0_REG, RSP_REG, 0);
emit_add_reg_imm32(buf, RSP_REG, 32);
emit_pop_reg(buf, RDX_REG);
emit_pop_reg(buf, RAX_REG);
}
}
// Standard Epilogue
if (layout->total_stack_alloc > 0)
emit_add_reg_imm32(buf, RSP_REG, (int32_t)layout->total_stack_alloc);
emit_pop_reg(buf, R15_REG);
emit_pop_reg(buf, R14_REG);
emit_pop_reg(buf, R13_REG);
emit_pop_reg(buf, R12_REG);
emit_pop_reg(buf, RBP_REG);
emit_ret(buf);
return INFIX_SUCCESS;
}
( run in 3.844 seconds using v1.01-cache-2.11-cpan-99c4e6809bf )