Affix
view release on metacpan or search on metacpan
infix/src/arch/x64/abi_win_x64.c view on Meta::CPAN
else
emit_movsd_xmm_mem(buf, XMM4_REG, RSP_REG, reg_source_offset);
emit_movsd_mem_xmm(buf, RSP_REG, arg_save_loc, XMM4_REG);
}
}
else {
reg_source_offset = layout->gpr_save_area_offset + (int32_t)arg_pos * 8;
if (current_type->size == 1) {
emit_movzx_reg64_mem8(buf, RAX_REG, RSP_REG, reg_source_offset);
emit_mov_mem_reg8(buf, RSP_REG, arg_save_loc, RAX_REG);
}
else if (current_type->size == 2 || is_float16(current_type)) {
emit_movzx_reg64_mem16(buf, RAX_REG, RSP_REG, reg_source_offset);
emit_mov_mem_reg16(buf, RSP_REG, arg_save_loc, RAX_REG);
}
else {
emit_mov_reg_mem(buf, RAX_REG, RSP_REG, reg_source_offset);
emit_mov_mem_reg(buf, RSP_REG, arg_save_loc, RAX_REG);
}
}
emit_lea_reg_mem(buf, RAX_REG, RSP_REG, arg_save_loc);
}
// Store the final pointer into the args_array.
emit_mov_mem_reg(buf, RSP_REG, layout->args_array_offset + (int32_t)i * sizeof(void *), RAX_REG);
}
else {
// Argument was passed on the caller's stack.
// RBP points to saved RDI.
// [RBP] -> Saved RDI
// [RBP+8] -> Saved RSI
// [RBP+16] -> Saved RBP
// [RBP+24] -> Return Address
// [RBP+32..63] -> Shadow Space (32 bytes)
// [RBP+64..] -> Stack arguments
int32_t caller_stack_offset = 32 + SHADOW_SPACE + (int32_t)(stack_slot_offset * 8);
if (passed_by_ref)
emit_mov_reg_mem(buf, RAX_REG, RBP_REG, caller_stack_offset);
else
emit_lea_reg_mem(buf, RAX_REG, RBP_REG, caller_stack_offset);
emit_mov_mem_reg(buf, RSP_REG, layout->args_array_offset + (int32_t)i * sizeof(void *), RAX_REG);
stack_slot_offset += (passed_by_ref ? 8 : (current_type->size + 7)) / 8;
}
if (!passed_by_ref)
current_saved_data_offset += current_type->size;
}
return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 4 (Reverse): Emits the call from the stub into the high-level C dispatcher.
 * @details The C dispatcher has the signature
 *          `void fn(infix_reverse_t* context, void* return_value_ptr, void** args_array)`,
 *          so three argument registers are populated before the call is emitted:
 *
 *          - `RCX` (Arg 1): the `context` pointer, embedded as a 64-bit immediate.
 *          - `RDX` (Arg 2): the destination for the return value. For by-reference
 *            returns this is the pointer reloaded from slot 0 of the GPR save area
 *            (the hidden pointer the original caller passed in RCX); otherwise it is
 *            the address of the return buffer inside the stub's own frame.
 *          - `R8` (Arg 3): the address of the `args_array` inside the stub's own frame.
 *
 *          When `layout->max_align` is 32 or more, a `VZEROUPPER` is emitted ahead of
 *          the call. Finally the dispatcher's address is loaded into `R9` and called
 *          indirectly through that register.
 * @param buf The code buffer that receives the emitted instructions.
 * @param layout The blueprint supplying the RSP-relative offsets used here.
 * @param context The reverse-call context; provides the return type and the dispatcher address.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_dispatcher_call_win_x64(code_buffer * buf,
                                                             infix_reverse_call_frame_layout * layout,
                                                             infix_reverse_t * context) {
    // RCX <- `context`, baked into the stub as an immediate.
    emit_mov_reg_imm64(buf, RCX_REG, (uint64_t)context);

    // RDX <- where the dispatcher should write the return value.
    if (!return_value_is_by_reference(context->return_type)) {
        // By-value return: hand out the address of the return buffer in this stub's frame.
        emit_lea_reg_mem(buf, RDX_REG, RSP_REG, layout->return_buffer_offset);
    }
    else {
        // By-reference return: the caller's hidden destination pointer arrived in RCX and
        // was spilled to slot 0 of the GPR save area during marshalling; reload it from there.
        emit_mov_reg_mem(buf, RDX_REG, RSP_REG, layout->gpr_save_area_offset);
    }

    // R8 <- address of the `args_array` in this stub's frame.
    emit_lea_reg_mem(buf, R8_REG, RSP_REG, layout->args_array_offset);

    // Layouts reporting an alignment of 32 or more get a VZEROUPPER ahead of the call.
    if (layout->max_align >= 32) {
        emit_vzeroupper(buf);
    }

    // R9 serves as the scratch register holding the call target.
    emit_mov_reg_imm64(buf, R9_REG, (uint64_t)context->internal_dispatcher);
    emit_call_reg(buf, R9_REG);

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 5 (Reverse): Generates the epilogue for the reverse trampoline stub.
 * @details Emitted after the call to the C dispatcher. The generated code does three things,
 *          in this order:
 *
 *          1. Optionally issues `VZEROUPPER` (only when `layout->max_align >= 32`, and
 *             skipped when a by-value vector of 32 bytes or more is being returned, since
 *             that would clear the upper half of the result).
 *          2. Moves the return value into its native return register: `RAX` receives the
 *             saved hidden pointer for by-reference returns and all remaining by-value
 *             types; `XMM0`/`YMM0`/`ZMM0` receive vectors and floating-point values,
 *             loaded from the return buffer in the stub's frame.
 *          3. Unwinds the frame with `MOV RSP, RBP`, pops `RDI`, `RSI` and `RBP`, and
 *             emits `RET`.
 *
 * @param buf The code buffer that receives the emitted instructions.
 * @param layout The blueprint supplying the RSP-relative offsets used here.
 * @param context The reverse-call context; provides the return type information.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_epilogue_win_x64(code_buffer * buf,
                                                      infix_reverse_call_frame_layout * layout,
                                                      infix_reverse_t * context) {
    // VZEROUPPER is wanted for wide-aligned frames unless the result itself lives in the
    // upper lanes of YMM0/ZMM0 (a by-value vector of 32 bytes or more), which it would wipe.
    if (layout->max_align >= 32 &&
        (context->return_type->category != INFIX_TYPE_VECTOR || context->return_type->size < 32 ||
         return_value_is_by_reference(context->return_type))) {
        emit_vzeroupper(buf);
    }

    // Move the dispatcher's result into the register the ABI expects.
    if (context->return_type->category == INFIX_TYPE_VOID) {
        // Nothing to return.
    }
    else if (return_value_is_by_reference(context->return_type)) {
        // The value was already written through the hidden pointer; the ABI wants that same
        // pointer (originally in RCX, spilled to slot 0 of the GPR save area) back in RAX.
        emit_mov_reg_mem(buf, RAX_REG, RSP_REG, layout->gpr_save_area_offset);
    }
#if !defined(INFIX_COMPILER_MSVC)
    else if (context->return_type->category == INFIX_TYPE_PRIMITIVE && context->return_type->size == 16) {
        // GCC/Clang on Windows return 128-bit integers and long double in XMM0.
        emit_movups_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
    }
#endif
    else if (context->return_type->category == INFIX_TYPE_VECTOR) {
        // Pick the load width matching the vector size; anything that is not 64 or 32 bytes
        // is loaded as a 128-bit XMM value.
        switch (context->return_type->size) {
        case 64:
            emit_vmovupd_zmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
            break;
        case 32:
            emit_vmovupd_ymm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
            break;
        default:
            emit_movups_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
            break;
        }
    }
    else if (is_float16(context->return_type)) {
        // Half-precision travels in the low 16 bits of XMM0: zero-extend through RAX first.
        emit_movzx_reg64_mem16(buf, RAX_REG, RSP_REG, layout->return_buffer_offset);
        emit_movq_xmm_gpr(buf, XMM0_REG, RAX_REG);
    }
    else if (is_float(context->return_type)) {
        emit_movss_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
    }
    else if (is_double(context->return_type)) {
        emit_movsd_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
    }
    else {
        // Integers, pointers and small structs returned by value all go back in RAX.
        emit_mov_reg_mem(buf, RAX_REG, RSP_REG, layout->return_buffer_offset);
    }

    // Unwind: RBP was set to RSP after all pushes, so `mov rsp, rbp` lands on the saved
    // registers, which are then popped in the order RDI, RSI, RBP before returning.
    emit_mov_reg_reg(buf, RSP_REG, RBP_REG);
    emit_pop_reg(buf, RDI_REG);
    emit_pop_reg(buf, RSI_REG);
    emit_pop_reg(buf, RBP_REG);
    emit_ret(buf);

    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 1 (Direct): Analyzes a signature and creates a call frame layout for Windows x64.
* @details This function defines the on-stack layout for a direct marshalling trampoline.
* It allocates space for outgoing stack arguments, a scratch buffer for each aggregate
* marshaller, and a temporary save slot for each scalar marshaller.
*/
static infix_status prepare_direct_forward_call_frame_win_x64(infix_arena_t * arena,
infix_direct_call_frame_layout ** out_layout,
infix_type * ret_type,
infix_type ** arg_types,
size_t num_args,
infix_direct_arg_handler_t * handlers,
void * target_fn) {
infix_direct_call_frame_layout * layout =
infix_arena_calloc(arena, 1, sizeof(infix_direct_call_frame_layout), _Alignof(infix_direct_call_frame_layout));
if (!layout)
( run in 2.279 seconds using v1.01-cache-2.11-cpan-ceb78f64989 )