Affix

 view release on metacpan or  search on metacpan

infix/src/arch/x64/abi_win_x64.c  view on Meta::CPAN

    // Define the layout of our local stack variables relative to RSP after allocation.
    // [ shadow space (32) | return_buffer | gpr_save | xmm_save | args_array | (padding) | saved_args_data ]
    layout->return_buffer_offset = (int32_t)_infix_align_up(SHADOW_SPACE, max_align);
    layout->gpr_save_area_offset = layout->return_buffer_offset + (int32_t)_infix_align_up(return_size, max_align);
    layout->xmm_save_area_offset =
        layout->gpr_save_area_offset + (int32_t)_infix_align_up(gpr_reg_save_area_size, max_align);
    layout->args_array_offset =
        layout->xmm_save_area_offset + (int32_t)_infix_align_up(xmm_reg_save_area_size, max_align);

    // Ensure proper alignment for the saved arguments area.
    layout->saved_args_offset =
        (int32_t)_infix_align_up((size_t)(layout->args_array_offset + args_array_size), max_align);

    layout->max_align = (uint32_t)max_align;

    *out_layout = layout;
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Reverse): Emits the entry sequence of the reverse trampoline stub.
 * @details The generated code performs the following steps, in this order:
 *          1. Pushes RBP, RSI and RDI. RBP is the caller's frame pointer; RSI and RDI
 *             are callee-saved registers the stub may use as scratch space.
 *          2. Copies RSP into RBP, so the frame pointer refers to the stack top
 *             *after* the three pushes.
 *          3. **Forces stack alignment** by AND-ing RSP with the negated `max_align`
 *             taken from the layout.
 *          4. Subtracts `total_stack_alloc` from RSP to reserve the stub's local area.
 *             This instruction is only emitted when the amount is greater than zero.
 *
 *          The value of `buf->size` immediately after step 2 is recorded in
 *          `layout->prologue_size`.
 *
 * @param buf The code buffer that receives the emitted instructions.
 * @param layout The frame blueprint. `max_align` and `total_stack_alloc` are read;
 *               `prologue_size` is written.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_prologue_win_x64(code_buffer * buf, infix_reverse_call_frame_layout * layout) {
    // Preserve the caller's frame pointer plus the callee-saved registers that the
    // stub may clobber, then anchor RBP at the resulting stack top.
    emit_push_reg(buf, RBP_REG);
    emit_push_reg(buf, RSI_REG);
    emit_push_reg(buf, RDI_REG);
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG);

    // Remember how large the buffer is once the register-saving part has been emitted.
    layout->prologue_size = (uint32_t)buf->size;

    // Round RSP down to the strictest alignment the signature needs (16, 32, or 64).
    // The negated alignment serves as the AND mask.
    const int align_mask = -(int8_t)layout->max_align;
    emit_and_reg_imm8(buf, RSP_REG, align_mask);

    // Reserve the local area sized by the prepare stage (shadow space, return buffer,
    // register save areas, args_array). Nothing is emitted for an empty area.
    if (layout->total_stack_alloc > 0) {
        const int32_t local_bytes = (int32_t)layout->total_stack_alloc;
        emit_sub_reg_imm32(buf, RSP_REG, local_bytes);
    }

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 3 (Reverse): Generates code to marshal arguments into the generic `void**` array.
 * @details This function performs the "un-marshalling" of arguments from their native
 *          locations into the generic format expected by the C dispatcher.
 *
 *          The process is as follows:
 *          1.  **Save All Argument Registers:** It first saves all four potential integer
 *              argument registers (RCX, RDX, R8, R9) and all four potential floating-point
 *              registers (XMM0-3) to a dedicated save area on the local stack. This
 *              captures all register-based arguments in one place.
 *
 *          2.  **Populate `args_array`:** It then iterates through the function's expected
 *              arguments and generates code to populate the `args_array`. For each argument:
 *              a. It determines if the argument was passed in a register or on the stack.
 *              b. If passed by reference, it gets the pointer directly from the register
 *                 save area or the caller's stack.
 *              c. If passed by value, it gets a pointer *to the saved copy* of the value.
 *              d. This pointer is then stored in the correct slot of the `args_array`.
 *
 * @param buf The code buffer.
 * @param layout The blueprint containing stack offsets for the save areas and `args_array`.
 * @param context The context containing the argument type information for the callback.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_argument_marshalling_win_x64(code_buffer * buf,
                                                                  infix_reverse_call_frame_layout * layout,
                                                                  infix_reverse_t * context) {
    // Step 1: Save all potential incoming argument registers to our local stack.
    // The GPRs go into consecutive 8-byte slots. The vector registers saved further down
    // use a 64-byte stride so that the stack layout can also support AVX-512.
    emit_mov_mem_reg(buf, RSP_REG, layout->gpr_save_area_offset + 0 * 8, RCX_REG);
    emit_mov_mem_reg(buf, RSP_REG, layout->gpr_save_area_offset + 1 * 8, RDX_REG);
    emit_mov_mem_reg(buf, RSP_REG, layout->gpr_save_area_offset + 2 * 8, R8_REG);
    emit_mov_mem_reg(buf, RSP_REG, layout->gpr_save_area_offset + 3 * 8, R9_REG);

    if (layout->max_align >= 32) {
        // AVX enabled: Save full 256 bits
        emit_vmovupd_mem_ymm(buf, RSP_REG, layout->xmm_save_area_offset + 0 * 64, XMM0_REG);
        emit_vmovupd_mem_ymm(buf, RSP_REG, layout->xmm_save_area_offset + 1 * 64, XMM1_REG);
        emit_vmovupd_mem_ymm(buf, RSP_REG, layout->xmm_save_area_offset + 2 * 64, XMM2_REG);
        emit_vmovupd_mem_ymm(buf, RSP_REG, layout->xmm_save_area_offset + 3 * 64, XMM3_REG);
    }
    else {
        // SSE only: Save 128 bits
        emit_movups_mem_xmm(buf, RSP_REG, layout->xmm_save_area_offset + 0 * 64, XMM0_REG);
        emit_movups_mem_xmm(buf, RSP_REG, layout->xmm_save_area_offset + 1 * 64, XMM1_REG);
        emit_movups_mem_xmm(buf, RSP_REG, layout->xmm_save_area_offset + 2 * 64, XMM2_REG);
        emit_movups_mem_xmm(buf, RSP_REG, layout->xmm_save_area_offset + 3 * 64, XMM3_REG);
    }

    // Step 2: Populate the `args_array` with pointers to the argument data
    bool ret_by_ref = return_value_is_by_reference(context->return_type);
    size_t arg_pos_offset = ret_by_ref ? 1 : 0;
    size_t stack_slot_offset = 0;  // Tracks arguments on the caller's stack.
    size_t current_saved_data_offset = 0;
    for (size_t i = 0; i < context->num_args; i++) {
        infix_type * current_type = context->arg_types[i];
        bool is_fp = is_float16(current_type) || is_float(current_type) || is_double(current_type) ||
            (current_type->category == INFIX_TYPE_VECTOR && current_type->size <= 16);
        bool passed_by_ref = is_passed_by_reference(current_type);
        size_t arg_pos = i + arg_pos_offset;
        bool is_variadic_arg = (i >= context->num_fixed_args);

        int32_t arg_save_loc;
        if (!passed_by_ref) {
            current_saved_data_offset = _infix_align_up(current_saved_data_offset, current_type->alignment);
            arg_save_loc = layout->saved_args_offset + (int32_t)current_saved_data_offset;



( run in 0.695 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )