Affix

 view release on metacpan or  search on metacpan

infix/src/arch/x64/abi_win_x64.c  view on Meta::CPAN

                        else
                            emit_movsd_xmm_mem(buf, XMM4_REG, RSP_REG, reg_source_offset);
                        emit_movsd_mem_xmm(buf, RSP_REG, arg_save_loc, XMM4_REG);
                    }
                }
                else {
                    reg_source_offset = layout->gpr_save_area_offset + (int32_t)arg_pos * 8;
                    if (current_type->size == 1) {
                        emit_movzx_reg64_mem8(buf, RAX_REG, RSP_REG, reg_source_offset);
                        emit_mov_mem_reg8(buf, RSP_REG, arg_save_loc, RAX_REG);
                    }
                    else if (current_type->size == 2 || is_float16(current_type)) {
                        emit_movzx_reg64_mem16(buf, RAX_REG, RSP_REG, reg_source_offset);
                        emit_mov_mem_reg16(buf, RSP_REG, arg_save_loc, RAX_REG);
                    }
                    else {
                        emit_mov_reg_mem(buf, RAX_REG, RSP_REG, reg_source_offset);
                        emit_mov_mem_reg(buf, RSP_REG, arg_save_loc, RAX_REG);
                    }
                }
                emit_lea_reg_mem(buf, RAX_REG, RSP_REG, arg_save_loc);
            }
            // Store the final pointer into the args_array.
            emit_mov_mem_reg(buf, RSP_REG, layout->args_array_offset + (int32_t)i * sizeof(void *), RAX_REG);
        }
        else {
            // Argument was passed on the caller's stack.
            // RBP points to saved RDI.
            // [RBP] -> Saved RDI
            // [RBP+8] -> Saved RSI
            // [RBP+16] -> Saved RBP
            // [RBP+24] -> Return Address
            // [RBP+32..63] -> Shadow Space (32 bytes)
            // [RBP+64..] -> Stack arguments
            int32_t caller_stack_offset = 32 + SHADOW_SPACE + (int32_t)(stack_slot_offset * 8);
            if (passed_by_ref)
                emit_mov_reg_mem(buf, RAX_REG, RBP_REG, caller_stack_offset);
            else
                emit_lea_reg_mem(buf, RAX_REG, RBP_REG, caller_stack_offset);
            emit_mov_mem_reg(buf, RSP_REG, layout->args_array_offset + (int32_t)i * sizeof(void *), RAX_REG);
            stack_slot_offset += (passed_by_ref ? 8 : (current_type->size + 7)) / 8;
        }
        if (!passed_by_ref)
            current_saved_data_offset += current_type->size;
    }
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 4 (Reverse): Emits the call from the stub into the high-level C dispatcher.
 * @details The dispatcher has the signature
 *          `void fn(infix_reverse_t* context, void* return_value_ptr, void** args_array)`,
 *          so its three arguments travel in the first three Windows x64 integer
 *          argument registers. The emitted sequence is, in order:
 *
 *          1. `RCX` <- the `context` pointer, embedded as a 64-bit immediate.
 *          2. `RDX` <- the return-value destination:
 *             - for by-reference returns, the pointer value reloaded from slot 0 of the
 *               GPR save area (where the caller's hidden pointer from RCX is expected
 *               to have been spilled earlier in the stub);
 *             - otherwise, the address of the stub's local return buffer.
 *          3. `R8`  <- the address of the `args_array` on the stub's stack.
 *          4. `VZEROUPPER`, only when `layout->max_align` is 32 or more.
 *          5. `R9`  <- the dispatcher's address as a 64-bit immediate, then `CALL R9`.
 * @param buf The code buffer receiving the instructions.
 * @param layout The frame blueprint supplying the stack offsets used above.
 * @param context The reverse-call context; supplies the return type and the dispatcher address.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_dispatcher_call_win_x64(code_buffer * buf,
                                                             infix_reverse_call_frame_layout * layout,
                                                             infix_reverse_t * context) {
    // First argument: the context pointer itself, baked into the stub as an immediate.
    emit_mov_reg_imm64(buf, RCX_REG, (uint64_t)context);

    // Second argument: where the dispatcher should write the return value.
    if (!return_value_is_by_reference(context->return_type)) {
        // By-value return: hand over the address of our own stack buffer.
        emit_lea_reg_mem(buf, RDX_REG, RSP_REG, layout->return_buffer_offset);
    }
    else {
        // By-reference return: forward the pointer stored in the first GPR save slot
        // (a load of the pointer value, not the slot's address).
        emit_mov_reg_mem(buf, RDX_REG, RSP_REG, layout->gpr_save_area_offset);
    }

    // Third argument: address of the array of argument pointers built by the marshaller.
    emit_lea_reg_mem(buf, R8_REG, RSP_REG, layout->args_array_offset);

    // Frames that carry 32-byte-or-wider data clear the upper vector lanes before
    // entering the C dispatcher.
    if (layout->max_align >= 32) {
        emit_vzeroupper(buf);
    }

    // Indirect call through R9, which is free because the dispatcher takes only three arguments.
    emit_mov_reg_imm64(buf, R9_REG, (uint64_t)context->internal_dispatcher);
    emit_call_reg(buf, R9_REG);

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 5 (Reverse): Generates the epilogue for the reverse trampoline stub.
 * @details Emitted after the call to the C dispatcher. The generated code performs
 *          three steps, in this order:
 *
 *          1. `VZEROUPPER` when `layout->max_align` is 32 or more, except when the result
 *             is a by-value vector of 32 bytes or more: that value is about to be loaded
 *             into YMM0/ZMM0 and its upper half must not be cleared.
 *          2. Moves the return value into the native return register:
 *             - by-reference return: `RAX` <- the pointer held in slot 0 of the GPR save area;
 *             - 16-byte primitive (non-MSVC builds only): `XMM0` via `MOVUPS`;
 *             - vector: `ZMM0` for 64 bytes, `YMM0` for 32 bytes, `XMM0` otherwise;
 *             - half-precision float: 16 bits zero-extended through `RAX` into `XMM0`;
 *             - float / double: `XMM0` via `MOVSS` / `MOVSD`;
 *             - anything else: a 64-bit load into `RAX`.
 *             Nothing is loaded for a `void` return.
 *          3. Tears down the frame with `MOV RSP, RBP`, pops `RDI`, `RSI` and `RBP`
 *             (in that order), and emits `RET`. This relies on the prologue (not part of
 *             this function) having left RBP pointing at the saved RDI slot.
 *
 * @param buf The code buffer receiving the instructions.
 * @param layout The frame blueprint supplying the stack offsets and `max_align`.
 * @param context The reverse-call context; supplies the return type information.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_epilogue_win_x64(code_buffer * buf,
                                                      infix_reverse_call_frame_layout * layout,
                                                      infix_reverse_t * context) {
#if !defined(INFIX_COMPILER_MSVC)
    // GCC/Clang on Windows returns 128-bit integers and long double in XMM0.
    const bool sixteen_byte_primitive =
        context->return_type->size == 16 && context->return_type->category == INFIX_TYPE_PRIMITIVE;
#else
    // MSVC builds have no such case; the generic paths below apply.
    const bool sixteen_byte_primitive = false;
#endif
    const bool has_return_value = context->return_type->category != INFIX_TYPE_VOID;

    // VZEROUPPER would wipe the upper half of a result travelling back in YMM0/ZMM0,
    // so it is emitted only when the result is NOT a by-value vector of 32+ bytes.
    if (layout->max_align >= 32 &&
        (context->return_type->category != INFIX_TYPE_VECTOR || context->return_type->size < 32 ||
         return_value_is_by_reference(context->return_type))) {
        emit_vzeroupper(buf);
    }

    if (has_return_value && return_value_is_by_reference(context->return_type)) {
        // The dispatcher wrote through the hidden pointer; the ABI wants that same
        // pointer (originally received in RCX) handed back in RAX.
        emit_mov_reg_mem(buf, RAX_REG, RSP_REG, layout->gpr_save_area_offset);
    }
    else if (has_return_value) {
        // The result sits in the stub's local return buffer; pick the register by type.
        if (sixteen_byte_primitive) {
            emit_movups_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
        }
        else if (context->return_type->category == INFIX_TYPE_VECTOR) {
            switch (context->return_type->size) {
            case 64:
                emit_vmovupd_zmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
                break;
            case 32:
                emit_vmovupd_ymm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
                break;
            default:  // 16-byte vectors
                emit_movups_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
                break;
            }
        }
        else if (is_float16(context->return_type)) {
            // Half-precision travels in the low 16 bits of XMM0: zero-extend via RAX, then move across.
            emit_movzx_reg64_mem16(buf, RAX_REG, RSP_REG, layout->return_buffer_offset);
            emit_movq_xmm_gpr(buf, XMM0_REG, RAX_REG);
        }
        else if (is_float(context->return_type)) {
            emit_movss_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
        }
        else if (is_double(context->return_type)) {
            emit_movsd_xmm_mem(buf, XMM0_REG, RSP_REG, layout->return_buffer_offset);
        }
        else {
            // Integers, pointers and small structs all come back in RAX.
            emit_mov_reg_mem(buf, RAX_REG, RSP_REG, layout->return_buffer_offset);
        }
    }

    // Frame teardown: point RSP back at the saved-register area (mov rsp, rbp),
    // then unwind the saved registers in the reverse of their push order.
    emit_mov_reg_reg(buf, RSP_REG, RBP_REG);
    emit_pop_reg(buf, RDI_REG);
    emit_pop_reg(buf, RSI_REG);
    emit_pop_reg(buf, RBP_REG);
    emit_ret(buf);

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Direct): Analyzes a signature and creates a call frame layout for Windows x64.
 * @details This function defines the on-stack layout for a direct marshalling trampoline.
 * It allocates space for outgoing stack arguments, a scratch buffer for each aggregate
 * marshaller, and a temporary save slot for each scalar marshaller.
 */
static infix_status prepare_direct_forward_call_frame_win_x64(infix_arena_t * arena,
                                                              infix_direct_call_frame_layout ** out_layout,
                                                              infix_type * ret_type,
                                                              infix_type ** arg_types,
                                                              size_t num_args,
                                                              infix_direct_arg_handler_t * handlers,
                                                              void * target_fn) {
    infix_direct_call_frame_layout * layout =
        infix_arena_calloc(arena, 1, sizeof(infix_direct_call_frame_layout), _Alignof(infix_direct_call_frame_layout));
    if (!layout)



( run in 2.279 seconds using v1.01-cache-2.11-cpan-ceb78f64989 )