Affix

 view release on metacpan or  search on metacpan

infix/src/arch/x64/abi_sysv_x64.c  view on Meta::CPAN

                    else {  // Mixed GPR and SSE
                        if (classes[0] == INTEGER) {
                            layout->arg_locations[i].type = ARG_LOCATION_INTEGER_SSE_PAIR;
                            layout->arg_locations[i].reg_index = gpr_count;
                            layout->arg_locations[i].reg_index2 = xmm_count;
                        }
                        else {
                            layout->arg_locations[i].type = ARG_LOCATION_SSE_INTEGER_PAIR;
                            layout->arg_locations[i].reg_index = xmm_count;
                            layout->arg_locations[i].reg_index2 = gpr_count;
                        }
                    }
                    gpr_count += gpr_needed;
                    xmm_count += xmm_needed;
                    placed_in_register = true;
                }
            }
        }
        // Fallback to stack
        if (!placed_in_register) {
            layout->arg_locations[i].type = ARG_LOCATION_STACK;
            // Align current offset to the argument's natural alignment requirements.
            // SysV requires 16-byte alignment for long double, __int128, and __m128 on the stack.
            size_t align = type->alignment;
            if (align < 8)
                align = 8;  // Stack slots are at least 8 bytes
            current_stack_offset = (current_stack_offset + (align - 1)) & ~(align - 1);  // Align up
            layout->arg_locations[i].stack_offset = current_stack_offset;
            current_stack_offset += (type->size + 7) & ~7;  // Align to 8 bytes.
            layout->num_stack_args++;
        }
    }
    // Finalize the layout properties.
    layout->num_gpr_args = gpr_count;
    layout->num_xmm_args = xmm_count;
    // The total stack space for arguments must be 16-byte aligned before the call.
    layout->total_stack_alloc = (current_stack_offset + 15) & ~15;
    // Safety check against excessive stack allocation.
    if (layout->total_stack_alloc > INFIX_MAX_STACK_ALLOC) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    *out_layout = layout;
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Forward): Emits the prologue of a System V x64 forward trampoline.
 * @details The emitted code, in order:
 *          1. Establishes a frame (`push rbp; mov rbp, rsp`).
 *          2. Pushes r12-r15 so the trampoline can keep its own state in them.
 *          3. Copies the trampoline's incoming arguments out of the argument registers.
 *             An unbound trampoline (`layout->target_fn` is null) receives
 *             (target_fn, ret_ptr, args_ptr) in RDI/RSI/RDX; a bound trampoline receives
 *             (ret_ptr, args_ptr) in RDI/RSI. r12 is not written in the bound case.
 *          4. Subtracts `layout->total_stack_alloc` from RSP when that value is positive.
 * @param buf The code buffer that instructions are appended to.
 * @param layout The call frame layout; `target_fn` and `total_stack_alloc` are read.
 * @return Always `INFIX_SUCCESS`.
 */
static infix_status generate_forward_prologue_sysv_x64(code_buffer * buf, infix_call_frame_layout * layout) {
    const bool is_bound = (layout->target_fn != nullptr);
    const bool needs_stack_space = (layout->total_stack_alloc > 0);

    // Frame setup.
    emit_push_reg(buf, RBP_REG);              // push rbp
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG);  // mov rbp, rsp

    // r12-r15 are about to hold trampoline context (target_fn, ret_ptr, args_ptr and a
    // scratch pointer), so their incoming values are preserved on the stack first.
    emit_push_reg(buf, R12_REG);  // push r12
    emit_push_reg(buf, R13_REG);  // push r13
    emit_push_reg(buf, R14_REG);  // push r14
    emit_push_reg(buf, R15_REG);  // push r15

    // Stash the trampoline's own arguments where the upcoming native call cannot clobber them.
    if (is_bound) {
        // Called as trampoline(ret_ptr, args_ptr).
        emit_mov_reg_reg(buf, R13_REG, RDI_REG);  // r13 = ret_ptr
        emit_mov_reg_reg(buf, R14_REG, RSI_REG);  // r14 = args_ptr
    }
    else {
        // Called as trampoline(target_fn, ret_ptr, args_ptr).
        emit_mov_reg_reg(buf, R12_REG, RDI_REG);  // r12 = target_fn
        emit_mov_reg_reg(buf, R13_REG, RSI_REG);  // r13 = ret_ptr
        emit_mov_reg_reg(buf, R14_REG, RDX_REG);  // r14 = args_ptr
    }

    // Reserve the outgoing stack-argument area. The ABI requires this space to be
    // 16-byte aligned; the layout stage is expected to have rounded the size already.
    if (needs_stack_space)
        emit_sub_reg_imm32(buf, RSP_REG, layout->total_stack_alloc);

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 3 (Forward): Generates code to move arguments from the `void**` array
 *          into their correct native locations (registers or stack).
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @param arg_types The array of argument types.
 * @param num_args Total number of arguments.
 * @param num_fixed_args Number of fixed arguments.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_forward_argument_moves_sysv_x64(code_buffer * buf,
                                                             infix_call_frame_layout * layout,
                                                             infix_type ** arg_types,
                                                             size_t num_args,
                                                             c23_maybe_unused size_t num_fixed_args) {
    // If returning a large struct, the hidden pointer (stored in r13) must be moved to RDI.
    if (layout->return_value_in_memory)
        emit_mov_reg_reg(buf, GPR_ARGS[0], R13_REG);  // mov rdi, r13
    // Marshall Register Arguments
    // Loop over all arguments that are passed in registers.
    for (size_t i = 0; i < num_args; ++i) {
        infix_arg_location * loc = &layout->arg_locations[i];
        if (loc->type == ARG_LOCATION_STACK)
            continue;  // Handle stack arguments in a separate pass.
        // Load the pointer to the argument's data into a scratch register (r15).
        // r14 holds the base of the `void** args_array`.
        // r15 = args_array[i]
        emit_mov_reg_mem(buf, R15_REG, R14_REG, i * sizeof(void *));
        switch (loc->type) {
        case ARG_LOCATION_GPR:
            {
                infix_type * current_type = arg_types[i];
                // An array parameter decays to a pointer. The `args` array for it
                // contains a pointer TO the array data. We must pass this pointer itself,
                // not the data it points to. R15 already holds this pointer.

infix/src/arch/x64/abi_sysv_x64.c  view on Meta::CPAN

                            emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                    }
                    else if (is_float16(current_type)) {
                        // movd eax, xmm_reg ; mov [rbp + arg_save_loc], ax
                        emit_movq_gpr_xmm(buf, RAX_REG, XMM_ARGS[xmm_idx++]);
                        emit_mov_mem_reg16(buf, RBP_REG, arg_save_loc, RAX_REG);
                    }
                    else if (is_float(current_type))
                        emit_movss_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                    else
                        emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                }
                else
                    is_from_stack = true;
            else if (gpr_idx < NUM_GPR_ARGS)
                emit_mov_mem_reg(buf, RBP_REG, arg_save_loc, GPR_ARGS[gpr_idx++]);
            else
                is_from_stack = true;
        }
        else if (num_classes == 2) {
            size_t gprs_needed = (classes[0] == INTEGER) + (classes[1] == INTEGER);
            size_t xmms_needed = (classes[0] == SSE) + (classes[1] == SSE);
            if (gpr_idx + gprs_needed <= NUM_GPR_ARGS && xmm_idx + xmms_needed <= NUM_XMM_ARGS) {
                if (classes[0] == SSE)
                    emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                else
                    emit_mov_mem_reg(buf, RBP_REG, arg_save_loc, GPR_ARGS[gpr_idx++]);
                if (classes[1] == SSE)
                    emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc + 8, XMM_ARGS[xmm_idx++]);
                else
                    emit_mov_mem_reg(buf, RBP_REG, arg_save_loc + 8, GPR_ARGS[gpr_idx++]);
            }
            else
                is_from_stack = true;
        }
        if (is_from_stack) {
            for (size_t offset = 0; offset < current_type->size; offset += 8) {
                emit_mov_reg_mem(buf, RAX_REG, RBP_REG, stack_arg_offset + offset);
                emit_mov_mem_reg(buf, RBP_REG, arg_save_loc + offset, RAX_REG);
            }
            stack_arg_offset += (current_type->size + 7) & ~7;
        }
        emit_lea_reg_mem(buf, RAX_REG, RBP_REG, arg_save_loc);
        emit_mov_mem_reg(buf, RBP_REG, layout->args_array_offset + i * sizeof(void *), RAX_REG);
        current_saved_data_offset += current_type->size;
    }
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 4 (Reverse): Emits the call into the high-level C dispatcher.
 * @details The dispatcher has the signature
 *          `void fn(infix_reverse_t* context, void* return_value_ptr, void** args_array)`,
 *          so the generated code fills the first three System V integer argument
 *          registers and then performs an indirect call:
 *
 *          - `RDI`: the `context` pointer, embedded as a 64-bit immediate.
 *          - `RSI`: the return-value buffer. When the return type is an aggregate
 *            (struct, union, array or complex) that is larger than 16 bytes or whose
 *            first eightbyte classifies as MEMORY, the slot at
 *            `[rbp + return_buffer_offset]` holds the caller-supplied pointer and is
 *            loaded by value. Otherwise the slot itself is the buffer and its address
 *            is taken. A `long double` return always uses the local slot.
 *          - `RDX`: the address of the `args_array` at `[rbp + args_array_offset]`.
 *          - `RAX`: scratch register holding `context->internal_dispatcher`, which is
 *            then called.
 * @param buf The code buffer that instructions are appended to.
 * @param layout The reverse layout; `return_buffer_offset` and `args_array_offset` are read.
 * @param context The reverse context; supplies the return type and dispatcher address.
 * @return Always `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_dispatcher_call_sysv_x64(code_buffer * buf,
                                                              infix_reverse_call_frame_layout * layout,
                                                              infix_reverse_t * context) {
    // Arg 1 (RDI): the context pointer itself.
    emit_mov_reg_imm64(buf, RDI_REG, (uint64_t)context);  // mov rdi, #context_addr

    // Decide whether the native caller handed us a hidden return pointer.
    infix_type * result_type = context->return_type;
    bool caller_owns_buffer = false;
    switch (result_type->category) {
    case INFIX_TYPE_STRUCT:
    case INFIX_TYPE_UNION:
    case INFIX_TYPE_ARRAY:
    case INFIX_TYPE_COMPLEX:
        if (result_type->size > 16)
            caller_owns_buffer = true;
        else {
            // Small aggregates still go through memory if classification says so.
            arg_class_t eightbyte_classes[2];
            size_t eightbyte_count;
            classify_aggregate_sysv(result_type, eightbyte_classes, &eightbyte_count);
            if (eightbyte_count > 0 && eightbyte_classes[0] == MEMORY)
                caller_owns_buffer = true;
        }
        break;
    default:
        break;
    }
    // long double never takes the hidden-pointer path here.
    if (is_long_double(result_type))
        caller_owns_buffer = false;

    // Arg 2 (RSI): pointer to the return buffer.
    if (!caller_owns_buffer)
        // The buffer lives in our own frame; pass its address.
        emit_lea_reg_mem(buf, RSI_REG, RBP_REG, layout->return_buffer_offset);  // lea rsi, [rbp + return_buffer_offset]
    else
        // The slot holds the pointer that arrived in RDI and was saved; reload it.
        emit_mov_reg_mem(buf, RSI_REG, RBP_REG, layout->return_buffer_offset);  // mov rsi, [rbp + return_buffer_offset]

    // Arg 3 (RDX): address of the args_array built earlier in the stub.
    emit_lea_reg_mem(buf, RDX_REG, RBP_REG, layout->args_array_offset);  // lea rdx, [rbp + args_array_offset]

    // Indirect call through RAX to the dispatcher.
    emit_mov_reg_imm64(buf, RAX_REG, (uint64_t)context->internal_dispatcher);  // mov rax, #dispatcher_addr
    emit_call_reg(buf, RAX_REG);

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 5 (Reverse): Generates the epilogue for the reverse trampoline stub.
 * @details Retrieves the return value from the local buffer and places it into the
 *          correct return registers (RAX/RDX, XMM0/XMM1) or the x87 FPU stack. Then,
 *          it tears down the stack frame and returns to the native caller.
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @param context The reverse context.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_epilogue_sysv_x64(code_buffer * buf,
                                                       infix_reverse_call_frame_layout * layout,
                                                       infix_reverse_t * context) {

infix/src/arch/x64/abi_sysv_x64.c  view on Meta::CPAN

                        emit_vmovupd_mem_zmm(buf, R13_REG, 0, XMM0_REG);
                    else if (ret_type->category == INFIX_TYPE_VECTOR)
                        emit_movups_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                    else
                        emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                }
                else {  // INTEGER
                    switch (ret_type->size) {
                    case 1:
                        emit_mov_mem_reg8(buf, R13_REG, 0, RAX_REG);
                        break;
                    case 2:
                        emit_mov_mem_reg16(buf, R13_REG, 0, RAX_REG);
                        break;
                    case 4:
                        emit_mov_mem_reg32(buf, R13_REG, 0, RAX_REG);
                        break;
                    default:
                        emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
                        break;
                    }
                }
            }
            else if (num_classes == 2) {
                if (classes[0] == INTEGER && classes[1] == INTEGER) {
                    emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
                    emit_mov_mem_reg(buf, R13_REG, 8, RDX_REG);
                }
                else if (classes[0] == SSE && classes[1] == SSE) {
                    emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                    emit_movsd_mem_xmm(buf, R13_REG, 8, XMM1_REG);
                }
                else if (classes[0] == INTEGER && classes[1] == SSE) {
                    emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
                    emit_movsd_mem_xmm(buf, R13_REG, 8, XMM0_REG);
                }
                else {  // SSE, INTEGER
                    emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                    emit_mov_mem_reg(buf, R13_REG, 8, RAX_REG);
                }
            }
        }
    }

    // Call Write-Back Handlers
    for (size_t i = 0; i < layout->num_args; ++i) {
        const infix_direct_arg_layout * arg = &layout->args[i];
        if (arg->handler->writeback_handler) {
            // Save return registers before call
            emit_push_reg(buf, RAX_REG);           // +8
            emit_push_reg(buf, RDX_REG);           // +8
            emit_sub_reg_imm32(buf, RSP_REG, 32);  // +32 (space for XMM0/XMM1)
            // Total stack shift: +48 bytes

            emit_movsd_mem_xmm(buf, RSP_REG, 0, XMM0_REG);

            // Set up args for write-back call
            emit_mov_reg_mem(buf, RDI_REG, R14_REG, i * sizeof(void *));

            // Arg 2 (RSI): Pointer to the C data (in our scratch space)
            // Offsets are relative to the *original* RSP of the body.
            // Since we just pushed/subbed 48 bytes, we must add 48 to reach the original frame.
            int32_t temp_offset = (int32_t)arg->location.num_regs;
            emit_lea_reg_mem(buf, RSI_REG, RSP_REG, temp_offset + 48);

            emit_mov_reg_imm64(buf, RDX_REG, (uint64_t)arg->type);

            emit_mov_reg_imm64(buf, R10_REG, (uint64_t)arg->handler->writeback_handler);
            emit_call_reg(buf, R10_REG);

            // Restore return registers
            emit_movsd_xmm_mem(buf, XMM0_REG, RSP_REG, 0);
            emit_add_reg_imm32(buf, RSP_REG, 32);
            emit_pop_reg(buf, RDX_REG);
            emit_pop_reg(buf, RAX_REG);
        }
    }

    // Standard Epilogue
    if (layout->total_stack_alloc > 0)
        emit_add_reg_imm32(buf, RSP_REG, (int32_t)layout->total_stack_alloc);

    emit_pop_reg(buf, R15_REG);
    emit_pop_reg(buf, R14_REG);
    emit_pop_reg(buf, R13_REG);
    emit_pop_reg(buf, R12_REG);

    emit_pop_reg(buf, RBP_REG);
    emit_ret(buf);

    return INFIX_SUCCESS;
}



( run in 3.844 seconds using v1.01-cache-2.11-cpan-99c4e6809bf )