Affix

 view release on metacpan or  search on metacpan

infix/src/arch/aarch64/abi_arm64.c  view on Meta::CPAN

 *          allocates the necessary stack space for stack-passed arguments.
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_forward_prologue_arm64(code_buffer * buf, infix_call_frame_layout * layout) {
    // Each register pair is stored with a pre-indexed STP that first lowers SP by 16 bytes.
    enum { PAIR_PUSH_OFFSET = -16 };

    // Emitted sequence:
    //   stp x29, x30, [sp, #-16]!   (Frame Pointer and Link Register)
    //   stp x19, x20, [sp, #-16]!   (callee-saved registers reused below for our context)
    //   stp x21, x22, [sp, #-16]!
    emit_arm64_stp_pre_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, PAIR_PUSH_OFFSET);
    emit_arm64_stp_pre_index(buf, true, X19_REG, X20_REG, SP_REG, PAIR_PUSH_OFFSET);
    emit_arm64_stp_pre_index(buf, true, X21_REG, X22_REG, SP_REG, PAIR_PUSH_OFFSET);
    // `mov x29, sp` : the Frame Pointer is set only after all three pairs have been pushed.
    emit_arm64_mov_reg(buf, true, X29_FP_REG, SP_REG);

    // Record the buffer size at this point as the size of the prologue.
    layout->prologue_size = (uint32_t)buf->size;

    // The trampoline's own arguments are copied into the callee-saved registers saved above.
    //   Unbound (no target_fn in the layout): (target_fn, ret_ptr, args_ptr) arrive in X0, X1, X2.
    //   Bound:                                (ret_ptr, args_ptr) arrive in X0, X1.
    // In both cases ret_ptr ends up in x20 and args_ptr in x21; only the source registers differ.
    const bool is_unbound = (layout->target_fn == nullptr);
    if (is_unbound) {
        emit_arm64_mov_reg(buf, true, X19_REG, X0_REG);  // mov x19, x0 (x19 = target_fn)
    }
    emit_arm64_mov_reg(buf, true, X20_REG, is_unbound ? X1_REG : X0_REG);  // mov x20, x1|x0 (x20 = ret_ptr)
    emit_arm64_mov_reg(buf, true, X21_REG, is_unbound ? X2_REG : X1_REG);  // mov x21, x2|x1 (x21 = args_ptr)

    // Reserve room below SP for any arguments that have to be passed on the stack.
    if (layout->total_stack_alloc > 0) {
        emit_arm64_sub_imm(buf, true, false, SP_REG, SP_REG, (uint32_t)layout->total_stack_alloc);
    }

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 3 (Forward): Generates code to move arguments into their native locations.
 * @details This function marshals arguments from the generic `void**` array (pointed to by X21)
 *          into the correct GPRs, VPRs, or stack slots, respecting HFA rules and platform-specific
 *          variadic conventions like Apple's stack-only approach.
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @param arg_types The array of argument types.
 * @param num_args The total number of arguments.
 * @param num_fixed_args The number of fixed (non-variadic) arguments.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_forward_argument_moves_arm64(code_buffer * buf,
                                                          infix_call_frame_layout * layout,
                                                          infix_type ** arg_types,
                                                          size_t num_args,
                                                          c23_maybe_unused size_t num_fixed_args) {
    // If returning a large struct, the ABI requires the hidden pointer (our return buffer, in X20)
    // to be passed in the indirect result location register, x8.
    if (layout->return_value_in_memory)
        emit_arm64_mov_reg(buf, true, X8_REG, X20_REG);  // mov x8, x20
    // Standard AAPCS64 Quirk: For variadic calls, a GPR must contain the number of VPRs used.
    // This rule does NOT apply to Apple's ABI, so we exclude it for macOS.
#if !defined(INFIX_OS_MACOS)
    else if (layout->is_variadic)
        // Since we don't know the types of variadic arguments at compile time, the ABI
        // states the safest value is 0. A callee like printf will use this to determine
        // how to process its va_list. We use x8 as it's a volatile register.
        // A safe default is 0. Callee (like printf) uses this to interpret its va_list.
        emit_arm64_load_u64_immediate(buf, X8_REG, 0);  // mov x8, #0
#endif
    // Main argument marshalling loop.
    for (size_t i = 0; i < num_args; ++i) {
        infix_arg_location * loc = &layout->arg_locations[i];
        infix_type * type = arg_types[i];
        // Load the pointer to the current argument's data into scratch register x9.
        // x21 holds the base of the void** args_array.
        emit_arm64_ldr_imm(buf, true, X9_REG, X21_REG, (int32_t)(i * sizeof(void *)));  // ldr x9, [x21, #offset]
        switch (loc->type) {
        case ARG_LOCATION_GPR:
            {
                // Arrays passed by pointer. The data at X9 IS the pointer. Move X9 to dest reg.
                if (type->category == INFIX_TYPE_ARRAY) {
                    emit_arm64_mov_reg(buf, true, GPR_ARGS[loc->reg_index], X9_REG);
                    break;
                }

                // C requires that signed integer types smaller than a full register be
                // sign-extended when passed. We check for this case here.
                bool is_signed_lt_64 = type->category == INFIX_TYPE_PRIMITIVE && type->size < 8 &&
                    (type->meta.primitive_id == INFIX_PRIMITIVE_SINT8 ||
                     type->meta.primitive_id == INFIX_PRIMITIVE_SINT16 ||
                     type->meta.primitive_id == INFIX_PRIMITIVE_SINT32);
                if (is_signed_lt_64) {  // Use Load Register Signed Word to sign-extend a 32-bit value to 64 bits.
                    if (type->size == 1)
                        emit_arm64_ldrsb_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
                    else if (type->size == 2)
                        emit_arm64_ldrsh_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
                    else
                        emit_arm64_ldrsw_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
                }
                else {
                    // Unsigned types and small structs
                    if (type->size == 1)
                        emit_arm64_ldrb_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
                    else if (type->size == 2)
                        emit_arm64_ldrh_imm(buf, GPR_ARGS[loc->reg_index], X9_REG, 0);
                    else
                        // 4-byte or 8-byte load
                        emit_arm64_ldr_imm(
                            buf, type->size == 8, GPR_ARGS[loc->reg_index], X9_REG, 0);  // ldr xN/wN, [x9]
                }
                break;
            }
        case ARG_LOCATION_GPR_PAIR:
            // For types > 8 and <= 16 bytes passed in two GPRs (e.g., __int128_t).
            emit_arm64_ldr_imm(buf, true, GPR_ARGS[loc->reg_index], X9_REG, 0);      // ldr xN, [x9]
            emit_arm64_ldr_imm(buf, true, GPR_ARGS[loc->reg_index + 1], X9_REG, 8);  // ldr xN+1, [x9, #8]
            break;
        case ARG_LOCATION_GPR_REFERENCE:
            // For large aggregates passed by reference, the pointer *is* the argument.
            // x9 already holds this pointer, so we just move it to the target GPR.
            emit_arm64_mov_reg(buf, true, GPR_ARGS[loc->reg_index], X9_REG);  // mov xN, x9
            break;
        case ARG_LOCATION_VPR:
            if ((is_long_double(type) && type->size == 16) || (type->category == INFIX_TYPE_VECTOR && type->size == 16))
                emit_arm64_ldr_q_imm(buf, VPR_ARGS[loc->reg_index], X9_REG, 0);  // ldr qN, [x9] (128-bit load)
            else



( run in 0.732 second using v1.01-cache-2.11-cpan-5837b0d9d2c )