Affix

 view release on metacpan or  search on metacpan

infix/src/arch/aarch64/abi_arm64.c  view on Meta::CPAN

 */
static infix_status generate_forward_epilogue_arm64(code_buffer * buf,
                                                    infix_call_frame_layout * layout,
                                                    infix_type * ret_type) {
    // Remember where in the generated code the epilogue starts.
    layout->epilogue_offset = (uint32_t)buf->size;

    // A copy-out is only needed when the callee left its result in registers: void returns and
    // results written through the hidden return pointer require no store here.
    const bool result_in_registers = ret_type->category != INFIX_TYPE_VOID && !layout->return_value_in_memory;
    if (result_in_registers) {
        // All stores below target the user's return buffer, whose address is held in X20.
        const infix_type * elem = nullptr;
        const bool ret_is_long_double = is_long_double(ret_type);
        const bool is_quad_sized = ret_type->size == 16;

        // Branch order matters: the most specific classifications are tested first.
        if (is_quad_sized && (ret_is_long_double || ret_type->category == INFIX_TYPE_VECTOR)) {
            // 16-byte long double or 16-byte vector. (An 8-byte long double, as on Apple
            // Silicon, is deliberately excluded and handled with the doubles below.)
            emit_arm64_str_q_imm(buf, V0_REG, X20_REG, 0);  // str q0, [x20]
        }
        else if (is_hfa(ret_type, &elem)) {
            // Homogeneous floating-point aggregate: one vector register per element,
            // written back-to-back into the buffer.
            const size_t elem_size = elem->size;
            const size_t elem_count = ret_type->size / elem_size;
            size_t byte_offset = 0;
            for (size_t idx = 0; idx < elem_count; ++idx) {
                emit_arm64_str_vpr(buf, elem_size, VPR_ARGS[idx], X20_REG, (int32_t)byte_offset);
                byte_offset += elem_size;
            }
        }
        else if (is_float16(ret_type)) {
            emit_arm64_str_vpr(buf, 2, V0_REG, X20_REG, 0);  // str h0, [x20]
        }
        else if (is_float(ret_type)) {
            emit_arm64_str_vpr(buf, 4, V0_REG, X20_REG, 0);  // str s0, [x20]
        }
        else if (is_double(ret_type) || (ret_is_long_double && ret_type->size == 8)) {
            // Plain double, or an 8-byte long double.
            emit_arm64_str_vpr(buf, 8, V0_REG, X20_REG, 0);  // str d0, [x20]
        }
        else {
            // Integer, pointer, or small aggregate returned in X0 (and X1 for 16 bytes).
            const size_t width = ret_type->size;
            if (width == 1)
                emit_arm64_strb_imm(buf, X0_REG, X20_REG, 0);
            else if (width == 2)
                emit_arm64_strh_imm(buf, X0_REG, X20_REG, 0);
            else if (width == 4 || width == 8 || width == 16) {
                // 32-bit store for 4 bytes, 64-bit store otherwise.
                emit_arm64_str_imm(buf, width != 4, X0_REG, X20_REG, 0);
                if (width == 16)  // __int128_t or a 16-byte struct: second half lives in X1.
                    emit_arm64_str_imm(buf, true, X1_REG, X20_REG, 8);
            }
            // Any other size emits no store at all.
        }
    }

    // Tear down the frame. X29 was set to SP after all register pushes, so resetting SP to it
    // discards everything allocated below the saved-register area.
    emit_arm64_mov_reg(buf, true, SP_REG, X29_FP_REG);  // mov sp, x29

    // Pop the saved register pairs in the reverse order of the pushes, then return.
    emit_arm64_ldp_post_index(buf, true, X21_REG, X22_REG, SP_REG, 16);        // ldp x21, x22, [sp], #16
    emit_arm64_ldp_post_index(buf, true, X19_REG, X20_REG, SP_REG, 16);        // ldp x19, x20, [sp], #16
    emit_arm64_ldp_post_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16);  // ldp x29, x30, [sp], #16
    emit_arm64_ret(buf, X30_LR_REG);                                           // ret
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Reverse): Calculates the stack layout for a reverse trampoline stub.
 * @details This function determines the total stack space the JIT-compiled stub will need
 *          for its local variables. This space includes:
 *          1. A buffer to store the return value before it's placed in registers.
 *          2. An array of `void*` pointers (`args_array`) to pass to the C dispatcher.
 *          3. A contiguous data area where the contents of all incoming arguments
 *             (from registers or the caller's stack) will be saved.
 *
 *          Each of the three regions is rounded up to a multiple of 16 bytes, and they are
 *          laid out in that order starting at offset 0.
 *
 * @param arena The temporary arena for allocations.
 * @param[out] out_layout The resulting reverse call frame layout blueprint, populated with offsets.
 *                        Set to `nullptr` on every failure path.
 * @param context The reverse trampoline context with full signature information.
 * @return `INFIX_SUCCESS` on success, `INFIX_ERROR_ALLOCATION_FAILED` if the layout could not be
 *         allocated from the arena, or `INFIX_ERROR_LAYOUT_FAILED` if any argument, the combined
 *         argument data, or the total frame exceeds the configured limits.
 */
static infix_status prepare_reverse_call_frame_arm64(infix_arena_t * arena,
                                                     infix_reverse_call_frame_layout ** out_layout,
                                                     infix_reverse_t * context) {
    // Put the out-parameter in a defined state up front so that *every* failure path,
    // including allocation failure, leaves it null.
    *out_layout = nullptr;

    infix_reverse_call_frame_layout * layout = infix_arena_calloc(
        arena, 1, sizeof(infix_reverse_call_frame_layout), _Alignof(infix_reverse_call_frame_layout));
    if (!layout)
        return INFIX_ERROR_ALLOCATION_FAILED;

    // The return buffer is rounded up to a 16-byte multiple so it is suitably sized and
    // aligned for any return type.
    size_t return_size = (context->return_type->size + 15) & ~(size_t)15;
    // The array of pointers that will be passed to the C dispatcher.
    size_t args_array_size = (context->num_args * sizeof(void *) + 15) & ~(size_t)15;

    // The contiguous block where we will save the actual argument data.
    size_t saved_args_data_size = 0;
    for (size_t i = 0; i < context->num_args; ++i) {
        const size_t arg_size = context->arg_types[i]->size;
        if (arg_size > INFIX_MAX_ARG_SIZE)
            return INFIX_ERROR_LAYOUT_FAILED;
        // Ensure each saved argument slot is 16-byte aligned for simplicity and correctness.
        saved_args_data_size += (arg_size + 15) & ~(size_t)15;
        // Security check against excessively large aggregate argument data. Testing the running
        // total on every iteration keeps the accumulator bounded instead of validating it only
        // after the whole sum has been formed.
        if (saved_args_data_size > INFIX_MAX_ARG_SIZE)
            return INFIX_ERROR_LAYOUT_FAILED;
    }

    size_t total_local_space = return_size + args_array_size + saved_args_data_size;
    if (total_local_space > INFIX_MAX_STACK_ALLOC)
        return INFIX_ERROR_LAYOUT_FAILED;

    // The total stack allocation for the frame must be 16-byte aligned.
    layout->total_stack_alloc = (total_local_space + 15) & ~(size_t)15;

    // Local variables are accessed via positive offsets from the stack pointer (SP)
    // after the initial `sub sp, sp, #alloc` in the prologue.
    // The layout on our local stack will be: [ return_buffer | args_array | saved_args_data ]
    layout->return_buffer_offset = 0;
    layout->args_array_offset = layout->return_buffer_offset + (int32_t)return_size;
    layout->saved_args_offset = layout->args_array_offset + (int32_t)args_array_size;

    *out_layout = layout;
    return INFIX_SUCCESS;
}

infix/src/arch/aarch64/abi_arm64.c  view on Meta::CPAN

    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 5 (Reverse): Generates the epilogue for the reverse trampoline stub.
 * @details After the C dispatcher returns, this code retrieves the return value from the
 *          return buffer on the stub's local stack and places it into the correct native return
 *          registers (X0, X1, V0, etc.) as required by the AAPCS64. It then tears down the
 *          stack frame and returns control to the native caller.
 *
 *          Nothing is loaded for `void` returns or for aggregates larger than 16 bytes, which
 *          are treated as returned in memory.
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @param context The reverse context.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_epilogue_arm64(code_buffer * buf,
                                                    infix_reverse_call_frame_layout * layout,
                                                    infix_reverse_t * context) {
    bool ret_is_aggregate =
        (context->return_type->category == INFIX_TYPE_STRUCT || context->return_type->category == INFIX_TYPE_UNION ||
         context->return_type->category == INFIX_TYPE_ARRAY || context->return_type->category == INFIX_TYPE_COMPLEX);
    bool return_in_memory = ret_is_aggregate && context->return_type->size > 16;
    if (context->return_type->category != INFIX_TYPE_VOID && !return_in_memory) {
        const infix_type * base = nullptr;

        // Explicitly check for 128-bit types.
        // Note: On macOS ARM64, long double is 8 bytes, so is_long_double() is true but size is 8.
        // We only want the 128-bit load if the size matches.
        bool is_128bit = (context->return_type->size == 16);
        if (is_128bit && (is_long_double(context->return_type) || context->return_type->category == INFIX_TYPE_VECTOR))
            emit_arm64_ldr_q_imm(buf, V0_REG, SP_REG, layout->return_buffer_offset);  // ldr q0, [sp, #off]
        else if (is_hfa(context->return_type, &base)) {
            // Homogeneous floating-point aggregate: load each element into its own vector register.
            size_t num_elements = context->return_type->size / base->size;
            for (size_t i = 0; i < num_elements; ++i) {
                emit_arm64_ldr_vpr(buf,
                                   base->size,
                                   VPR_ARGS[i],
                                   SP_REG,
                                   (int32_t)(layout->return_buffer_offset + i * base->size));  // Explicit cast
            }
        }
        // No second, size-agnostic "long double or 16-byte vector" branch belongs here. One used
        // to sit at this point; it emitted a 128-bit load for an 8-byte long double and made the
        // size == 8 case below unreachable. All 16-byte cases are already handled by the first test.
        else if (is_float16(context->return_type))
            emit_arm64_ldr_vpr(buf, 2, V0_REG, SP_REG, layout->return_buffer_offset);  // ldr h0, [sp, #off]
        else if (is_float(context->return_type) || is_double(context->return_type) ||
                 (is_long_double(context->return_type) && context->return_type->size == 8))
            // float, double, or 8-byte long double: the load width is the type's own size.
            emit_arm64_ldr_vpr(buf, context->return_type->size, V0_REG, SP_REG, layout->return_buffer_offset);
        else {
            // Integer, pointer, or small struct returned in GPRs.
            // A full 64-bit load into X0 is always emitted; X1 is loaded only for values wider than 8 bytes.
            emit_arm64_ldr_imm(buf, true, X0_REG, SP_REG, layout->return_buffer_offset);
            if (context->return_type->size > 8)
                emit_arm64_ldr_imm(buf, true, X1_REG, SP_REG, layout->return_buffer_offset + 8);
        }
    }
    // Deallocate stack and restore frame.
    if (layout->total_stack_alloc > 0)
        // add sp, sp, #total_stack_alloc
        emit_arm64_add_imm(buf, true, false, SP_REG, SP_REG, (uint32_t)layout->total_stack_alloc);  // Cast size_t
    // Restore Frame Pointer and Link Register, then return.
    emit_arm64_ldp_post_index(
        buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16);  // ldp x29, x30, [sp], #16 (Load pair, post-indexed)
    emit_arm64_ret(buf, X30_LR_REG);                     // ret
    return INFIX_SUCCESS;
}

/**
 * @internal
 * @brief Stage 1 (Direct): Analyzes a signature and creates a call frame layout for AAPCS64.
 */
static infix_status prepare_direct_forward_call_frame_arm64(infix_arena_t * arena,
                                                            infix_direct_call_frame_layout ** out_layout,
                                                            infix_type * ret_type,
                                                            infix_type ** arg_types,
                                                            size_t num_args,
                                                            infix_direct_arg_handler_t * handlers,
                                                            void * target_fn) {
    // Reuse the standard classification logic.
    infix_call_frame_layout * standard_layout = nullptr;
    infix_status status =
        prepare_forward_call_frame_arm64(arena, &standard_layout, ret_type, arg_types, num_args, num_args, target_fn);
    if (status != INFIX_SUCCESS)
        return status;

    // Create the new direct layout and copy basic info.
    infix_direct_call_frame_layout * layout =
        infix_arena_calloc(arena, 1, sizeof(infix_direct_call_frame_layout), _Alignof(infix_direct_call_frame_layout));
    if (!layout)
        return INFIX_ERROR_ALLOCATION_FAILED;

    layout->args =
        infix_arena_calloc(arena, num_args, sizeof(infix_direct_arg_layout), _Alignof(infix_direct_arg_layout));
    if (!layout->args && num_args > 0)
        return INFIX_ERROR_ALLOCATION_FAILED;

    layout->num_args = num_args;
    layout->target_fn = target_fn;
    layout->return_value_in_memory = standard_layout->return_value_in_memory;

    // Calculate scratch space needed on the stack.
    // Note: We do NOT store the scratch offset in layout->args[i].location.stack_offset,
    // because that field is needed for the *outgoing* ABI stack offset.
    // Instead, we just calculate the total size here, and recalculate the offsets
    // sequentially during generation.
    size_t scratch_space_needed = 0;
    for (size_t i = 0; i < num_args; ++i) {
        layout->args[i].location = standard_layout->arg_locations[i];
        layout->args[i].type = arg_types[i];
        layout->args[i].handler = &handlers[i];

        if (handlers[i].aggregate_marshaller) {
            scratch_space_needed = _infix_align_up(scratch_space_needed, arg_types[i]->alignment);
            scratch_space_needed += arg_types[i]->size;
        }
        else if (handlers[i].scalar_marshaller) {
            // Scalars need scratch space to bounce X0 -> Stack -> V0
            scratch_space_needed = _infix_align_up(scratch_space_needed, 16);
            scratch_space_needed += 16;
        }
        else if (handlers[i].writeback_handler) {
            const infix_type * pointee = (arg_types[i]->category == INFIX_TYPE_POINTER)
                ? arg_types[i]->meta.pointer_info.pointee_type

infix/src/arch/aarch64/abi_arm64.c  view on Meta::CPAN

            align = arg_layout->type->alignment;
            needs_scratch = true;
        }
        else if (arg_layout->handler->scalar_marshaller) {
            size = 16;
            align = 16;
            needs_scratch = true;
        }
        else if (arg_layout->handler->writeback_handler) {
            const infix_type * pointee = (arg_layout->type->category == INFIX_TYPE_POINTER)
                ? arg_layout->type->meta.pointer_info.pointee_type
                : arg_layout->type;
            size = pointee->size;
            align = pointee->alignment;
            needs_scratch = true;
        }

        if (needs_scratch) {
            epilogue_scratch_offset = _infix_align_up(epilogue_scratch_offset, align);
            my_scratch_offset = (int32_t)(standard_alloc_size + epilogue_scratch_offset);
            epilogue_scratch_offset += size;
        }

        if (arg_layout->handler->writeback_handler) {
            // Save C return value (in X0/V0) before calling out.
            // Note: Technically should save more registers for HFA returns, but this matches basic needs.
            emit_arm64_sub_imm(buf, true, false, SP_REG, SP_REG, 32);
            emit_arm64_str_imm(buf, true, X0_REG, SP_REG, 0);
            emit_arm64_str_imm(buf, true, X1_REG, SP_REG, 8);
            emit_arm64_str_q_imm(buf, V0_REG, SP_REG, 16);  // Save V0 (covers float/double/vector)

            // Arg 1 (X0): Original language object pointer.
            emit_arm64_ldr_imm(buf, true, X0_REG, X21_REG, (int32_t)(i * sizeof(void *)));

            // Arg 2 (X1): Pointer to the C data.
            // Address = Current SP (which is Original SP - 32) + 32 + offset
            int32_t total_offset = 32 + my_scratch_offset;
            emit_arm64_add_imm(buf, true, false, X1_REG, SP_REG, total_offset);

            // Arg 3 (X2): The infix_type*.
            emit_arm64_load_u64_immediate(buf, X2_REG, (uint64_t)arg_layout->type);

            // Call the handler.
            emit_arm64_load_u64_immediate(buf, X10_REG, (uint64_t)arg_layout->handler->writeback_handler);
            emit_arm64_blr_reg(buf, X10_REG);

            // Restore C return value.
            emit_arm64_ldr_q_imm(buf, V0_REG, SP_REG, 16);
            emit_arm64_ldr_imm(buf, true, X1_REG, SP_REG, 8);
            emit_arm64_ldr_imm(buf, true, X0_REG, SP_REG, 0);
            emit_arm64_add_imm(buf, true, false, SP_REG, SP_REG, 32);
        }
    }

    // Standard Epilogue
    // Restore stack pointer to the saved registers area.
    // X29 was set to SP after all pushes.
    // mov sp, x29
    emit_arm64_mov_reg(buf, true, SP_REG, X29_FP_REG);

    emit_arm64_ldp_post_index(buf, true, X21_REG, X22_REG, SP_REG, 16);
    emit_arm64_ldp_post_index(buf, true, X19_REG, X20_REG, SP_REG, 16);
    emit_arm64_ldp_post_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16);
    emit_arm64_ret(buf, X30_LR_REG);

    return INFIX_SUCCESS;
}



( run in 0.641 second using v1.01-cache-2.11-cpan-5735350b133 )