Affix

 view release on metacpan or  search on metacpan

infix/src/arch/aarch64/abi_arm64.c  view on Meta::CPAN

    // Recursive step for structs: check every member.
    if (type->category == INFIX_TYPE_STRUCT) {
        if (type->meta.aggregate_info.num_members == 0)
            return false;
        for (size_t i = 0; i < type->meta.aggregate_info.num_members; ++i)
            if (!is_hfa_recursive_check(type->meta.aggregate_info.members[i].type, base_type, field_count))
                return false;
        return true;
    }
    // If it's not a float, complex, array, or struct, it cannot be part of an HFA.
    return false;
}
/**
 * @internal
 * @brief Determines if a type is a Homogeneous Floating-point Aggregate (HFA).
 * @details An HFA is a struct or array containing 1 to 4 elements of the same, single
 *          floating-point type (`float` or `double`), including in nested aggregates.
 *
 * @param type The `infix_type` to check.
 * @param[out] out_base_type If the type is an HFA, this is set to its base `float` or `double` type.
 * @return `true` if the type is a valid HFA, `false` otherwise.
 */
static bool is_hfa(const infix_type * type, const infix_type ** out_base_type) {
    if (type->category != INFIX_TYPE_STRUCT && type->category != INFIX_TYPE_ARRAY &&
        type->category != INFIX_TYPE_COMPLEX)
        return false;
    // HFAs cannot be excessively large.
    if (type->size == 0 || type->size > 64)  // Max HFA size is 4 * sizeof(double) = 32 on standard, 4*16=64 on others
        return false;
    // Find the base float/double type of the first primitive element.
    const infix_type * base = get_hfa_base_type(type);
    if (base == nullptr)
        return false;
    // Check that the total size is a multiple of the base type, with 1 to 4 elements.
    size_t num_elements = type->size / base->size;
    if (num_elements < 1 || num_elements > 4 || type->size != num_elements * base->size)
        return false;
    // Verify that ALL members recursively conform to this single base type.
    size_t field_count = 0;
    if (!is_hfa_recursive_check(type, base, &field_count))
        return false;
    if (out_base_type)
        *out_base_type = base;
    return true;
}
/**
 * @internal
 * @brief Stage 1 (Forward): Analyzes a signature and creates a call frame layout for AAPCS64.
 * @details This function assigns each argument to a location (GPR, VPR, or Stack) according
 *          to the AAPCS64 rules. It contains extensive conditional logic to handle ABI
 *          deviations on Apple and Windows platforms, especially for variadic arguments
 *          and 16-byte aggregate alignment.
 *
 * @param arena The temporary arena for allocations.
 * @param out_layout Receives the created layout blueprint.
 * @param ret_type The function's return type.
 * @param arg_types Array of argument types.
 * @param num_args Total number of arguments.
 * @param num_fixed_args Number of non-variadic arguments.
 * @param target_fn The target function address.
 * @return `INFIX_SUCCESS` on success, or an error code on failure.
 */
static infix_status prepare_forward_call_frame_arm64(infix_arena_t * arena,
                                                     infix_call_frame_layout ** out_layout,
                                                     infix_type * ret_type,
                                                     infix_type ** arg_types,
                                                     size_t num_args,
                                                     size_t num_fixed_args,
                                                     void * target_fn) {
    if (out_layout == nullptr)
        return INFIX_ERROR_INVALID_ARGUMENT;
    infix_call_frame_layout * layout =
        infix_arena_calloc(arena, 1, sizeof(infix_call_frame_layout), _Alignof(infix_call_frame_layout));
    if (layout == nullptr) {
        *out_layout = nullptr;
        return INFIX_ERROR_ALLOCATION_FAILED;
    }
    layout->arg_locations =
        infix_arena_calloc(arena, num_args, sizeof(infix_arg_location), _Alignof(infix_arg_location));
    if (layout->arg_locations == nullptr && num_args > 0) {
        *out_layout = nullptr;
        return INFIX_ERROR_ALLOCATION_FAILED;
    }
    size_t gpr_count = 0, vpr_count = 0, stack_offset = 0;
    layout->is_variadic = (num_fixed_args < num_args);
    layout->target_fn = target_fn;
    layout->num_args = num_args;
    layout->num_stack_args = 0;
    // Determine if the return value is passed by reference (via hidden pointer in X8).
    // This is true for aggregates larger than 16 bytes.
    bool ret_is_aggregate = (ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
                             ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX);
    layout->return_value_in_memory = (ret_is_aggregate && ret_type->size > 16);
    // Main Argument Classification Loop
    for (size_t i = 0; i < num_args; ++i) {
        infix_type * type = arg_types[i];
        // Security: Reject excessively large types.
        if (type->size > INFIX_MAX_ARG_SIZE) {
            *out_layout = nullptr;
            return INFIX_ERROR_LAYOUT_FAILED;
        }
        bool placed_in_register = false;
        c23_maybe_unused bool is_variadic_arg = (i >= num_fixed_args);

        // Arrays decay to pointers. Always treat as a GPR argument (8 bytes).
        if (type->category == INFIX_TYPE_ARRAY) {
            if (gpr_count < NUM_GPR_ARGS) {
                layout->arg_locations[i].type = ARG_LOCATION_GPR;
                layout->arg_locations[i].reg_index = (uint8_t)gpr_count++;
                placed_in_register = true;
            }
            else {
                layout->arg_locations[i].type = ARG_LOCATION_STACK;
                layout->arg_locations[i].stack_offset = (uint32_t)stack_offset;
                stack_offset += 8;
                layout->num_stack_args++;
                placed_in_register = true;
            }
            continue;
        }

infix/src/arch/aarch64/abi_arm64.c  view on Meta::CPAN

                                   hfa_base->size,
                                   VPR_ARGS[i],
                                   X20_REG,
                                   (int32_t)(i * hfa_base->size));  // Explicit cast
        }
        else if (is_float16(ret_type))
            emit_arm64_str_vpr(buf, 2, V0_REG, X20_REG, 0);  // str h0, [x20]
        else if (is_float(ret_type))
            emit_arm64_str_vpr(buf, 4, V0_REG, X20_REG, 0);  // str s0, [x20]
        // Handle standard double OR 8-byte long double (macOS)
        else if (is_double(ret_type) || (is_long_double(ret_type) && ret_type->size == 8))
            emit_arm64_str_vpr(buf, 8, V0_REG, X20_REG, 0);  // str d0, [x20]
        else {
            // Integer, pointer, or small aggregate return.
            switch (ret_type->size) {
            case 1:
                emit_arm64_strb_imm(buf, X0_REG, X20_REG, 0);
                break;
            case 2:
                emit_arm64_strh_imm(buf, X0_REG, X20_REG, 0);
                break;
            case 4:
                emit_arm64_str_imm(buf, false, X0_REG, X20_REG, 0);
                break;
            case 8:
                emit_arm64_str_imm(buf, true, X0_REG, X20_REG, 0);
                break;
            case 16:  // For __int128_t or small structs
                emit_arm64_str_imm(buf, true, X0_REG, X20_REG, 0);
                emit_arm64_str_imm(buf, true, X1_REG, X20_REG, 8);
                break;
            default:
                break;
            }
        }
    }
    // Deallocate stack space and restore registers.
    // X29 was set to SP after all pushes.
    // mov sp, x29
    emit_arm64_mov_reg(buf, true, SP_REG, X29_FP_REG);

    emit_arm64_ldp_post_index(buf, true, X21_REG, X22_REG, SP_REG, 16);        // ldp x21, x22, [sp], #16
    emit_arm64_ldp_post_index(buf, true, X19_REG, X20_REG, SP_REG, 16);        // ldp x19, x20, [sp], #16
    emit_arm64_ldp_post_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, 16);  // ldp x29, x30, [sp], #16
    emit_arm64_ret(buf, X30_LR_REG);                                           // ret
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Reverse): Calculates the stack layout for a reverse trampoline stub.
 * @details This function determines the total stack space the JIT-compiled stub will need
 *          for its local variables. This space includes:
 *          1. A buffer to store the return value before it's placed in registers.
 *          2. An array of `void*` pointers (`args_array`) to pass to the C dispatcher.
 *          3. A contiguous data area where the contents of all incoming arguments
 *             (from registers or the caller's stack) will be saved.
 *
 * @param arena The temporary arena for allocations.
 * @param[out] out_layout The resulting reverse call frame layout blueprint, populated with offsets.
 * @param context The reverse trampoline context with full signature information.
 * @return `INFIX_SUCCESS` on success, or an error code on failure.
 */
static infix_status prepare_reverse_call_frame_arm64(infix_arena_t * arena,
                                                     infix_reverse_call_frame_layout ** out_layout,
                                                     infix_reverse_t * context) {
    infix_reverse_call_frame_layout * layout = infix_arena_calloc(
        arena, 1, sizeof(infix_reverse_call_frame_layout), _Alignof(infix_reverse_call_frame_layout));
    if (!layout)
        return INFIX_ERROR_ALLOCATION_FAILED;
    // The return buffer must be large enough and aligned for any type.
    size_t return_size = (context->return_type->size + 15) & ~15;
    // The array of pointers that will be passed to the C dispatcher.
    size_t args_array_size = (context->num_args * sizeof(void *) + 15) & ~15;
    // The contiguous block where we will save the actual argument data.
    size_t saved_args_data_size = 0;
    for (size_t i = 0; i < context->num_args; ++i) {
        if (context->arg_types[i]->size > INFIX_MAX_ARG_SIZE) {
            *out_layout = nullptr;
            return INFIX_ERROR_LAYOUT_FAILED;
        }
        // Ensure each saved argument slot is 16-byte aligned for simplicity and correctness.
        saved_args_data_size += (context->arg_types[i]->size + 15) & ~15;
    }
    // Security check against excessively large aggregate argument data size.
    if (saved_args_data_size > INFIX_MAX_ARG_SIZE) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    size_t total_local_space = return_size + args_array_size + saved_args_data_size;
    // The total stack allocation for the frame must be 16-byte aligned.
    if (total_local_space > INFIX_MAX_STACK_ALLOC) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    layout->total_stack_alloc = (total_local_space + 15) & ~15;
    // Local variables are accessed via positive offsets from the stack pointer (SP)
    // after the initial `sub sp, sp, #alloc` in the prologue.
    // The layout on our local stack will be: [ return_buffer | args_array | saved_args_data ]
    layout->return_buffer_offset = 0;
    layout->args_array_offset = layout->return_buffer_offset + (int32_t)return_size;
    layout->saved_args_offset = layout->args_array_offset + (int32_t)args_array_size;
    *out_layout = layout;
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Reverse): Generates the prologue for the reverse trampoline stub.
 * @details This function emits the standard AArch64 function entry code. It saves the
 *          caller's frame pointer (X29) and the link register (X30, the return address)
 *          to the stack, establishes a new frame by pointing X29 to the current stack
 *          pointer, and allocates the pre-calculated stack space for local variables.
 *
 * @param buf The code buffer to write to.
 * @param layout The blueprint containing the total stack space to allocate.
 * @return `INFIX_SUCCESS` on success.
 */
static infix_status generate_reverse_prologue_arm64(code_buffer * buf, infix_reverse_call_frame_layout * layout) {
    // `stp x29, x30, [sp, #-16]!` : Save Frame Pointer and Link Register, pre-decrementing SP.
    emit_arm64_stp_pre_index(buf, true, X29_FP_REG, X30_LR_REG, SP_REG, -16);
    // `mov x29, sp` : Establish the new frame pointer.
    emit_arm64_mov_reg(buf, true, X29_FP_REG, SP_REG);



( run in 1.965 second using v1.01-cache-2.11-cpan-39bf76dae61 )