Affix

 view release on metacpan or  search on metacpan

infix/src/arch/x64/abi_sysv_x64.c  view on Meta::CPAN

    .prepare_forward_call_frame = prepare_forward_call_frame_sysv_x64,
    .generate_forward_prologue = generate_forward_prologue_sysv_x64,
    .generate_forward_argument_moves = generate_forward_argument_moves_sysv_x64,
    .generate_forward_call_instruction = generate_forward_call_instruction_sysv_x64,
    .generate_forward_epilogue = generate_forward_epilogue_sysv_x64};

/**
 * Forward declarations of the System V x64 reverse-trampoline stage functions.
 * They are declared here so that the v-table below can reference them before
 * their definitions appear later in this file.
 */
static infix_status prepare_reverse_call_frame_sysv_x64(infix_arena_t * arena,
                                                        infix_reverse_call_frame_layout ** out_layout,
                                                        infix_reverse_t * context);
static infix_status generate_reverse_prologue_sysv_x64(code_buffer * buf, infix_reverse_call_frame_layout * layout);
static infix_status generate_reverse_argument_marshalling_sysv_x64(code_buffer * buf,
                                                                   infix_reverse_call_frame_layout * layout,
                                                                   infix_reverse_t * context);
static infix_status generate_reverse_dispatcher_call_sysv_x64(code_buffer * buf,
                                                              infix_reverse_call_frame_layout * layout,
                                                              infix_reverse_t * context);
static infix_status generate_reverse_epilogue_sysv_x64(code_buffer * buf,
                                                       infix_reverse_call_frame_layout * layout,
                                                       infix_reverse_t * context);
/** The v-table of System V x64 functions for generating reverse trampolines (one entry per stage). */
const infix_reverse_abi_spec g_sysv_x64_reverse_spec = {
    .prepare_reverse_call_frame = prepare_reverse_call_frame_sysv_x64,
    .generate_reverse_prologue = generate_reverse_prologue_sysv_x64,
    .generate_reverse_argument_marshalling = generate_reverse_argument_marshalling_sysv_x64,
    .generate_reverse_dispatcher_call = generate_reverse_dispatcher_call_sysv_x64,
    .generate_reverse_epilogue = generate_reverse_epilogue_sysv_x64};

/**
 * Forward declarations of the System V x64 stage functions for the new Direct
 * Marshalling ABI. They are declared here so that the v-table below can
 * reference them; their definitions are not part of this excerpt.
 */
static infix_status prepare_direct_forward_call_frame_sysv_x64(infix_arena_t * arena,
                                                               infix_direct_call_frame_layout ** out_layout,
                                                               infix_type * ret_type,
                                                               infix_type ** arg_types,
                                                               size_t num_args,
                                                               infix_direct_arg_handler_t * handlers,
                                                               void * target_fn);
static infix_status generate_direct_forward_prologue_sysv_x64(code_buffer * buf,
                                                              infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_argument_moves_sysv_x64(code_buffer * buf,
                                                                    infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_call_instruction_sysv_x64(code_buffer * buf,
                                                                      infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_epilogue_sysv_x64(code_buffer * buf,
                                                              infix_direct_call_frame_layout * layout,
                                                              infix_type * ret_type);
/** The v-table for the new Direct Marshalling ABI (one entry per stage). */
const infix_direct_forward_abi_spec g_sysv_x64_direct_forward_spec = {
    .prepare_direct_forward_call_frame = prepare_direct_forward_call_frame_sysv_x64,
    .generate_direct_forward_prologue = generate_direct_forward_prologue_sysv_x64,
    .generate_direct_forward_argument_moves = generate_direct_forward_argument_moves_sysv_x64,
    .generate_direct_forward_call_instruction = generate_direct_forward_call_instruction_sysv_x64,
    .generate_direct_forward_epilogue = generate_direct_forward_epilogue_sysv_x64};

/**
 * @internal
 * @brief Recursively classifies the eightbytes of an aggregate type.
 * @details This is the core of the complex System V classification algorithm. It traverses
 * the fields of a struct/array, examining each 8-byte chunk ("eightbyte") and assigning it a
 * class (INTEGER, SSE, MEMORY). The classification is "merged" according to ABI rules
 * (e.g., if an eightbyte contains both INTEGER and SSE parts, it becomes INTEGER).
 *
 * Only the first two eightbytes (byte offsets 0..15) are tracked; anything that lies at or
 * beyond offset 16 is ignored because it cannot affect `classes[0]` / `classes[1]`.
 * Whenever this function returns `true`, `classes[0]` has been set to MEMORY.
 *
 * @param type The type of the current member/element being examined. May be NULL, in which
 *             case nothing is classified.
 * @param offset The byte offset of this member from the start of the aggregate.
 * @param[in,out] classes An array of two `arg_class_t` that is updated during classification.
 * @param depth The current recursion depth (to prevent stack overflow on malicious input).
 * @param[in,out] field_count A counter of leaf fields (primitives, pointers, vectors) seen so
 *                            far, used to prevent DoS from excessively complex types.
 * @param is_bitfield True if the current member is a bitfield (exempts it from the
 *                    natural-alignment check).
 * @return `true` if a condition forcing MEMORY classification is found, `false` otherwise.
 */
static bool classify_recursive(
    const infix_type * type, size_t offset, arg_class_t classes[2], int depth, size_t * field_count, bool is_bitfield) {
    // A recursive call can be made with a NULL type (e.g., from a malformed array from fuzzer).
    if (type == nullptr)
        return false;  // Terminate recursion path.
    // Abort classification if the type is excessively complex or too deep. Give up and pass in memory.
    if (*field_count > MAX_AGGREGATE_FIELDS_TO_CLASSIFY || depth > MAX_CLASSIFY_DEPTH) {
        classes[0] = MEMORY;
        return true;
    }
    // The ABI requires natural alignment for standard members.
    // Bitfields are an exception: they are allowed to be unaligned relative to their
    // base type's alignment, as long as they stay within their storage unit.
    // An alignment of 0 is skipped to avoid a modulo-by-zero.
    if (!is_bitfield && type->alignment != 0 && offset % type->alignment != 0) {
        classes[0] = MEMORY;
        return true;
    }
    // NOTE(review): no packed-struct special case is implemented here; a packed aggregate is
    // only forced to MEMORY when one of its members trips the misalignment check above.
    // Leaf case: a primitive is merged into every eightbyte it occupies.
    if (type->category == INFIX_TYPE_PRIMITIVE) {
        (*field_count)++;
        // `long double` is a special case. It is passed in memory on the stack, not x87 registers.
        if (is_long_double(type)) {
            classes[0] = MEMORY;
            return true;
        }
        // Consider all eightbytes that the primitive occupies, not just the starting offset.
        size_t start_offset = offset;
        // A zero-sized primitive occupies no bytes; this also keeps `size - 1` below from wrapping.
        if (type->size == 0)
            return false;
        // Check for overflow before calculating end_offset.
        if (start_offset > SIZE_MAX - (type->size - 1)) {
            classes[0] = MEMORY;
            return true;
        }
        size_t end_offset = start_offset + type->size - 1;
        size_t start_eightbyte = start_offset / 8;
        size_t end_eightbyte = end_offset / 8;
        arg_class_t new_class = (is_float16(type) || is_float(type) || is_double(type)) ? SSE : INTEGER;
        // Only indices 0 and 1 exist in `classes`; later eightbytes are ignored.
        for (size_t index = start_eightbyte; index <= end_eightbyte && index < 2; ++index) {
            // Merge the new class with the existing class for this eightbyte.
            // The rule is: if an eightbyte contains both SSE and INTEGER parts, it is classified as INTEGER.
            if (classes[index] != new_class)
                classes[index] = (classes[index] == NO_CLASS) ? new_class : INTEGER;
        }
        return false;
    }
    // Leaf case: pointers.
    if (type->category == INFIX_TYPE_POINTER) {
        (*field_count)++;
        size_t index = offset / 8;
        if (index < 2 && classes[index] != INTEGER)
            classes[index] = INTEGER;  // Pointers are always INTEGER class. Merge with existing class.
        return false;
    }
    // Arrays: classify each element at its own offset.
    if (type->category == INFIX_TYPE_ARRAY) {
        if (type->meta.array_info.element_type == nullptr)
            return false;
        // If the array elements have no size, iterating over them is pointless
        // and can cause a timeout if num_elements is large, as the offset never advances.
        // We only need to classify the element type once at the starting offset.
        if (type->meta.array_info.element_type->size == 0) {
            if (type->meta.array_info.num_elements > 0)
                // Classify the zero-sized element just once.
                return classify_recursive(
                    type->meta.array_info.element_type, offset, classes, depth + 1, field_count, false);
            return false;  // An empty array of zero-sized structs has no effect on classification.
        }
        for (size_t i = 0; i < type->meta.array_info.num_elements; ++i) {
            // Check count *before* each recursive call inside the loop.
            if (*field_count > MAX_AGGREGATE_FIELDS_TO_CLASSIFY) {
                classes[0] = MEMORY;
                return true;
            }
            size_t element_offset = offset + i * type->meta.array_info.element_type->size;
            // If we are already past the 16-byte boundary relevant for
            // register passing, there is no need to classify further. This prunes
            // the recursion tree for large arrays.
            if (element_offset >= 16)
                break;
            if (classify_recursive(
                    type->meta.array_info.element_type, element_offset, classes, depth + 1, field_count, false))
                return true;  // Propagate the forced-MEMORY result up the call stack.
        }
        return false;
    }
    // Complex numbers: two consecutive values of the base type.
    if (type->category == INFIX_TYPE_COMPLEX) {
        infix_type * base = type->meta.complex_info.base_type;
        // A NULL or zero-sized base type is treated as malformed: stop classifying
        // this member without forcing MEMORY.
        if (base == nullptr || base->size == 0)
            return false;
        // A complex number is just like a struct { base_type real; base_type imag; }
        // So we classify the first element at `offset`.
        if (classify_recursive(base, offset, classes, depth + 1, field_count, false))
            return true;  // Propagate the forced-MEMORY result.
        // And the second element at offset + size of the base.
        if (classify_recursive(base, offset + base->size, classes, depth + 1, field_count, false))
            return true;  // Propagate the forced-MEMORY result.
        return false;
    }
    // Vectors: every still-unclassified eightbyte the vector covers becomes SSE.
    if (type->category == INFIX_TYPE_VECTOR) {
        (*field_count)++;
        size_t num_eightbytes = (type->size + 7) / 8;
        for (size_t i = 0; i < num_eightbytes && (offset / 8 + i) < 2; ++i) {
            // Merging rule: if an eightbyte contains both SSE and INTEGER parts, it is INTEGER.
            // If it's NO_CLASS, it becomes SSE. An eightbyte that already has a class keeps it.
            if (classes[offset / 8 + i] == NO_CLASS)
                classes[offset / 8 + i] = SSE;
        }
        return false;
    }
    // Structs and unions: classify each member at (offset + member offset).
    if (type->category == INFIX_TYPE_STRUCT || type->category == INFIX_TYPE_UNION) {
        // A generated type can have num_members > 0 but a NULL members pointer.
        // This is invalid and must be passed in memory.
        if (type->meta.aggregate_info.members == nullptr) {
            classes[0] = MEMORY;
            return true;
        }
        // Recursively classify each member of the struct/union.
        for (size_t i = 0; i < type->meta.aggregate_info.num_members; ++i) {
            // Check count *before* each recursive call inside the loop.
            if (*field_count > MAX_AGGREGATE_FIELDS_TO_CLASSIFY) {
                classes[0] = MEMORY;
                return true;
            }
            infix_struct_member * member = &type->meta.aggregate_info.members[i];
            // A generated type can have a NULL member type.
            // This is invalid, and the aggregate must be passed in memory.
            if (member->type == nullptr) {
                classes[0] = MEMORY;
                return true;
            }
            size_t member_offset = offset + member->offset;
            // If this member starts at or after the 16-byte boundary,
            // it cannot influence register classification, so we can skip it.
            // (`continue`, not `break`: later members are still examined.)
            if (member_offset >= 16)
                continue;
            if (classify_recursive(member->type, member_offset, classes, depth + 1, field_count, member->is_bitfield))
                return true;  // Propagate the forced-MEMORY result.
        }
        return false;
    }
    // Any other category contributes nothing to the classification.
    return false;
}
/**
 * @internal
 * @brief Classifies an aggregate type for argument passing according to the System V ABI.
 * @details Entry point of the classification: the aggregate is viewed as at most two
 *          "eightbytes", each of which ends up as INTEGER, SSE or NO_CLASS. Aggregates
 *          larger than 16 bytes, and aggregates for which the recursive walk reports a
 *          MEMORY-forcing condition, yield a single MEMORY class in `classes[0]`.
 *          Eightbytes that the aggregate's size covers but that no field touched
 *          (pure padding) are reported as INTEGER.
 *
 * @param type The aggregate type to classify.
 * @param[out] classes An array of two `arg_class_t` to be filled.
 * @param[out] num_classes The number of entries in `classes` that are not NO_CLASS
 *                         (1 for MEMORY; otherwise 0, 1 or 2 — 0 only when nothing was
 *                         classified, e.g. for a zero-sized aggregate).
 */
static void classify_aggregate_sysv(const infix_type * type, arg_class_t classes[2], size_t * num_classes) {
    // Start from a blank slate so stale caller data can never leak into the result.
    classes[0] = classes[1] = NO_CLASS;
    *num_classes = 0;

    // Anything wider than two eightbytes is passed in memory without further inspection.
    if (type->size > 16) {
        classes[0] = MEMORY;
        *num_classes = 1;
        return;
    }

    // Walk the fields. A `true` result means the walk already stored MEMORY in classes[0].
    size_t fields_seen = 0;  // Per-aggregate leaf counter shared by the whole recursion.
    const bool forced_to_memory = classify_recursive(type, 0, classes, 0, &fields_seen, false);
    if (forced_to_memory) {
        *num_classes = 1;
        return;
    }

    // Eightbytes inside the aggregate's extent that no field claimed are padding: INTEGER.
    if (classes[0] == NO_CLASS && type->size > 0)
        classes[0] = INTEGER;
    if (classes[1] == NO_CLASS && type->size > 8)
        classes[1] = INTEGER;

    // Report how many eightbytes actually received a class.
    size_t classified = 0;
    for (size_t slot = 0; slot < 2; ++slot) {
        if (classes[slot] != NO_CLASS)
            ++classified;
    }
    *num_classes = classified;
}
/**
 * @internal
 * @brief Stage 1 (Forward): Analyzes a signature and creates a call frame layout for System V.
 * @details This function iterates through a function's arguments, classifying each one
 *          to determine its location (GPR, XMM, or stack) according to the SysV ABI rules.
 * @param arena The temporary arena for allocations.
 * @param out_layout Receives the created layout blueprint.
 * @param ret_type The function's return type.
 * @param arg_types Array of argument types.
 * @param num_args Total number of arguments.

infix/src/arch/x64/abi_sysv_x64.c  view on Meta::CPAN

    emit_pop_reg(buf, R12_REG);
    emit_pop_reg(buf, RBP_REG);
    emit_ret(buf);
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Reverse): Calculates the stack layout for a reverse trampoline stub.
 * @details This function determines the total stack space needed for the stub's local variables,
 *          including the return buffer, the `void**` args_array, and the saved argument data.
 *
 *          All sizes are validated *before* they are rounded or summed so that the `size_t`
 *          arithmetic cannot wrap around and slip past the `INFIX_MAX_STACK_ALLOC` limit.
 *          Argument alignments are also validated: the prologue aligns RSP with an
 *          `AND RSP, imm8` whose mask is derived from `max_align`, so only power-of-two
 *          alignments up to 128 can be honoured.
 * @param arena The temporary arena for allocations.
 * @param[out] out_layout The resulting reverse call frame layout blueprint. Set to `nullptr`
 *                        when `INFIX_ERROR_LAYOUT_FAILED` is returned.
 * @param context The reverse trampoline context.
 * @return `INFIX_SUCCESS` on success, `INFIX_ERROR_ALLOCATION_FAILED` if the layout object
 *         cannot be allocated, or `INFIX_ERROR_LAYOUT_FAILED` if a type is too large, has an
 *         alignment the stub cannot provide, or the frame would exceed the stack limit.
 */
static infix_status prepare_reverse_call_frame_sysv_x64(infix_arena_t * arena,
                                                        infix_reverse_call_frame_layout ** out_layout,
                                                        infix_reverse_t * context) {
    // Largest alignment whose negated value still fits the prologue's sign-extended 8-bit AND mask.
    const size_t max_encodable_align = 128;

    infix_reverse_call_frame_layout * layout = infix_arena_calloc(
        arena, 1, sizeof(infix_reverse_call_frame_layout), _Alignof(infix_reverse_call_frame_layout));
    if (!layout)
        return INFIX_ERROR_ALLOCATION_FAILED;
    // Security: bound the raw inputs first. Either condition alone already guarantees the frame
    // would exceed INFIX_MAX_STACK_ALLOC, so this rejects nothing that could have succeeded; it
    // only stops the rounding/multiplication below from wrapping around.
    if (context->return_type->size > INFIX_MAX_STACK_ALLOC ||
        context->num_args > INFIX_MAX_STACK_ALLOC / sizeof(void *)) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    // Calculate space for each component, ensuring 16-byte alignment for safety and simplicity.
    size_t return_size = (context->return_type->size + 15) & ~(size_t)15;
    size_t args_array_size = (context->num_args * sizeof(void *) + 15) & ~(size_t)15;
    size_t saved_args_data_size = 0;
    size_t max_align = 16;  // Start with 16 for stack safety
    for (size_t i = 0; i < context->num_args; ++i) {
        // Security: Reject excessively large types before they reach the code generator.
        if (context->arg_types[i]->size > INFIX_MAX_ARG_SIZE) {
            *out_layout = nullptr;
            return INFIX_ERROR_LAYOUT_FAILED;
        }
        // Every saved argument gets at least an 8-byte aligned slot.
        size_t align = context->arg_types[i]->alignment;
        if (align < 8)
            align = 8;
        // The prologue narrows -max_align to an int8_t immediate. A non-power-of-two or an
        // alignment above 128 would produce a bogus mask (256 would even zero RSP), so refuse
        // to build a stub for such a type instead of emitting code that corrupts the stack.
        if (align > max_encodable_align || (align & (align - 1)) != 0) {
            *out_layout = nullptr;
            return INFIX_ERROR_LAYOUT_FAILED;
        }
        if (align > max_align)
            max_align = align;

        saved_args_data_size = _infix_align_up(saved_args_data_size, align);
        saved_args_data_size += context->arg_types[i]->size;
        // The running total only grows, so checking it here is equivalent to checking it once
        // after the loop, but keeps it from ever accumulating far enough to wrap.
        if (saved_args_data_size > INFIX_MAX_ARG_SIZE) {
            *out_layout = nullptr;
            return INFIX_ERROR_LAYOUT_FAILED;
        }
    }
    // One extra max_align of slack pays for aligning the start of the saved-data area below.
    size_t total_local_space = return_size + args_array_size + saved_args_data_size + max_align;
    // Safety check against allocating too much stack.
    if (total_local_space > INFIX_MAX_STACK_ALLOC) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    // The total allocation for the stack frame must be aligned to the maximum required alignment.
    layout->total_stack_alloc = (uint32_t)_infix_align_up(total_local_space, max_align);

    // Local variables are accessed via negative offsets from the frame pointer (RBP).
    // The layout is [ return_buffer | args_array | (padding) | saved_args_data ]
    layout->return_buffer_offset = -(int32_t)layout->total_stack_alloc;
    layout->args_array_offset = layout->return_buffer_offset + (int32_t)return_size;

    // Align the start of the saved data area. The (negative) offset is rounded up, i.e. towards
    // RBP, using modular size_t arithmetic and then narrowed back to a signed 32-bit offset.
    layout->saved_args_offset =
        (int32_t)_infix_align_up((size_t)(layout->args_array_offset + args_array_size), max_align);
    layout->max_align = (uint32_t)max_align;

    *out_layout = layout;
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Reverse): Generates the prologue for the reverse trampoline stub.
 * @details Emits, in order: `push rbp`, `mov rbp, rsp`, an `and rsp, imm8` that rounds the
 *          stack pointer down to `layout->max_align`, and a `sub rsp, imm32` that reserves
 *          `layout->total_stack_alloc` bytes of local space.
 *
 *          NOTE(review): the alignment mask is narrowed to `int8_t`, so it is only a valid
 *          mask when `max_align` is a power of two no larger than 128 — confirm that the
 *          layout stage guarantees this.
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_prologue_sysv_x64(code_buffer * buf, infix_reverse_call_frame_layout * layout) {
    // Establish the frame pointer; locals are later addressed relative to RBP.
    emit_push_reg(buf, RBP_REG);
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG);

    // Force alignment: -max_align is the bit mask that clears the low bits of RSP.
    const int8_t rsp_align_mask = (int8_t)-(int8_t)layout->max_align;
    emit_and_reg_imm8(buf, RSP_REG, rsp_align_mask);

    // Reserve the local area computed by the layout stage.
    emit_sub_reg_imm32(buf, RSP_REG, layout->total_stack_alloc);
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 3 (Reverse): Generates code to marshal arguments from their native
 *          locations into the generic `void**` array for the C dispatcher.
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @param context The reverse trampoline context.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_argument_marshalling_sysv_x64(code_buffer * buf,
                                                                   infix_reverse_call_frame_layout * layout,
                                                                   infix_reverse_t * context) {
    size_t gpr_idx = 0, xmm_idx = 0, current_saved_data_offset = 0;
    // Correctly determine if the return value uses a hidden pointer by performing a full ABI classification.
    bool return_in_memory = false;
    infix_type * ret_type = context->return_type;
    bool ret_is_aggregate = (ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
                             ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX);
    if (ret_is_aggregate) {
        if (ret_type->size > 16)
            return_in_memory = true;
        else {
            arg_class_t ret_classes[2];
            size_t num_ret_classes;
            classify_aggregate_sysv(ret_type, ret_classes, &num_ret_classes);
            if (num_ret_classes > 0 && ret_classes[0] == MEMORY)
                return_in_memory = true;
        }
    }
    // The long double primitive is a special case that does not use the hidden pointer.
    if (is_long_double(ret_type))
        return_in_memory = false;
    // If the return value is passed by reference, save the pointer from RDI.
    if (return_in_memory)
        emit_mov_mem_reg(buf, RBP_REG, layout->return_buffer_offset, GPR_ARGS[gpr_idx++]);  // mov [rbp + offset], rdi
    // Stack arguments passed by the caller start at [rbp + 16].
    size_t stack_arg_offset = 16;
    for (size_t i = 0; i < context->num_args; i++) {
        infix_type * current_type = context->arg_types[i];
        current_saved_data_offset = _infix_align_up(current_saved_data_offset, current_type->alignment);
        int32_t arg_save_loc = layout->saved_args_offset + current_saved_data_offset;

        // Correct classification logic for vectors/primitives vs aggregates
        arg_class_t classes[2] = {NO_CLASS, NO_CLASS};
        size_t num_classes = 0;
        bool is_aggregate =
            (current_type->category == INFIX_TYPE_STRUCT || current_type->category == INFIX_TYPE_UNION ||
             current_type->category == INFIX_TYPE_ARRAY || current_type->category == INFIX_TYPE_COMPLEX);

        if (is_aggregate) {
            classify_aggregate_sysv(current_type, classes, &num_classes);
        }
        else if (is_float16(current_type) || is_float(current_type) || is_double(current_type) ||
                 current_type->category == INFIX_TYPE_VECTOR) {
            classes[0] = SSE;
            num_classes = 1;
        }
        else {
            classes[0] = INTEGER;
            num_classes = 1;
            if (current_type->size > 8) {
                classes[1] = INTEGER;
                num_classes = 2;
            }
        }

        bool is_from_stack = false;
        // Determine if the argument is in registers or on the stack.
        if (classes[0] == MEMORY)
            is_from_stack = true;
        else if (num_classes == 1) {
            if (classes[0] == SSE)
                if (xmm_idx < NUM_XMM_ARGS) {
                    // Use appropriate width move for vectors to prevent truncation
                    if (current_type->category == INFIX_TYPE_VECTOR) {
                        if (current_type->size == 64)
                            emit_vmovupd_mem_zmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                        else if (current_type->size == 32)
                            emit_vmovupd_mem_ymm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                        else if (current_type->size == 16)
                            emit_movups_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                        else  // size 8 (or other small vector)
                            emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                    }
                    else if (is_float16(current_type)) {
                        // movd eax, xmm_reg ; mov [rbp + arg_save_loc], ax
                        emit_movq_gpr_xmm(buf, RAX_REG, XMM_ARGS[xmm_idx++]);
                        emit_mov_mem_reg16(buf, RBP_REG, arg_save_loc, RAX_REG);
                    }
                    else if (is_float(current_type))
                        emit_movss_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                    else
                        emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
                }
                else
                    is_from_stack = true;



( run in 0.802 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )