Affix
view release on metacpan or search on metacpan
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
.prepare_forward_call_frame = prepare_forward_call_frame_sysv_x64,
.generate_forward_prologue = generate_forward_prologue_sysv_x64,
.generate_forward_argument_moves = generate_forward_argument_moves_sysv_x64,
.generate_forward_call_instruction = generate_forward_call_instruction_sysv_x64,
.generate_forward_epilogue = generate_forward_epilogue_sysv_x64};
/*
 * Forward declarations of the static System V x64 reverse-trampoline stage functions.
 * They are declared here so that the v-table below can take their addresses.
 */
static infix_status prepare_reverse_call_frame_sysv_x64(infix_arena_t * arena,
infix_reverse_call_frame_layout ** out_layout,
infix_reverse_t * context);
static infix_status generate_reverse_prologue_sysv_x64(code_buffer * buf, infix_reverse_call_frame_layout * layout);
static infix_status generate_reverse_argument_marshalling_sysv_x64(code_buffer * buf,
infix_reverse_call_frame_layout * layout,
infix_reverse_t * context);
static infix_status generate_reverse_dispatcher_call_sysv_x64(code_buffer * buf,
infix_reverse_call_frame_layout * layout,
infix_reverse_t * context);
static infix_status generate_reverse_epilogue_sysv_x64(code_buffer * buf,
infix_reverse_call_frame_layout * layout,
infix_reverse_t * context);
/**
 * The v-table of System V x64 functions for generating reverse trampolines.
 * Each field is bound to the static function of the same stage name declared above.
 */
const infix_reverse_abi_spec g_sysv_x64_reverse_spec = {
.prepare_reverse_call_frame = prepare_reverse_call_frame_sysv_x64,
.generate_reverse_prologue = generate_reverse_prologue_sysv_x64,
.generate_reverse_argument_marshalling = generate_reverse_argument_marshalling_sysv_x64,
.generate_reverse_dispatcher_call = generate_reverse_dispatcher_call_sysv_x64,
.generate_reverse_epilogue = generate_reverse_epilogue_sysv_x64};
/*
 * Forward declarations of the static System V x64 direct-marshalling stage functions.
 * They are declared here so that the v-table below can take their addresses.
 */
static infix_status prepare_direct_forward_call_frame_sysv_x64(infix_arena_t * arena,
infix_direct_call_frame_layout ** out_layout,
infix_type * ret_type,
infix_type ** arg_types,
size_t num_args,
infix_direct_arg_handler_t * handlers,
void * target_fn);
static infix_status generate_direct_forward_prologue_sysv_x64(code_buffer * buf,
infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_argument_moves_sysv_x64(code_buffer * buf,
infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_call_instruction_sysv_x64(code_buffer * buf,
infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_epilogue_sysv_x64(code_buffer * buf,
infix_direct_call_frame_layout * layout,
infix_type * ret_type);
/**
 * The v-table for the new Direct Marshalling ABI.
 * Each field is bound to the static function of the same stage name declared above.
 */
const infix_direct_forward_abi_spec g_sysv_x64_direct_forward_spec = {
.prepare_direct_forward_call_frame = prepare_direct_forward_call_frame_sysv_x64,
.generate_direct_forward_prologue = generate_direct_forward_prologue_sysv_x64,
.generate_direct_forward_argument_moves = generate_direct_forward_argument_moves_sysv_x64,
.generate_direct_forward_call_instruction = generate_direct_forward_call_instruction_sysv_x64,
.generate_direct_forward_epilogue = generate_direct_forward_epilogue_sysv_x64};
/**
 * @internal
 * @brief Merges the contribution of one type into the two eightbyte classes of an aggregate.
 * @details Walks `type` (recursing into arrays, complex numbers, structs and unions) and, for
 * every leaf that overlaps one of the first two 8-byte chunks ("eightbytes"), merges the leaf's
 * class into `classes[]`. A chunk that receives both SSE and INTEGER parts ends up INTEGER.
 * Anything located at or beyond byte 16 is ignored because it cannot affect register passing.
 *
 * @param type The member/element currently being examined (may be null; then it is ignored).
 * @param offset Byte offset of `type` from the start of the outermost aggregate.
 * @param[in,out] classes The two eightbyte classes being accumulated.
 * @param depth Current recursion depth, bounded by `MAX_CLASSIFY_DEPTH`.
 * @param[in,out] field_count Running count of visited leaves, bounded by
 *                `MAX_AGGREGATE_FIELDS_TO_CLASSIFY`.
 * @param is_bitfield True if `type` is a bitfield member (exempt from the alignment check).
 * @return `true` if the aggregate must be passed in memory (in which case `classes[0]` has
 *         been set to MEMORY), `false` otherwise.
 */
static bool classify_recursive(
    const infix_type * type, size_t offset, arg_class_t classes[2], int depth, size_t * field_count, bool is_bitfield) {
    // A null type can reach us from a malformed parent (e.g. fuzzer input); it contributes nothing.
    if (type == nullptr)
        return false;

    // Complexity budget: give up and pass in memory when the type is too large or too deep.
    const bool too_many_fields = *field_count > MAX_AGGREGATE_FIELDS_TO_CLASSIFY;
    const bool too_deep = depth > MAX_CLASSIFY_DEPTH;
    if (too_many_fields || too_deep) {
        classes[0] = MEMORY;
        return true;
    }

    // Ordinary members must sit at a naturally aligned offset. Bitfields are exempt because
    // they may start anywhere inside their storage unit.
    const bool misaligned = type->alignment != 0 && (offset % type->alignment) != 0;
    if (misaligned && !is_bitfield) {
        classes[0] = MEMORY;
        return true;
    }

    switch (type->category) {
    case INFIX_TYPE_PRIMITIVE: {
        ++*field_count;

        // `long double` forces the whole aggregate into memory.
        if (is_long_double(type)) {
            classes[0] = MEMORY;
            return true;
        }

        const size_t width = type->size;
        if (width == 0)
            return false;
        // Guard the last-byte computation below against size_t wrap-around.
        if (offset > SIZE_MAX - (width - 1)) {
            classes[0] = MEMORY;
            return true;
        }

        // A primitive may straddle eightbytes, so visit every chunk it touches.
        const size_t last_slot = (offset + width - 1) / 8;
        arg_class_t wanted = INTEGER;
        if (is_float16(type) || is_float(type) || is_double(type))
            wanted = SSE;

        for (size_t slot = offset / 8; slot <= last_slot && slot < 2; ++slot) {
            if (classes[slot] == wanted)
                continue;
            // Empty chunk adopts the new class; any disagreement collapses to INTEGER.
            classes[slot] = (classes[slot] == NO_CLASS) ? wanted : INTEGER;
        }
        return false;
    }

    case INFIX_TYPE_POINTER: {
        ++*field_count;
        // Pointers are INTEGER class and override whatever the chunk held before.
        const size_t slot = offset / 8;
        if (slot < 2)
            classes[slot] = INTEGER;
        return false;
    }

    case INFIX_TYPE_ARRAY: {
        const infix_type * element = type->meta.array_info.element_type;
        if (element == nullptr)
            return false;

        const size_t count = type->meta.array_info.num_elements;

        // Zero-sized elements never advance the offset, so looping over them would only burn
        // time. Classify a single representative element (or nothing for an empty array).
        if (element->size == 0) {
            if (count == 0)
                return false;
            return classify_recursive(element, offset, classes, depth + 1, field_count, false);
        }

        for (size_t n = 0; n < count; ++n) {
            // Re-check the budget before descending into each element.
            if (*field_count > MAX_AGGREGATE_FIELDS_TO_CLASSIFY) {
                classes[0] = MEMORY;
                return true;
            }
            const size_t where = offset + n * element->size;
            // Elements at or past byte 16 cannot influence the result; stop early.
            if (where >= 16)
                break;
            if (classify_recursive(element, where, classes, depth + 1, field_count, false))
                return true;
        }
        return false;
    }

    case INFIX_TYPE_COMPLEX: {
        infix_type * part = type->meta.complex_info.base_type;
        // A missing or zero-sized base is malformed; contribute nothing.
        if (part == nullptr || part->size == 0)
            return false;
        // Treated like `struct { base real; base imag; }`: real part first, then imaginary.
        return classify_recursive(part, offset, classes, depth + 1, field_count, false) ||
            classify_recursive(part, offset + part->size, classes, depth + 1, field_count, false);
    }

    case INFIX_TYPE_VECTOR: {
        ++*field_count;
        const size_t first_slot = offset / 8;
        const size_t chunk_count = (type->size + 7) / 8;
        // A vector only claims chunks that are still unclassified; existing classes win.
        for (size_t k = 0; k < chunk_count && first_slot + k < 2; ++k) {
            if (classes[first_slot + k] == NO_CLASS)
                classes[first_slot + k] = SSE;
        }
        return false;
    }

    case INFIX_TYPE_STRUCT:
    case INFIX_TYPE_UNION: {
        // A null member table is invalid; pass the aggregate in memory.
        if (type->meta.aggregate_info.members == nullptr) {
            classes[0] = MEMORY;
            return true;
        }

        const size_t member_total = type->meta.aggregate_info.num_members;
        for (size_t m = 0; m < member_total; ++m) {
            // Re-check the budget before descending into each member.
            if (*field_count > MAX_AGGREGATE_FIELDS_TO_CLASSIFY) {
                classes[0] = MEMORY;
                return true;
            }

            infix_struct_member * field = &type->meta.aggregate_info.members[m];
            // A member without a type is invalid; pass the aggregate in memory.
            if (field->type == nullptr) {
                classes[0] = MEMORY;
                return true;
            }

            const size_t where = offset + field->offset;
            // Members starting at or past byte 16 cannot influence the result; skip them.
            if (where >= 16)
                continue;

            if (classify_recursive(field->type, where, classes, depth + 1, field_count, field->is_bitfield))
                return true;
        }
        return false;
    }

    default:
        // Any other category contributes nothing to the classification.
        return false;
    }
}
/**
 * @internal
 * @brief Classifies an aggregate for System V x64 argument passing.
 * @details The aggregate is viewed as at most two eightbytes. Aggregates larger than 16 bytes,
 * and aggregates for which the recursive walk reports a memory-forcing condition, are
 * classified as MEMORY. Otherwise every eightbyte covered by the aggregate's size that the
 * walk left unclassified (e.g. pure padding) defaults to INTEGER.
 *
 * @param type The aggregate type to classify.
 * @param[out] classes Receives the class of each eightbyte; `classes[0] == MEMORY` means the
 *             aggregate is passed in memory.
 * @param[out] num_classes Receives the number of classified eightbytes. This is 1 for MEMORY,
 *             otherwise the count of entries that are not NO_CLASS (0 is possible when the
 *             aggregate has size 0 and nothing was classified).
 */
static void classify_aggregate_sysv(const infix_type * type, arg_class_t classes[2], size_t * num_classes) {
    classes[0] = NO_CLASS;
    classes[1] = NO_CLASS;

    // More than two eightbytes never fits in registers.
    if (type->size > 16) {
        classes[0] = MEMORY;
        *num_classes = 1;
        return;
    }

    // The walk sets classes[0] to MEMORY itself whenever it returns true.
    size_t visited_fields = 0;
    const bool forced_to_memory = classify_recursive(type, 0, classes, 0, &visited_fields, false);
    if (forced_to_memory) {
        *num_classes = 1;
        return;
    }

    // Eightbytes that are covered by the size but were never touched default to INTEGER.
    if (classes[0] == NO_CLASS && type->size > 0)
        classes[0] = INTEGER;
    if (classes[1] == NO_CLASS && type->size > 8)
        classes[1] = INTEGER;

    // Report how many eightbytes ended up with a class.
    size_t classified = 0;
    for (size_t slot = 0; slot < 2; ++slot) {
        if (classes[slot] != NO_CLASS)
            ++classified;
    }
    *num_classes = classified;
}
/**
* @internal
* @brief Stage 1 (Forward): Analyzes a signature and creates a call frame layout for System V.
* @details This function iterates through a function's arguments, classifying each one
* to determine its location (GPR, XMM, or stack) according to the SysV ABI rules.
* @param arena The temporary arena for allocations.
* @param out_layout Receives the created layout blueprint.
* @param ret_type The function's return type.
* @param arg_types Array of argument types.
* @param num_args Total number of arguments.
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
emit_pop_reg(buf, R12_REG);
emit_pop_reg(buf, RBP_REG);
emit_ret(buf);
return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Reverse): Calculates the stack layout for a reverse trampoline stub.
 * @details Determines the total stack space needed for the stub's locals: the return buffer,
 * the `void**` args_array, and the area where incoming argument values are saved. All offsets
 * stored in the layout are negative displacements from the frame pointer (RBP).
 *
 * Every input that feeds the size arithmetic (return size, argument count, argument size and
 * alignment) is bounded *before* it is used, so the final `INFIX_MAX_STACK_ALLOC` check cannot
 * be bypassed by `size_t` wrap-around on malformed types.
 *
 * @param arena The temporary arena for allocations.
 * @param[out] out_layout The resulting layout blueprint; set to null on every failure path.
 * @param context The reverse trampoline context (return type, argument types and count).
 * @return `INFIX_SUCCESS` on success, `INFIX_ERROR_ALLOCATION_FAILED` if the layout could not be
 *         allocated, or `INFIX_ERROR_LAYOUT_FAILED` if the signature needs too much stack.
 */
static infix_status prepare_reverse_call_frame_sysv_x64(infix_arena_t * arena,
                                                        infix_reverse_call_frame_layout ** out_layout,
                                                        infix_reverse_t * context) {
    // Fail closed: the caller sees a null layout unless we reach the very end.
    *out_layout = nullptr;

    infix_reverse_call_frame_layout * layout = infix_arena_calloc(
        arena, 1, sizeof(infix_reverse_call_frame_layout), _Alignof(infix_reverse_call_frame_layout));
    if (!layout)
        return INFIX_ERROR_ALLOCATION_FAILED;

    // Anything larger than the stack budget would be rejected by the final check anyway;
    // rejecting it up front keeps the rounding and summation below from wrapping around.
    const size_t max_stack = INFIX_MAX_STACK_ALLOC;
    if (context->return_type->size > max_stack || context->num_args > max_stack / sizeof(void *))
        return INFIX_ERROR_LAYOUT_FAILED;

    // Calculate space for each component, rounded up to 16 bytes for safety and simplicity.
    size_t return_size = (context->return_type->size + 15) & ~(size_t)15;
    size_t args_array_size = (context->num_args * sizeof(void *) + 15) & ~(size_t)15;
    size_t saved_args_data_size = 0;
    size_t max_align = 16;  // Start with 16 for stack safety.

    for (size_t i = 0; i < context->num_args; ++i) {
        const infix_type * arg_type = context->arg_types[i];

        // Security: reject excessively large types before they reach the code generator.
        if (arg_type->size > INFIX_MAX_ARG_SIZE)
            return INFIX_ERROR_LAYOUT_FAILED;

        // Every saved argument gets at least an 8-byte aligned slot.
        size_t align = arg_type->alignment;
        if (align < 8)
            align = 8;
        // An alignment beyond the stack budget can never be satisfied (it is added to the
        // total below) and must not be allowed to overflow the align-up computation.
        if (align > max_stack)
            return INFIX_ERROR_LAYOUT_FAILED;
        if (align > max_align)
            max_align = align;

        saved_args_data_size = _infix_align_up(saved_args_data_size, align);
        saved_args_data_size += arg_type->size;
    }

    if (saved_args_data_size > INFIX_MAX_ARG_SIZE)
        return INFIX_ERROR_LAYOUT_FAILED;

    // One extra `max_align` of slack pays for aligning the start of the saved-argument area.
    size_t total_local_space = return_size + args_array_size + saved_args_data_size + max_align;
    // Safety check against allocating too much stack.
    if (total_local_space > INFIX_MAX_STACK_ALLOC)
        return INFIX_ERROR_LAYOUT_FAILED;

    // The total allocation for the stack frame must be aligned to the maximum required alignment.
    layout->total_stack_alloc = (uint32_t)_infix_align_up(total_local_space, max_align);

    // Local variables are accessed via negative offsets from the frame pointer (RBP).
    // The layout is [ return_buffer | args_array | (padding) | saved_args_data ]
    layout->return_buffer_offset = -(int32_t)layout->total_stack_alloc;
    layout->args_array_offset = layout->return_buffer_offset + (int32_t)return_size;
    // Align the start of the saved data area. The sum is a negative displacement that is
    // rounded in unsigned arithmetic and then narrowed back to a signed 32-bit offset.
    layout->saved_args_offset =
        (int32_t)_infix_align_up((size_t)(layout->args_array_offset + args_array_size), max_align);
    layout->max_align = (uint32_t)max_align;

    *out_layout = layout;
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Reverse): Generates the prologue for the reverse trampoline stub.
 * @details Emits `push rbp; mov rbp, rsp`, force-aligns RSP down to `layout->max_align`, and
 * then subtracts `layout->total_stack_alloc` to reserve the stub's local stack space.
 *
 * The alignment mask is emitted as an 8-bit immediate, so only alignments from 1 to 128 can be
 * encoded (mask -1 to -128). Larger values would be truncated to a wrong mask (256 becomes 0,
 * i.e. `and rsp, 0`), so they are rejected instead of producing a stub that corrupts RSP.
 *
 * @param buf The code buffer.
 * @param layout The layout blueprint (`max_align` and `total_stack_alloc` are read).
 * @return `INFIX_SUCCESS`, or `INFIX_ERROR_LAYOUT_FAILED` if `max_align` cannot be encoded.
 */
static infix_status generate_reverse_prologue_sysv_x64(code_buffer * buf, infix_reverse_call_frame_layout * layout) {
    const uint32_t align = layout->max_align;
    // Refuse alignments whose negated value does not fit in a signed 8-bit immediate.
    if (align == 0 || align > 128)
        return INFIX_ERROR_LAYOUT_FAILED;

    emit_push_reg(buf, RBP_REG);              // push rbp
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG);  // mov rbp, rsp
    // FORCE ALIGNMENT: and rsp, -max_align (value is in [-128, -1], so the cast is exact).
    emit_and_reg_imm8(buf, RSP_REG, (int8_t)(-(int32_t)align));
    emit_sub_reg_imm32(buf, RSP_REG, layout->total_stack_alloc);  // Allocate our calculated space.
    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 3 (Reverse): Generates code to marshal arguments from their native
* locations into the generic `void**` array for the C dispatcher.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param context The reverse trampoline context.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_reverse_argument_marshalling_sysv_x64(code_buffer * buf,
infix_reverse_call_frame_layout * layout,
infix_reverse_t * context) {
size_t gpr_idx = 0, xmm_idx = 0, current_saved_data_offset = 0;
// Correctly determine if the return value uses a hidden pointer by performing a full ABI classification.
bool return_in_memory = false;
infix_type * ret_type = context->return_type;
bool ret_is_aggregate = (ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX);
if (ret_is_aggregate) {
if (ret_type->size > 16)
return_in_memory = true;
else {
arg_class_t ret_classes[2];
size_t num_ret_classes;
classify_aggregate_sysv(ret_type, ret_classes, &num_ret_classes);
if (num_ret_classes > 0 && ret_classes[0] == MEMORY)
return_in_memory = true;
}
}
// The long double primitive is a special case that does not use the hidden pointer.
if (is_long_double(ret_type))
return_in_memory = false;
// If the return value is passed by reference, save the pointer from RDI.
if (return_in_memory)
emit_mov_mem_reg(buf, RBP_REG, layout->return_buffer_offset, GPR_ARGS[gpr_idx++]); // mov [rbp + offset], rdi
// Stack arguments passed by the caller start at [rbp + 16].
size_t stack_arg_offset = 16;
for (size_t i = 0; i < context->num_args; i++) {
infix_type * current_type = context->arg_types[i];
current_saved_data_offset = _infix_align_up(current_saved_data_offset, current_type->alignment);
int32_t arg_save_loc = layout->saved_args_offset + current_saved_data_offset;
// Correct classification logic for vectors/primitives vs aggregates
arg_class_t classes[2] = {NO_CLASS, NO_CLASS};
size_t num_classes = 0;
bool is_aggregate =
(current_type->category == INFIX_TYPE_STRUCT || current_type->category == INFIX_TYPE_UNION ||
current_type->category == INFIX_TYPE_ARRAY || current_type->category == INFIX_TYPE_COMPLEX);
if (is_aggregate) {
classify_aggregate_sysv(current_type, classes, &num_classes);
}
else if (is_float16(current_type) || is_float(current_type) || is_double(current_type) ||
current_type->category == INFIX_TYPE_VECTOR) {
classes[0] = SSE;
num_classes = 1;
}
else {
classes[0] = INTEGER;
num_classes = 1;
if (current_type->size > 8) {
classes[1] = INTEGER;
num_classes = 2;
}
}
bool is_from_stack = false;
// Determine if the argument is in registers or on the stack.
if (classes[0] == MEMORY)
is_from_stack = true;
else if (num_classes == 1) {
if (classes[0] == SSE)
if (xmm_idx < NUM_XMM_ARGS) {
// Use appropriate width move for vectors to prevent truncation
if (current_type->category == INFIX_TYPE_VECTOR) {
if (current_type->size == 64)
emit_vmovupd_mem_zmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else if (current_type->size == 32)
emit_vmovupd_mem_ymm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else if (current_type->size == 16)
emit_movups_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else // size 8 (or other small vector)
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
}
else if (is_float16(current_type)) {
// movd eax, xmm_reg ; mov [rbp + arg_save_loc], ax
emit_movq_gpr_xmm(buf, RAX_REG, XMM_ARGS[xmm_idx++]);
emit_mov_mem_reg16(buf, RBP_REG, arg_save_loc, RAX_REG);
}
else if (is_float(current_type))
emit_movss_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
}
else
is_from_stack = true;
( run in 0.802 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )