Affix
view release on metacpan or search on metacpan
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
/**
* Copyright (c) 2025 Sanko Robinson
*
* This source code is dual-licensed under the Artistic License 2.0 or the MIT License.
* You may choose to use this code under the terms of either license.
*
* SPDX-License-Identifier: (Artistic-2.0 OR MIT)
*
* The documentation blocks within this file are licensed under the
* Creative Commons Attribution 4.0 International License (CC BY 4.0).
*
* SPDX-License-Identifier: CC-BY-4.0
*/
/**
* @file abi_sysv_x64.c
* @brief Implements the FFI logic for the System V AMD64 ABI.
* @ingroup internal_abi_x64
*
* @internal
* This file provides the concrete implementation of the ABI spec for the System V
* x86-64 ABI, the standard calling convention for Linux, macOS, BSD, and other
* UNIX-like operating systems on this architecture.
*
* Key features of the System V ABI implemented here:
*
* - **Register Usage:**
* - GPRs for integers/pointers: RDI, RSI, RDX, RCX, R8, R9.
* - XMMs for floats/doubles: XMM0-XMM7.
*
* - **Aggregate Classification:** Structs up to 16 bytes are recursively classified
* into one or two "eightbytes" (64-bit chunks). Based on the classes of these
* eightbytes (INTEGER, SSE, MEMORY), the aggregate can be passed in up to two
* registers (GPRs and/or XMMs) or on the stack.
*
* - **Return Values:**
* - Small aggregates (<= 16 bytes) are returned in RAX/RDX and/or XMM0/XMM1.
* - Larger aggregates (> 16 bytes) are returned via a hidden pointer in RDI.
* - `long double` is a special case and is returned on the x87 FPU stack `st(0)`.
*
* - **Variadic Functions:** Before calling a variadic function, the `AL` register
* must be set to the number of XMM registers used for arguments.
* @endinternal
*/
#include "arch/x64/abi_x64_common.h"
#include "arch/x64/abi_x64_emitters.h"
#include "common/infix_internals.h"
#include "common/utility.h"
#include <stdbool.h>
#include <stdlib.h>
/**
 * General-purpose registers that carry INTEGER-class arguments, in the order they are
 * consumed. Layout code records an index into this array (`reg_index`), so position 0
 * is the first argument register. Entry 0 (RDI) is also the register used for the
 * hidden return-buffer pointer when a value is returned in memory.
 */
static const x64_gpr GPR_ARGS[] = {RDI_REG, RSI_REG, RDX_REG, RCX_REG, R8_REG, R9_REG};
/**
 * XMM registers that carry SSE-class arguments, in the order they are consumed.
 * Indexed by the `reg_index` values the layout code assigns to SSE arguments.
 */
static const x64_xmm XMM_ARGS[] = {XMM0_REG, XMM1_REG, XMM2_REG, XMM3_REG, XMM4_REG, XMM5_REG, XMM6_REG, XMM7_REG};
/**
 * The number of GPRs available for argument passing.
 * Used as the bound when handing out indices into GPR_ARGS, so it must match that array's length.
 */
#define NUM_GPR_ARGS 6
/**
 * The number of XMM registers available for argument passing.
 * Used as the bound when handing out indices into XMM_ARGS, so it must match that array's length.
 */
#define NUM_XMM_ARGS 8
/**
 * Recursion limit for the aggregate classification algorithm. When the nesting depth
 * exceeds this value, classification gives up and the aggregate is classified as MEMORY.
 */
#define MAX_CLASSIFY_DEPTH 32
/**
 * Limit on the number of fields examined during classification. When the running field
 * count exceeds this value, classification gives up and the aggregate is classified as
 * MEMORY; this bounds the work done on excessively complex types.
 */
#define MAX_AGGREGATE_FIELDS_TO_CLASSIFY 32
/**
 * @internal
 * @brief Register class assigned to a single "eightbyte" (one 64-bit slice of a value)
 *        by the System V AMD64 classification algorithm.
 */
typedef enum {
    NO_CLASS = 0, ///< Not classified yet; the state every eightbyte starts in.
    INTEGER = 1,  ///< The eightbyte travels in a general-purpose register (GPR).
    SSE = 2,      ///< The eightbyte travels in an SSE register (XMM).
    MEMORY = 3    ///< Too large or too complex for registers; the value goes on the stack.
} arg_class_t;
/*
 * Stage functions of the System V x64 forward-trampoline generator. They are declared
 * up front so the dispatch table below can reference them ahead of their definitions.
 */
static infix_status prepare_forward_call_frame_sysv_x64(
    infix_arena_t * arena,
    infix_call_frame_layout ** out_layout,
    infix_type * ret_type,
    infix_type ** arg_types,
    size_t num_args,
    size_t num_fixed_args,
    void * target_fn);
static infix_status generate_forward_prologue_sysv_x64(
    code_buffer * buf,
    infix_call_frame_layout * layout);
static infix_status generate_forward_argument_moves_sysv_x64(
    code_buffer * buf,
    infix_call_frame_layout * layout,
    infix_type ** arg_types,
    size_t num_args,
    size_t num_fixed_args);
static infix_status generate_forward_call_instruction_sysv_x64(
    code_buffer * buf,
    infix_call_frame_layout * layout);
static infix_status generate_forward_epilogue_sysv_x64(
    code_buffer * buf,
    infix_call_frame_layout * layout,
    infix_type * ret_type);

/** The v-table of System V x64 functions for generating forward trampolines. */
const infix_forward_abi_spec g_sysv_x64_forward_spec = {
    .prepare_forward_call_frame = prepare_forward_call_frame_sysv_x64,
    .generate_forward_prologue = generate_forward_prologue_sysv_x64,
    .generate_forward_argument_moves = generate_forward_argument_moves_sysv_x64,
    .generate_forward_call_instruction = generate_forward_call_instruction_sysv_x64,
    .generate_forward_epilogue = generate_forward_epilogue_sysv_x64,
};
/*
 * Stage functions of the System V x64 reverse-trampoline generator. They are declared
 * up front so the dispatch table below can reference them ahead of their definitions.
 */
static infix_status prepare_reverse_call_frame_sysv_x64(
    infix_arena_t * arena,
    infix_reverse_call_frame_layout ** out_layout,
    infix_reverse_t * context);
static infix_status generate_reverse_prologue_sysv_x64(
    code_buffer * buf,
    infix_reverse_call_frame_layout * layout);
static infix_status generate_reverse_argument_marshalling_sysv_x64(
    code_buffer * buf,
    infix_reverse_call_frame_layout * layout,
    infix_reverse_t * context);
static infix_status generate_reverse_dispatcher_call_sysv_x64(
    code_buffer * buf,
    infix_reverse_call_frame_layout * layout,
    infix_reverse_t * context);
static infix_status generate_reverse_epilogue_sysv_x64(
    code_buffer * buf,
    infix_reverse_call_frame_layout * layout,
    infix_reverse_t * context);

/** The v-table of System V x64 functions for generating reverse trampolines. */
const infix_reverse_abi_spec g_sysv_x64_reverse_spec = {
    .prepare_reverse_call_frame = prepare_reverse_call_frame_sysv_x64,
    .generate_reverse_prologue = generate_reverse_prologue_sysv_x64,
    .generate_reverse_argument_marshalling = generate_reverse_argument_marshalling_sysv_x64,
    .generate_reverse_dispatcher_call = generate_reverse_dispatcher_call_sysv_x64,
    .generate_reverse_epilogue = generate_reverse_epilogue_sysv_x64,
};
/*
 * Stage functions of the System V x64 Direct Marshalling generator. They are declared
 * up front so the dispatch table below can reference them ahead of their definitions.
 */
static infix_status prepare_direct_forward_call_frame_sysv_x64(
    infix_arena_t * arena,
    infix_direct_call_frame_layout ** out_layout,
    infix_type * ret_type,
    infix_type ** arg_types,
    size_t num_args,
    infix_direct_arg_handler_t * handlers,
    void * target_fn);
static infix_status generate_direct_forward_prologue_sysv_x64(
    code_buffer * buf,
    infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_argument_moves_sysv_x64(
    code_buffer * buf,
    infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_call_instruction_sysv_x64(
    code_buffer * buf,
    infix_direct_call_frame_layout * layout);
static infix_status generate_direct_forward_epilogue_sysv_x64(
    code_buffer * buf,
    infix_direct_call_frame_layout * layout,
    infix_type * ret_type);

/** The v-table for the new Direct Marshalling ABI. */
const infix_direct_forward_abi_spec g_sysv_x64_direct_forward_spec = {
    .prepare_direct_forward_call_frame = prepare_direct_forward_call_frame_sysv_x64,
    .generate_direct_forward_prologue = generate_direct_forward_prologue_sysv_x64,
    .generate_direct_forward_argument_moves = generate_direct_forward_argument_moves_sysv_x64,
    .generate_direct_forward_call_instruction = generate_direct_forward_call_instruction_sysv_x64,
    .generate_direct_forward_epilogue = generate_direct_forward_epilogue_sysv_x64,
};
/**
* @internal
* @brief Recursively classifies the eightbytes of an aggregate type.
* @details This is the core of the complex System V classification algorithm. It traverses
* the fields of a struct/array, examining each 8-byte chunk ("eightbyte") and assigning it a
* class (INTEGER, SSE, MEMORY). The classification is "merged" according to ABI rules
* (e.g., if an eightbyte contains both INTEGER and SSE parts, it becomes INTEGER).
*
* @param type The type of the current member/element being examined.
* @param offset The byte offset of this member from the start of the aggregate.
* @param[in,out] classes An array of two `arg_class_t` that is updated during classification.
* @param depth The current recursion depth (to prevent stack overflow on malicious input).
* @param field_count A counter to prevent DoS from excessively complex types.
* @param is_bitfield True if the current member is a bitfield.
* @return `true` if a condition forcing MEMORY classification is found, `false` otherwise.
*/
static bool classify_recursive(
const infix_type * type, size_t offset, arg_class_t classes[2], int depth, size_t * field_count, bool is_bitfield) {
// A recursive call can be made with a NULL type (e.g., from a malformed array from fuzzer).
if (type == nullptr)
return false; // Terminate recusion path.
// Abort classification if the type is excessively complex or too deep. Give up and pass in memory.
if (*field_count > MAX_AGGREGATE_FIELDS_TO_CLASSIFY || depth > MAX_CLASSIFY_DEPTH) {
classes[0] = MEMORY;
return true;
}
// The ABI requires natural alignment for standard members.
// Bitfields are an exception: they are allowed to be unaligned relative to their
// base type's alignment, as long as they stay within their storage unit.
if (!is_bitfield && type->alignment != 0 && offset % type->alignment != 0) {
classes[0] = MEMORY;
return true;
}
// If a struct is packed, its layout is explicit and should not be inferred
// by recursive classification. Treat it as an opaque block of memory.
// For classification purposes, this is equivalent to an integer array.
if (type->category == INFIX_TYPE_PRIMITIVE) {
(*field_count)++;
// `long double` is a special case. It is passed in memory on the stack, not x87 registers.
if (is_long_double(type)) {
classes[0] = MEMORY;
return true;
}
// Consider all eightbytes that the primitive occupies, not just the starting offset.
size_t start_offset = offset;
// Check for overflow before calculating end_offset
if (type->size == 0)
return false;
if (start_offset > SIZE_MAX - (type->size - 1)) {
classes[0] = MEMORY;
return true;
}
size_t end_offset = start_offset + type->size - 1;
size_t start_eightbyte = start_offset / 8;
size_t end_eightbyte = end_offset / 8;
arg_class_t new_class = (is_float16(type) || is_float(type) || is_double(type)) ? SSE : INTEGER;
for (size_t index = start_eightbyte; index <= end_eightbyte && index < 2; ++index) {
// Merge the new class with the existing class for this eightbyte.
// The rule is: if an eightbyte contains both SSE and INTEGER parts, it is classified as INTEGER.
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
infix_type * ret_type,
infix_type ** arg_types,
size_t num_args,
size_t num_fixed_args,
void * target_fn) {
if (out_layout == nullptr)
return INFIX_ERROR_INVALID_ARGUMENT;
// Allocate the layout struct that will hold our results.
infix_call_frame_layout * layout =
infix_arena_calloc(arena, 1, sizeof(infix_call_frame_layout), _Alignof(infix_call_frame_layout));
if (layout == nullptr) {
*out_layout = nullptr;
return INFIX_ERROR_ALLOCATION_FAILED;
}
layout->is_variadic = num_args > num_fixed_args;
layout->target_fn = target_fn;
layout->arg_locations =
infix_arena_calloc(arena, num_args, sizeof(infix_arg_location), _Alignof(infix_arg_location));
if (layout->arg_locations == nullptr && num_args > 0) {
*out_layout = nullptr;
return INFIX_ERROR_ALLOCATION_FAILED;
}
// gpr_count and xmm_count track the next available GPR and XMM argument registers.
// current_stack_offset tracks the next available stack slot for arguments.
size_t gpr_count = 0, xmm_count = 0, current_stack_offset = 0;
// Determine if the return value requires a hidden pointer argument passed in RDI.
bool ret_is_aggregate = (ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX);
// Rule 1: Aggregates larger than 16 bytes are always returned via hidden pointer.
// Exception: 256/512-bit vectors are returned in YMM0/ZMM0.
layout->return_value_in_memory =
(ret_is_aggregate && ret_type->category != INFIX_TYPE_VECTOR && ret_type->size > 16);
// Rule 2: Small aggregates (<= 16 bytes) must also be returned via hidden pointer
// if their classification is MEMORY. This is critical for types like packed structs
// with unaligned members.
if (ret_is_aggregate && !layout->return_value_in_memory) {
arg_class_t ret_classes[2];
size_t num_ret_classes;
classify_aggregate_sysv(ret_type, ret_classes, &num_ret_classes);
if (num_ret_classes > 0 && ret_classes[0] == MEMORY)
layout->return_value_in_memory = true;
}
// Exception: `long double` is a special case and is always returned on the x87
// FPU stack, never via a hidden pointer.
if (is_long_double(ret_type))
layout->return_value_in_memory = false;
// If a hidden pointer is used, it consumes the first GPR (RDI).
if (layout->return_value_in_memory)
gpr_count++;
layout->num_stack_args = 0;
// Main Argument Classification Loop
for (size_t i = 0; i < num_args; ++i) {
infix_type * type = arg_types[i];
// Security: Reject excessively large types before they reach the code generator.
if (type->size > INFIX_MAX_ARG_SIZE) {
*out_layout = nullptr;
return INFIX_ERROR_LAYOUT_FAILED;
}
// An array passed as a function parameter decays to a pointer.
// We must treat it as a pointer (INTEGER class) for classification,
// bypassing the aggregate classification logic which would incorrectly
// treat it as a by-value struct.
if (type->category == INFIX_TYPE_ARRAY) {
if (gpr_count < NUM_GPR_ARGS) {
layout->arg_locations[i].type = ARG_LOCATION_GPR;
layout->arg_locations[i].reg_index = gpr_count++;
}
else {
layout->arg_locations[i].type = ARG_LOCATION_STACK;
layout->arg_locations[i].stack_offset = current_stack_offset;
current_stack_offset += 8; // Pointers are 8 bytes on the stack
layout->num_stack_args++;
}
continue; // Argument classified, skip the rest of the loop.
}
// Classify the argument type
// Special case: `long double` is always passed on the stack.
if (is_long_double(type)) {
layout->arg_locations[i].type = ARG_LOCATION_STACK;
size_t align = type->alignment;
if (align < 8)
align = 8; // Stack slots are minimum 8 bytes
// Align current offset up to the required alignment (e.g. 16)
current_stack_offset = (current_stack_offset + (align - 1)) & ~(align - 1);
layout->arg_locations[i].stack_offset = current_stack_offset;
current_stack_offset += (type->size + 7) & ~7; // Advance by size, 8-byte aligned
layout->num_stack_args++;
continue; // Go to next argument
}
bool is_aggregate = type->category == INFIX_TYPE_STRUCT || type->category == INFIX_TYPE_UNION ||
type->category == INFIX_TYPE_ARRAY || type->category == INFIX_TYPE_COMPLEX;
arg_class_t classes[2] = {NO_CLASS, NO_CLASS};
size_t num_classes = 0;
bool placed_in_register = false;
if (is_aggregate)
// Complex types need the full classification algorithm.
classify_aggregate_sysv(type, classes, &num_classes);
else {
// Simple primitive and vector types are classified directly.
if (is_float16(type) || is_float(type) || is_double(type) || type->category == INFIX_TYPE_VECTOR) {
classes[0] = SSE;
num_classes = 1;
// Special classification for large AVX vectors (YMM/ZMM).
// They are passed in a single register, which we model as a single SSE class.
// The size check distinguishes them from 128-bit vectors.
if (type->category == INFIX_TYPE_VECTOR && (type->size == 32 || type->size == 64))
num_classes = 1; // Treat as a single unit for classification
}
else {
classes[0] = INTEGER;
num_classes = 1;
// Primitives > 8 bytes (like __int128) are treated as two INTEGER parts.
if (type->size > 8) {
classes[1] = INTEGER;
num_classes = 2;
}
}
}
// If classification resulted in MEMORY, it must go on the stack.
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
size_t gpr_needed = (classes[0] == INTEGER) + (classes[1] == INTEGER);
size_t xmm_needed = (classes[0] == SSE) + (classes[1] == SSE);
if (gpr_count + gpr_needed <= NUM_GPR_ARGS && xmm_count + xmm_needed <= NUM_XMM_ARGS) {
if (classes[0] == INTEGER && classes[1] == INTEGER) {
layout->arg_locations[i].type = ARG_LOCATION_GPR_PAIR;
layout->arg_locations[i].reg_index = gpr_count;
layout->arg_locations[i].reg_index2 = gpr_count + 1;
}
else if (classes[0] == SSE && classes[1] == SSE) {
layout->arg_locations[i].type = ARG_LOCATION_SSE_SSE_PAIR;
layout->arg_locations[i].reg_index = xmm_count;
layout->arg_locations[i].reg_index2 = xmm_count + 1;
}
else { // Mixed GPR and SSE
if (classes[0] == INTEGER) {
layout->arg_locations[i].type = ARG_LOCATION_INTEGER_SSE_PAIR;
layout->arg_locations[i].reg_index = gpr_count;
layout->arg_locations[i].reg_index2 = xmm_count;
}
else {
layout->arg_locations[i].type = ARG_LOCATION_SSE_INTEGER_PAIR;
layout->arg_locations[i].reg_index = xmm_count;
layout->arg_locations[i].reg_index2 = gpr_count;
}
}
gpr_count += gpr_needed;
xmm_count += xmm_needed;
placed_in_register = true;
}
}
}
// Fallback to stack
if (!placed_in_register) {
layout->arg_locations[i].type = ARG_LOCATION_STACK;
// Align current offset to the argument's natural alignment requirements.
// SysV requires 16-byte alignment for long double, __int128, and __m128 on the stack.
size_t align = type->alignment;
if (align < 8)
align = 8; // Stack slots are at least 8 bytes
current_stack_offset = (current_stack_offset + (align - 1)) & ~(align - 1); // Align up
layout->arg_locations[i].stack_offset = current_stack_offset;
current_stack_offset += (type->size + 7) & ~7; // Align to 8 bytes.
layout->num_stack_args++;
}
}
// Finalize the layout properties.
layout->num_gpr_args = gpr_count;
layout->num_xmm_args = xmm_count;
// The total stack space for arguments must be 16-byte aligned before the call.
layout->total_stack_alloc = (current_stack_offset + 15) & ~15;
// Safety check against excessive stack allocation.
if (layout->total_stack_alloc > INFIX_MAX_STACK_ALLOC) {
*out_layout = nullptr;
return INFIX_ERROR_LAYOUT_FAILED;
}
*out_layout = layout;
return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Forward): Emits the entry sequence of the System V forward trampoline.
 * @details Establishes an RBP-based frame, preserves R12-R15 so they can hold the
 *          trampoline's own state across the native call, copies the trampoline's
 *          incoming arguments into those registers, and reserves the outgoing
 *          stack-argument area.
 * @param buf The code buffer receiving the generated instructions.
 * @param layout The call frame layout. `target_fn` selects bound vs. unbound mode and
 *               `total_stack_alloc` is the size of the stack-argument area.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_forward_prologue_sysv_x64(code_buffer * buf, infix_call_frame_layout * layout) {
    // Callee-saved registers the trampoline repurposes for its context; their original
    // values must be preserved before they are overwritten.
    static const x64_gpr preserved[] = {R12_REG, R13_REG, R14_REG, R15_REG};
    const bool is_bound = layout->target_fn != nullptr;

    // Frame setup: push rbp ; mov rbp, rsp
    emit_push_reg(buf, RBP_REG);
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG);

    // push r12 ; push r13 ; push r14 ; push r15
    for (size_t k = 0; k < sizeof(preserved) / sizeof(preserved[0]); ++k)
        emit_push_reg(buf, preserved[k]);

    if (is_bound) {
        // A bound trampoline is entered with (ret_ptr, args_ptr) in RDI, RSI.
        // R12 is left alone here; the target address is materialized at the call site.
        emit_mov_reg_reg(buf, R13_REG, RDI_REG); // r13 = ret_ptr
        emit_mov_reg_reg(buf, R14_REG, RSI_REG); // r14 = args_ptr
    }
    else {
        // An unbound trampoline is entered with (target_fn, ret_ptr, args_ptr) in RDI, RSI, RDX.
        emit_mov_reg_reg(buf, R12_REG, RDI_REG); // r12 = target_fn
        emit_mov_reg_reg(buf, R13_REG, RSI_REG); // r13 = ret_ptr
        emit_mov_reg_reg(buf, R14_REG, RDX_REG); // r14 = args_ptr
    }

    // Reserve room for stack-passed arguments, if there are any. The layout stage has
    // already rounded this size up to a multiple of 16.
    if (layout->total_stack_alloc > 0)
        emit_sub_reg_imm32(buf, RSP_REG, layout->total_stack_alloc);

    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 3 (Forward): Generates code to move arguments from the `void**` array
* into their correct native locations (registers or stack).
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param arg_types The array of argument types.
* @param num_args Total number of arguments.
* @param num_fixed_args Number of fixed arguments.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_forward_argument_moves_sysv_x64(code_buffer * buf,
infix_call_frame_layout * layout,
infix_type ** arg_types,
size_t num_args,
c23_maybe_unused size_t num_fixed_args) {
// If returning a large struct, the hidden pointer (stored in r13) must be moved to RDI.
if (layout->return_value_in_memory)
emit_mov_reg_reg(buf, GPR_ARGS[0], R13_REG); // mov rdi, r13
// Marshall Register Arguments
// Loop over all arguments that are passed in registers.
for (size_t i = 0; i < num_args; ++i) {
infix_arg_location * loc = &layout->arg_locations[i];
if (loc->type == ARG_LOCATION_STACK)
continue; // Handle stack arguments in a separate pass.
// Load the pointer to the argument's data into a scratch register (r15).
// r14 holds the base of the `void** args_array`.
// r15 = args_array[i]
emit_mov_reg_mem(buf, R15_REG, R14_REG, i * sizeof(void *));
switch (loc->type) {
case ARG_LOCATION_GPR:
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
emit_movups_xmm_mem(buf, XMM_ARGS[loc->reg_index], R15_REG, 0);
else
// movsd xmm_reg, [r15] (Move Scalar Double-Precision)
emit_movsd_xmm_mem(buf, XMM_ARGS[loc->reg_index], R15_REG, 0);
break;
case ARG_LOCATION_GPR_PAIR:
emit_mov_reg_mem(buf, GPR_ARGS[loc->reg_index], R15_REG, 0); // mov gpr, [r15]
emit_mov_reg_mem(buf, GPR_ARGS[loc->reg_index2], R15_REG, 8); // movsd xmm, [r15 + 8]
break;
case ARG_LOCATION_INTEGER_SSE_PAIR:
emit_mov_reg_mem(buf, GPR_ARGS[loc->reg_index], R15_REG, 0); // mov gpr, [r15]
emit_movsd_xmm_mem(buf, XMM_ARGS[loc->reg_index2], R15_REG, 8); // movsd xmm2, [r15 + 8]
break;
case ARG_LOCATION_SSE_INTEGER_PAIR:
emit_movsd_xmm_mem(buf, XMM_ARGS[loc->reg_index], R15_REG, 0); // movsd xmm, [r15]
emit_mov_reg_mem(buf, GPR_ARGS[loc->reg_index2], R15_REG, 8); // mov gpr, [r15 + 8]
break;
case ARG_LOCATION_SSE_SSE_PAIR:
emit_movsd_xmm_mem(buf, XMM_ARGS[loc->reg_index], R15_REG, 0); // movsd xmm1, [r15]
emit_movsd_xmm_mem(buf, XMM_ARGS[loc->reg_index2], R15_REG, 8); // movsd xmm2, [r15 + 8]
break;
default:
// Should be unreachable if layout is correct.
break;
}
}
// Marshall Stack Arguments
if (layout->num_stack_args > 0) {
for (size_t i = 0; i < num_args; ++i) {
if (layout->arg_locations[i].type != ARG_LOCATION_STACK)
continue;
// Load pointer to argument data into r15.
emit_mov_reg_mem(buf, R15_REG, R14_REG, i * sizeof(void *)); // r15 = args_array[i]
size_t size = arg_types[i]->size;
// Copy the argument data from the user-provided buffer to the stack, 8 bytes at a time.
for (size_t offset = 0; offset < size; offset += 8) {
// mov rax, [r15 + offset] (load 8 bytes into scratch register)
emit_mov_reg_mem(buf, RAX_REG, R15_REG, offset);
// mov [rsp + stack_offset], rax (store 8 bytes onto the stack)
emit_mov_mem_reg(buf, RSP_REG, layout->arg_locations[i].stack_offset + offset, RAX_REG);
}
}
}
// Handle Variadic Calls
// The ABI requires that AL contains the number of XMM registers used for arguments.
if (layout->is_variadic)
// mov al, num_xmm_args (or mov eax, num_xmm_args)
emit_mov_reg_imm32(buf, RAX_REG, (int32_t)layout->num_xmm_args);
return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 3.5 (Forward): Emits the null-guarded indirect call to the target function.
 * @details The call always goes through R12. A bound trampoline loads its fixed target
 *          address into R12 here; an unbound trampoline already received it in the prologue.
 *          A null target is trapped with `ud2` instead of being called.
 * @param buf The code buffer receiving the generated instructions.
 * @param layout The call frame layout; only `target_fn` is consulted.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_forward_call_instruction_sysv_x64(code_buffer * buf,
                                                               c23_maybe_unused infix_call_frame_layout * layout) {
    void * const bound_target = layout->target_fn;

    // Bound trampoline: bake the target address into the instruction stream.
    if (bound_target != nullptr)
        emit_mov_reg_imm64(buf, R12_REG, (uint64_t)bound_target); // mov r12, imm64

    // test r12, r12 ; sets ZF when the function pointer is null
    emit_test_reg_reg(buf, R12_REG, R12_REG);
    // jnz +2 ; a non-null pointer jumps over the crash instruction that follows
    emit_jnz_short(buf, 2);
    // ud2 ; deliberate, well-defined crash on a null target
    emit_ud2(buf);
    // call r12 ; transfer control to the native function
    emit_call_reg(buf, R12_REG);

    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 4 (Forward): Generates the function epilogue for the System V trampoline.
* @details Emits code to handle the function's return value (from RAX/RDX, XMM0/XMM1, or
* the x87 FPU stack for `long double`) and properly tear down the stack frame.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param ret_type The function's return type.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_forward_epilogue_sysv_x64(code_buffer * buf,
infix_call_frame_layout * layout,
infix_type * ret_type) {
layout->epilogue_offset = (uint32_t)buf->size;
// Handle Return Value
// If the function returns something and it wasn't via a hidden pointer...
if (ret_type->category != INFIX_TYPE_VOID && !layout->return_value_in_memory) {
if (is_long_double(ret_type))
// `long double` is returned on the x87 FPU stack (st0).
// We store it into the user's return buffer (pointer held in r13).
// fstpt [r13] (Store Floating Point value and Pop)
emit_fstpt_mem(buf, R13_REG, 0);
else {
// For other types, we must classify the return type just like an argument.
arg_class_t classes[2];
size_t num_classes = 0;
bool is_aggregate = ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX;
if (is_aggregate)
classify_aggregate_sysv(ret_type, classes, &num_classes);
else if (is_float(ret_type) || is_double(ret_type) || (ret_type->category == INFIX_TYPE_VECTOR)) {
classes[0] = SSE;
num_classes = 1;
}
else {
classes[0] = INTEGER;
num_classes = 1;
if (ret_type->size > 8) {
classes[1] = INTEGER;
num_classes = 2;
}
}
if (num_classes == 1) { // Returned in a single register
if (classes[0] == SSE) {
if (is_float16(ret_type)) {
// movd eax, xmm0 ; mov [r13], ax
emit_movq_gpr_xmm(buf, RAX_REG, XMM0_REG);
emit_mov_mem_reg16(buf, R13_REG, 0, RAX_REG);
}
else if (is_float(ret_type))
emit_movss_mem_xmm(buf, R13_REG, 0, XMM0_REG); // movss [r13], xmm0
else if (ret_type->category == INFIX_TYPE_VECTOR && ret_type->size == 32)
emit_vmovupd_mem_ymm(buf, R13_REG, 0, XMM0_REG); // AVX case
else if (ret_type->category == INFIX_TYPE_VECTOR && ret_type->size == 64)
emit_vmovupd_mem_zmm(buf, R13_REG, 0, XMM0_REG); // AVX-512 case
else if (ret_type->category == INFIX_TYPE_VECTOR)
emit_movups_mem_xmm(buf, R13_REG, 0, XMM0_REG);
else
emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG); // movsd [r13], xmm0
}
else { // INTEGER class
// Use a size-appropriate move to avoid writing past the end of the buffer.
switch (ret_type->size) {
case 1:
emit_mov_mem_reg8(buf, R13_REG, 0, RAX_REG); // mov [r13], al
break;
case 2:
emit_mov_mem_reg16(buf, R13_REG, 0, RAX_REG); // mov [r13], ax
break;
case 4:
emit_mov_mem_reg32(buf, R13_REG, 0, RAX_REG); // mov [r13], eax
break;
default:
emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG); // mov [r13], rax
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
* @param arena The temporary arena for allocations.
* @param[out] out_layout The resulting reverse call frame layout blueprint.
* @param context The reverse trampoline context.
* @return `INFIX_SUCCESS` on success.
*/
static infix_status prepare_reverse_call_frame_sysv_x64(infix_arena_t * arena,
                                                        infix_reverse_call_frame_layout ** out_layout,
                                                        infix_reverse_t * context) {
    // Same contract as the forward variant: a missing output slot is a caller error.
    if (out_layout == nullptr)
        return INFIX_ERROR_INVALID_ARGUMENT;
    // Clear the output first so that every failure path, including allocation failure,
    // leaves the caller with nullptr rather than an untouched (possibly stale) pointer.
    *out_layout = nullptr;

    infix_reverse_call_frame_layout * layout = infix_arena_calloc(
        arena, 1, sizeof(infix_reverse_call_frame_layout), _Alignof(infix_reverse_call_frame_layout));
    if (layout == nullptr)
        return INFIX_ERROR_ALLOCATION_FAILED;

    // Size each region of the local frame, rounding to 16 bytes for safety and simplicity.
    size_t return_size = (context->return_type->size + 15) & ~15;
    size_t args_array_size = (context->num_args * sizeof(void *) + 15) & ~15;
    size_t saved_args_data_size = 0;
    size_t max_align = 16; // Never go below the 16-byte stack alignment.

    for (size_t i = 0; i < context->num_args; ++i) {
        const infix_type * arg = context->arg_types[i];
        // Security: reject excessively large types before they reach the code generator.
        if (arg->size > INFIX_MAX_ARG_SIZE)
            return INFIX_ERROR_LAYOUT_FAILED;
        // Each saved argument gets at least an 8-byte aligned slot.
        size_t align = arg->alignment;
        if (align < 8)
            align = 8;
        if (align > max_align)
            max_align = align;
        saved_args_data_size = _infix_align_up(saved_args_data_size, align);
        saved_args_data_size += arg->size;
    }
    if (saved_args_data_size > INFIX_MAX_ARG_SIZE)
        return INFIX_ERROR_LAYOUT_FAILED;

    // The extra `max_align` bytes leave room for the padding inserted in front of the
    // saved-argument area when its start is aligned below.
    size_t total_local_space = return_size + args_array_size + saved_args_data_size + max_align;
    // Safety check against allocating too much stack.
    if (total_local_space > INFIX_MAX_STACK_ALLOC)
        return INFIX_ERROR_LAYOUT_FAILED;

    // The total allocation for the stack frame is a multiple of the maximum required alignment.
    layout->total_stack_alloc = (uint32_t)_infix_align_up(total_local_space, max_align);
    // Locals are addressed with negative offsets from the frame pointer (RBP).
    // The layout is [ return_buffer | args_array | (padding) | saved_args_data ]
    layout->return_buffer_offset = -(int32_t)layout->total_stack_alloc;
    layout->args_array_offset = layout->return_buffer_offset + (int32_t)return_size;
    // Align the start of the saved data area.
    // NOTE(review): the offset is negative here and is rounded through a size_t cast;
    // this relies on _infix_align_up behaving as plain modular arithmetic - confirm.
    layout->saved_args_offset =
        (int32_t)_infix_align_up((size_t)(layout->args_array_offset + args_array_size), max_align);
    layout->max_align = (uint32_t)max_align;

    *out_layout = layout;
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Reverse): Generates the prologue for the reverse trampoline stub.
 * @details Emits standard System V function entry code, creates a stack frame,
 *          forces RSP down to a multiple of `layout->max_align`, and allocates all
 *          necessary local stack space.
 * @param buf The code buffer.
 * @param layout The layout blueprint (`max_align` and `total_stack_alloc` are used).
 * @return `INFIX_SUCCESS`, or `INFIX_ERROR_LAYOUT_FAILED` when `layout->max_align` is
 *         not a power of two in the range 1..128 and therefore cannot be expressed
 *         as the 8-bit mask of the alignment instruction. Nothing is emitted in that case.
 */
static infix_status generate_reverse_prologue_sysv_x64(code_buffer * buf, infix_reverse_call_frame_layout * layout) {
    const size_t align = layout->max_align;
    // The alignment mask (-align) is emitted as an 8-bit immediate. Narrowing a larger
    // alignment to int8_t silently truncates it: 256 becomes a mask of 0, and
    // `and rsp, 0` would wipe out the stack pointer. Refuse anything that does not fit.
    if (align == 0 || align > 128 || (align & (align - 1)) != 0)
        return INFIX_ERROR_LAYOUT_FAILED;

    emit_push_reg(buf, RBP_REG);             // push rbp
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG); // mov rbp, rsp
    // FORCE ALIGNMENT.
    // AND RSP, -max_align (the range check above guarantees -align fits in int8_t)
    emit_and_reg_imm8(buf, RSP_REG, (int8_t)(-(int)align));
    emit_sub_reg_imm32(buf, RSP_REG, layout->total_stack_alloc); // Allocate our calculated space.
    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 3 (Reverse): Generates code to marshal arguments from their native
* locations into the generic `void**` array for the C dispatcher.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param context The reverse trampoline context.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_reverse_argument_marshalling_sysv_x64(code_buffer * buf,
infix_reverse_call_frame_layout * layout,
infix_reverse_t * context) {
size_t gpr_idx = 0, xmm_idx = 0, current_saved_data_offset = 0;
// Correctly determine if the return value uses a hidden pointer by performing a full ABI classification.
bool return_in_memory = false;
infix_type * ret_type = context->return_type;
bool ret_is_aggregate = (ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX);
if (ret_is_aggregate) {
if (ret_type->size > 16)
return_in_memory = true;
else {
arg_class_t ret_classes[2];
size_t num_ret_classes;
classify_aggregate_sysv(ret_type, ret_classes, &num_ret_classes);
if (num_ret_classes > 0 && ret_classes[0] == MEMORY)
return_in_memory = true;
}
}
// The long double primitive is a special case that does not use the hidden pointer.
if (is_long_double(ret_type))
return_in_memory = false;
// If the return value is passed by reference, save the pointer from RDI.
if (return_in_memory)
emit_mov_mem_reg(buf, RBP_REG, layout->return_buffer_offset, GPR_ARGS[gpr_idx++]); // mov [rbp + offset], rdi
// Stack arguments passed by the caller start at [rbp + 16].
size_t stack_arg_offset = 16;
for (size_t i = 0; i < context->num_args; i++) {
infix_type * current_type = context->arg_types[i];
current_saved_data_offset = _infix_align_up(current_saved_data_offset, current_type->alignment);
int32_t arg_save_loc = layout->saved_args_offset + current_saved_data_offset;
// Correct classification logic for vectors/primitives vs aggregates
arg_class_t classes[2] = {NO_CLASS, NO_CLASS};
size_t num_classes = 0;
bool is_aggregate =
(current_type->category == INFIX_TYPE_STRUCT || current_type->category == INFIX_TYPE_UNION ||
current_type->category == INFIX_TYPE_ARRAY || current_type->category == INFIX_TYPE_COMPLEX);
if (is_aggregate) {
classify_aggregate_sysv(current_type, classes, &num_classes);
}
else if (is_float16(current_type) || is_float(current_type) || is_double(current_type) ||
current_type->category == INFIX_TYPE_VECTOR) {
classes[0] = SSE;
num_classes = 1;
}
else {
classes[0] = INTEGER;
num_classes = 1;
if (current_type->size > 8) {
classes[1] = INTEGER;
num_classes = 2;
}
}
bool is_from_stack = false;
// Determine if the argument is in registers or on the stack.
if (classes[0] == MEMORY)
is_from_stack = true;
else if (num_classes == 1) {
if (classes[0] == SSE)
if (xmm_idx < NUM_XMM_ARGS) {
// Use appropriate width move for vectors to prevent truncation
if (current_type->category == INFIX_TYPE_VECTOR) {
if (current_type->size == 64)
emit_vmovupd_mem_zmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else if (current_type->size == 32)
emit_vmovupd_mem_ymm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else if (current_type->size == 16)
emit_movups_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else // size 8 (or other small vector)
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
}
else if (is_float16(current_type)) {
// movd eax, xmm_reg ; mov [rbp + arg_save_loc], ax
emit_movq_gpr_xmm(buf, RAX_REG, XMM_ARGS[xmm_idx++]);
emit_mov_mem_reg16(buf, RBP_REG, arg_save_loc, RAX_REG);
}
else if (is_float(current_type))
emit_movss_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
else
emit_movsd_mem_xmm(buf, RBP_REG, arg_save_loc, XMM_ARGS[xmm_idx++]);
}
else
is_from_stack = true;
else if (gpr_idx < NUM_GPR_ARGS)
emit_mov_mem_reg(buf, RBP_REG, arg_save_loc, GPR_ARGS[gpr_idx++]);
else
is_from_stack = true;
}
else if (num_classes == 2) {
size_t gprs_needed = (classes[0] == INTEGER) + (classes[1] == INTEGER);
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
* @internal
* @brief Stage 4 (Reverse): Generates the code to call the high-level C dispatcher function.
* @details Emits code to load the dispatcher's arguments into the correct registers
* according to the System V ABI, then calls the dispatcher.
*
* The C dispatcher's signature is:
* `void fn(infix_reverse_t* context, void* return_value_ptr, void** args_array)`
*
* The generated code performs the following argument setup:
* 1. `RDI` (Arg 1): The `context` pointer (a 64-bit immediate).
* 2. `RSI` (Arg 2): The pointer to the return value buffer. This is either a
* pointer to local stack space, or the original pointer passed by the
* caller in RDI if the function returns a large struct by reference.
* 3. `RDX` (Arg 3): The pointer to the `args_array` on the local stack.
* 4. The address of the dispatcher function itself is loaded into a scratch
* register (`RAX`), which is then called.
* @param buf The code buffer.
* @param layout The layout blueprint.
* @param context The reverse context.
* @return `INFIX_SUCCESS`.
*/
static infix_status generate_reverse_dispatcher_call_sysv_x64(code_buffer * buf,
                                                              infix_reverse_call_frame_layout * layout,
                                                              infix_reverse_t * context) {
    infix_type * result_type = context->return_type;

    // RDI <- context: first argument of the dispatcher, embedded as a 64-bit immediate.
    emit_mov_reg_imm64(buf, RDI_REG, (uint64_t)context);  // mov rdi, #context_addr

    // Work out whether the native caller handed us a hidden return pointer.
    // Only aggregates can be returned in memory: either because they exceed
    // 16 bytes, or because their first eightbyte classifies as MEMORY.
    bool uses_hidden_pointer = false;
    switch (result_type->category) {
    case INFIX_TYPE_STRUCT:
    case INFIX_TYPE_UNION:
    case INFIX_TYPE_ARRAY:
    case INFIX_TYPE_COMPLEX:
        if (result_type->size > 16)
            uses_hidden_pointer = true;
        else {
            arg_class_t eightbytes[2];
            size_t eightbyte_count;
            classify_aggregate_sysv(result_type, eightbytes, &eightbyte_count);
            uses_hidden_pointer = (eightbyte_count > 0 && eightbytes[0] == MEMORY);
        }
        break;
    default:
        break;
    }
    // long double never uses the hidden pointer.
    if (is_long_double(result_type))
        uses_hidden_pointer = false;

    // RSI <- where the dispatcher must write the return value.
    if (uses_hidden_pointer) {
        // The slot holds the caller's pointer that was saved earlier: load its value.
        emit_mov_reg_mem(buf, RSI_REG, RBP_REG, layout->return_buffer_offset);  // mov rsi, [rbp + return_buffer_offset]
    }
    else {
        // The slot itself is the return buffer: take its address.
        emit_lea_reg_mem(buf, RSI_REG, RBP_REG, layout->return_buffer_offset);  // lea rsi, [rbp + return_buffer_offset]
    }

    // RDX <- address of the args_array on the local stack.
    emit_lea_reg_mem(buf, RDX_REG, RBP_REG, layout->args_array_offset);  // lea rdx, [rbp + args_array_offset]

    // Indirect call through RAX, used here as a scratch register.
    emit_mov_reg_imm64(buf, RAX_REG, (uint64_t)context->internal_dispatcher);  // mov rax, #dispatcher_addr
    emit_call_reg(buf, RAX_REG);
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 5 (Reverse): Generates the epilogue for the reverse trampoline stub.
 * @details Moves the return value out of the local return buffer into the place the
 * System V ABI expects it, then tears down the stack frame and returns:
 *
 * - `long double` is pushed onto the x87 FPU stack (`st(0)`).
 * - A value returned through the hidden pointer only needs that pointer reloaded
 *   into RAX.
 * - Everything else is loaded eightbyte by eightbyte. Each class draws from its
 *   *own* register sequence: INTEGER eightbytes use RAX then RDX, SSE eightbytes
 *   use XMM0 then XMM1. A mixed aggregate therefore comes back in RAX + XMM0
 *   (in whichever order the classes occur), never in RAX + XMM1 or XMM0 + RDX.
 *
 * Nothing is loaded for a `void` return type.
 * @param buf The code buffer.
 * @param layout The layout blueprint.
 * @param context The reverse context.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_reverse_epilogue_sysv_x64(code_buffer * buf,
                                                       infix_reverse_call_frame_layout * layout,
                                                       infix_reverse_t * context) {
    if (context->return_type->category != INFIX_TYPE_VOID) {
        infix_type * ret_type = context->return_type;
        // Determine whether the return value uses a hidden pointer. Only aggregates can:
        // either they are larger than 16 bytes or their first eightbyte is MEMORY.
        bool return_in_memory = false;
        bool ret_is_aggregate = (ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
                                 ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX);
        if (ret_is_aggregate) {
            if (ret_type->size > 16)
                return_in_memory = true;
            else {
                arg_class_t ret_classes[2];
                size_t num_ret_classes;
                classify_aggregate_sysv(ret_type, ret_classes, &num_ret_classes);
                if (num_ret_classes > 0 && ret_classes[0] == MEMORY)
                    return_in_memory = true;
            }
        }
        if (is_long_double(ret_type)) {
            // long double never uses the hidden pointer; it is returned in st(0).
            emit_fldt_mem(buf, RBP_REG, layout->return_buffer_offset);
        }
        else if (return_in_memory) {
            // The return value was written directly via the hidden pointer.
            // The ABI requires this pointer to be returned in RAX.
            emit_mov_reg_mem(buf, RAX_REG, RBP_REG, layout->return_buffer_offset);
        }
        else {
            // Classify the return type to determine which registers to load.
            arg_class_t classes[2];
            size_t num_classes = 0;
            // 128/256/512-bit vectors occupy a single vector register.
            if (ret_type->category == INFIX_TYPE_VECTOR &&
                (ret_type->size == 16 || ret_type->size == 32 || ret_type->size == 64)) {
                classes[0] = SSE;
                num_classes = 1;
            }
            else
                classify_aggregate_sysv(ret_type, classes, &num_classes);
            // Remember which register sequence the first eightbyte consumed from, so
            // the second eightbyte can pick the next free register of its own class.
            const bool first_is_sse = (num_classes >= 1 && classes[0] == SSE);
            if (num_classes >= 1) {  // First eightbyte
                if (first_is_sse) {
                    if (is_float16(ret_type)) {
                        // Half floats travel through a GPR: zero-extend 16 bits, then move to XMM0.
                        emit_movzx_reg64_mem16(buf, RAX_REG, RBP_REG, layout->return_buffer_offset);
                        emit_movq_xmm_gpr(buf, XMM0_REG, RAX_REG);
                    }
                    else if (is_float(ret_type))
                        emit_movss_xmm_mem(buf, XMM0_REG, RBP_REG, layout->return_buffer_offset);
                    else if (ret_type->category == INFIX_TYPE_VECTOR && ret_type->size == 32)
                        emit_vmovupd_ymm_mem(buf, XMM0_REG, RBP_REG, layout->return_buffer_offset);
                    else if (ret_type->category == INFIX_TYPE_VECTOR && ret_type->size == 64)
                        emit_vmovupd_zmm_mem(buf, XMM0_REG, RBP_REG, layout->return_buffer_offset);
                    // Use 128-bit move for standard vectors
                    else if (ret_type->category == INFIX_TYPE_VECTOR)
                        emit_movups_xmm_mem(buf, XMM0_REG, RBP_REG, layout->return_buffer_offset);
                    else
                        emit_movsd_xmm_mem(buf, XMM0_REG, RBP_REG, layout->return_buffer_offset);
                }
                else  // INTEGER
                    emit_mov_reg_mem(buf, RAX_REG, RBP_REG, layout->return_buffer_offset);
            }
            if (num_classes == 2) {  // Second eightbyte
                // A 32-byte vector can never reach this point (it is forced to a single
                // class above), so only plain 8-byte loads are needed here.
                if (classes[1] == SSE) {
                    // XMM0 is still free when the first eightbyte went to RAX.
                    emit_movsd_xmm_mem(
                        buf, first_is_sse ? XMM1_REG : XMM0_REG, RBP_REG, layout->return_buffer_offset + 8);
                }
                else {  // INTEGER
                    // RAX is still free when the first eightbyte went to XMM0.
                    emit_mov_reg_mem(
                        buf, first_is_sse ? RAX_REG : RDX_REG, RBP_REG, layout->return_buffer_offset + 8);
                }
            }
        }
    }
    // Standard function epilogue: tear down stack frame and return.
    emit_mov_reg_reg(buf, RSP_REG, RBP_REG);
    emit_pop_reg(buf, RBP_REG);
    emit_ret(buf);
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 1 (Direct): Analyzes a signature and creates a call frame layout for System V.
 * @details Runs the standard forward classifier to obtain each argument's ABI location,
 * then builds an `infix_direct_call_frame_layout` that additionally records the argument
 * type and a pointer to the user-supplied handler for every argument.
 *
 * Every argument that has an aggregate marshaller, a scalar marshaller, or a write-back
 * handler also receives a slot in a scratch area placed directly above the outgoing
 * stack arguments. The slot's offset is stored in `location.num_regs`, which this
 * layout reuses as "temporary buffer offset".
 * @param arena Arena that owns all allocations made here.
 * @param[out] out_layout Receives the new layout on success; set to `nullptr` when the
 *             frame would exceed `INFIX_MAX_STACK_ALLOC`.
 * @param ret_type The return type of the target function.
 * @param arg_types The argument types (`num_args` entries).
 * @param num_args Number of arguments.
 * @param handlers Per-argument handler descriptors (`num_args` entries).
 * @param target_fn The native function to be called.
 * @return `INFIX_SUCCESS`, the classifier's error status, `INFIX_ERROR_ALLOCATION_FAILED`,
 *         or `INFIX_ERROR_LAYOUT_FAILED`.
 */
static infix_status prepare_direct_forward_call_frame_sysv_x64(infix_arena_t * arena,
                                                               infix_direct_call_frame_layout ** out_layout,
                                                               infix_type * ret_type,
                                                               infix_type ** arg_types,
                                                               size_t num_args,
                                                               infix_direct_arg_handler_t * handlers,
                                                               void * target_fn) {
    // Let the regular classifier decide where each argument finally lives.
    infix_call_frame_layout * abi_layout = nullptr;
    infix_status abi_status =
        prepare_forward_call_frame_sysv_x64(arena, &abi_layout, ret_type, arg_types, num_args, num_args, target_fn);
    if (abi_status != INFIX_SUCCESS)
        return abi_status;

    // Allocate the direct layout and its per-argument table.
    infix_direct_call_frame_layout * direct =
        infix_arena_calloc(arena, 1, sizeof(infix_direct_call_frame_layout), _Alignof(infix_direct_call_frame_layout));
    if (!direct)
        return INFIX_ERROR_ALLOCATION_FAILED;
    direct->args =
        infix_arena_calloc(arena, num_args, sizeof(infix_direct_arg_layout), _Alignof(infix_direct_arg_layout));
    if (num_args > 0 && !direct->args)
        return INFIX_ERROR_ALLOCATION_FAILED;
    direct->num_args = num_args;
    direct->target_fn = target_fn;
    direct->return_value_in_memory = abi_layout->return_value_in_memory;

    // Outgoing stack arguments sit at the bottom of the allocation; scratch slots follow.
    size_t stack_args_bytes = abi_layout->total_stack_alloc;
    size_t scratch_bytes = 0;
    for (size_t i = 0; i < num_args; ++i) {
        infix_direct_arg_layout * slot = &direct->args[i];
        slot->location = abi_layout->arg_locations[i];
        slot->type = arg_types[i];
        slot->handler = &handlers[i];

        // Reserve a scratch slot sized for whatever this argument's handler produces.
        size_t slot_bytes;
        if (handlers[i].aggregate_marshaller) {
            scratch_bytes = _infix_align_up(scratch_bytes, arg_types[i]->alignment);
            slot_bytes = arg_types[i]->size;
        }
        else if (handlers[i].scalar_marshaller) {
            scratch_bytes = _infix_align_up(scratch_bytes, 16);
            slot_bytes = 16;
        }
        else if (handlers[i].writeback_handler) {
            // Write-back works on the pointed-to object when the argument is a pointer.
            const infix_type * pointee = (arg_types[i]->category == INFIX_TYPE_POINTER)
                ? arg_types[i]->meta.pointer_info.pointee_type
                : arg_types[i];
            scratch_bytes = _infix_align_up(scratch_bytes, pointee->alignment);
            slot_bytes = pointee->size;
        }
        else
            continue;  // No handler: this argument needs no scratch slot.

        // Record the slot offset, rebased past the outgoing stack arguments so it is
        // relative to RSP after the prologue's allocation.
        slot->location.num_regs = (uint32_t)scratch_bytes;
        slot->location.num_regs += (uint32_t)stack_args_bytes;
        scratch_bytes += slot_bytes;
    }

    // Total frame = stack arguments + scratch area, rounded up to 16 bytes.
    size_t unaligned_total = stack_args_bytes + scratch_bytes;
    direct->total_stack_alloc = (unaligned_total + 15) & ~15;
    if (direct->total_stack_alloc > INFIX_MAX_STACK_ALLOC) {
        *out_layout = nullptr;
        return INFIX_ERROR_LAYOUT_FAILED;
    }
    *out_layout = direct;
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 2 (Direct): Generates the direct marshalling prologue for System V.
 * @details Sets up an RBP-based frame, pushes R12-R15, copies the two incoming
 * direct-CIF arguments (`ret_ptr` in RDI, `lang_args` in RSI) into R13 and R14,
 * and finally reserves `layout->total_stack_alloc` bytes of stack for outgoing
 * arguments and marshalling buffers.
 * @param buf The code buffer.
 * @param layout The direct call frame layout.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_direct_forward_prologue_sysv_x64(code_buffer * buf,
                                                              infix_direct_call_frame_layout * layout) {
    // push rbp ; mov rbp, rsp
    emit_push_reg(buf, RBP_REG);
    emit_mov_reg_reg(buf, RBP_REG, RSP_REG);

    // Four pushes = 32 bytes. Together with the return address and the saved RBP
    // (16 bytes) that is 48 bytes, so the stack stays 16-byte aligned.
    emit_push_reg(buf, R12_REG);  // scratch data / target function
    emit_push_reg(buf, R13_REG);  // return value pointer
    emit_push_reg(buf, R14_REG);  // language objects array pointer
    emit_push_reg(buf, R15_REG);  // padding / scratch, keeps the push count even

    // Park the direct CIF's own arguments (ret_ptr, lang_args) in callee-saved registers.
    emit_mov_reg_reg(buf, R13_REG, RDI_REG);  // r13 = ret_ptr
    emit_mov_reg_reg(buf, R14_REG, RSI_REG);  // r14 = lang_objects array

    // Reserve the whole frame in one step; skipped when nothing is needed.
    const bool needs_stack_space = layout->total_stack_alloc > 0;
    if (needs_stack_space)
        emit_sub_reg_imm32(buf, RSP_REG, layout->total_stack_alloc);
    return INFIX_SUCCESS;
}
/**
* @internal
* @brief Stage 3 (Direct): Generates code to call marshallers and move arguments for System V.
* @details This corrected implementation uses a two-phase approach:
* 1. MARSHALL & SAVE: Call each user handler and save the C value to a temporary
* local stack buffer. This prevents register clobbering.
* 2. PLACE: Load the C value from its temporary location and move it to its final
* destination (the register or stack slot required by the System V ABI).
*/
static infix_status generate_direct_forward_argument_moves_sysv_x64(code_buffer * buf,
infix_direct_call_frame_layout * layout) {
// PHASE 1: MARSHALL & SAVE
for (size_t i = 0; i < layout->num_args; ++i) {
const infix_direct_arg_layout * arg_layout = &layout->args[i];
int32_t temp_offset = (int32_t)arg_layout->location.num_regs;
if (!arg_layout->handler->scalar_marshaller && !arg_layout->handler->aggregate_marshaller)
continue;
// Arg 1 (RDI) for marshaller: the language object pointer.
emit_mov_reg_mem(buf, RDI_REG, R14_REG, i * sizeof(void *));
if (arg_layout->handler->scalar_marshaller) {
emit_mov_reg_imm64(buf, R10_REG, (uint64_t)arg_layout->handler->scalar_marshaller);
#if INFIX_SANITY_CHECK_ENABLE
emit_mov_reg_reg(buf, R12_REG, RSP_REG);
#endif
emit_call_reg(buf, R10_REG); // Result is now in RAX or XMM0.
#if INFIX_SANITY_CHECK_ENABLE
emit_cmp_reg_reg(buf, R12_REG, RSP_REG);
emit_je_short(buf, 2);
emit_ud2(buf);
#endif
// Store RAX to stack. PLACE phase will load to XMM if needed.
emit_mov_mem_reg(buf, RSP_REG, temp_offset, RAX_REG);
}
else if (arg_layout->handler->aggregate_marshaller) {
infix/src/arch/x64/abi_sysv_x64.c view on Meta::CPAN
else
emit_mov_reg_mem(buf, GPR_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
break;
case ARG_LOCATION_XMM:
if (is_float(arg_layout->type))
emit_cvtsd2ss_xmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
else if (arg_layout->type->category == INFIX_TYPE_VECTOR && arg_layout->type->size == 32)
emit_vmovupd_ymm_mem(buf, XMM_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
else if (arg_layout->type->category == INFIX_TYPE_VECTOR && arg_layout->type->size == 64)
emit_vmovupd_zmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
else if (arg_layout->type->category == INFIX_TYPE_VECTOR)
emit_movups_xmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
else
emit_movsd_xmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
break;
case ARG_LOCATION_GPR_PAIR:
emit_mov_reg_mem(buf, GPR_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
emit_mov_reg_mem(buf, GPR_ARGS[arg_layout->location.reg_index2], RSP_REG, temp_offset + 8);
break;
case ARG_LOCATION_SSE_SSE_PAIR:
emit_movsd_xmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
emit_movsd_xmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index2], RSP_REG, temp_offset + 8);
break;
case ARG_LOCATION_INTEGER_SSE_PAIR:
emit_mov_reg_mem(buf, GPR_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
emit_movsd_xmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index2], RSP_REG, temp_offset + 8);
break;
case ARG_LOCATION_SSE_INTEGER_PAIR:
emit_movsd_xmm_mem(buf, XMM_ARGS[arg_layout->location.reg_index], RSP_REG, temp_offset);
emit_mov_reg_mem(buf, GPR_ARGS[arg_layout->location.reg_index2], RSP_REG, temp_offset + 8);
break;
case ARG_LOCATION_STACK:
for (size_t offset = 0; offset < arg_layout->type->size; offset += 8) {
emit_mov_reg_mem(buf, RAX_REG, RSP_REG, temp_offset + offset);
emit_mov_mem_reg(buf, RSP_REG, arg_layout->location.stack_offset + offset, RAX_REG);
}
break;
default:
break;
}
}
return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 3.5 (Direct): Generates the call instruction for System V.
 * @details Loads `layout->target_fn` into R12 as a 64-bit immediate and calls through
 * it. A `test`/`jnz` pair guards the call: when the address is zero the 2-byte jump
 * is not taken and the `ud2` that follows traps instead of calling a null pointer.
 * @param buf The code buffer.
 * @param layout The direct call frame layout holding the target address.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_direct_forward_call_instruction_sysv_x64(code_buffer * buf,
                                                                      infix_direct_call_frame_layout * layout) {
    void * const callee = layout->target_fn;

    emit_mov_reg_imm64(buf, R12_REG, (uint64_t)callee);  // mov r12, #target_fn
    // Null-target guard: skip the ud2 only when r12 is non-zero.
    emit_test_reg_reg(buf, R12_REG, R12_REG);
    emit_jnz_short(buf, 2);
    emit_ud2(buf);
    emit_call_reg(buf, R12_REG);  // call r12
    return INFIX_SUCCESS;
}
/**
 * @internal
 * @brief Stage 4 (Direct): Generates the function epilogue for System V.
 * @details The emitted code runs right after the call to the target and does, in order:
 *  1. Records where the epilogue starts in `layout->epilogue_offset`.
 *  2. Unless the result is `void` or was returned through memory, stores the return
 *     registers into the buffer addressed by R13:
 *     - `long double` is popped from the x87 stack.
 *     - SSE values (half, float, double, vectors) come from XMM0; `_Float16` is SSE
 *       too and is stored as 16 bits via RAX.
 *     - INTEGER values come from RAX (and RDX for a second INTEGER eightbyte), using
 *       a store width matching 1/2/4-byte results.
 *     - Mixed two-eightbyte aggregates use RAX + XMM0 in class order.
 *  3. Calls every write-back handler as `handler(lang_object, scratch_ptr, type)`,
 *     preserving RAX, RDX and the low 64 bits of XMM0 around each call.
 *  4. Releases the frame, restores R15..R12 and RBP, and returns.
 * @param buf The code buffer.
 * @param layout The direct call frame layout.
 * @param ret_type The return type of the target function.
 * @return `INFIX_SUCCESS`.
 */
static infix_status generate_direct_forward_epilogue_sysv_x64(code_buffer * buf,
                                                              infix_direct_call_frame_layout * layout,
                                                              infix_type * ret_type) {
    layout->epilogue_offset = (uint32_t)buf->size;
    if (ret_type->category != INFIX_TYPE_VOID && !layout->return_value_in_memory) {
        // Use full ABI classification for return values
        if (is_long_double(ret_type))
            emit_fstpt_mem(buf, R13_REG, 0);
        else {
            arg_class_t classes[2];
            size_t num_classes = 0;
            bool is_aggregate = ret_type->category == INFIX_TYPE_STRUCT || ret_type->category == INFIX_TYPE_UNION ||
                ret_type->category == INFIX_TYPE_ARRAY || ret_type->category == INFIX_TYPE_COMPLEX;
            if (is_aggregate)
                classify_aggregate_sysv(ret_type, classes, &num_classes);
            // Half-precision floats are returned in XMM0 just like float/double. Without
            // this check they would be treated as a 2-byte INTEGER and read from AX.
            else if (is_float16(ret_type) || is_float(ret_type) || is_double(ret_type) ||
                     (ret_type->category == INFIX_TYPE_VECTOR)) {
                classes[0] = SSE;
                num_classes = 1;
            }
            else {
                classes[0] = INTEGER;
                num_classes = 1;
                if (ret_type->size > 8) {
                    // Wide integer primitives span RAX:RDX.
                    classes[1] = INTEGER;
                    num_classes = 2;
                }
            }
            if (num_classes == 1) {
                if (classes[0] == SSE) {
                    if (is_float16(ret_type)) {
                        // No direct 16-bit XMM store is used: bounce through RAX.
                        emit_movq_gpr_xmm(buf, RAX_REG, XMM0_REG);
                        emit_mov_mem_reg16(buf, R13_REG, 0, RAX_REG);
                    }
                    else if (is_float(ret_type))
                        emit_movss_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                    else if (ret_type->category == INFIX_TYPE_VECTOR && ret_type->size == 32)
                        emit_vmovupd_mem_ymm(buf, R13_REG, 0, XMM0_REG);
                    else if (ret_type->category == INFIX_TYPE_VECTOR && ret_type->size == 64)
                        emit_vmovupd_mem_zmm(buf, R13_REG, 0, XMM0_REG);
                    else if (ret_type->category == INFIX_TYPE_VECTOR)
                        emit_movups_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                    else
                        emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                }
                else {  // INTEGER
                    // Store only as many bytes as the result occupies.
                    switch (ret_type->size) {
                    case 1:
                        emit_mov_mem_reg8(buf, R13_REG, 0, RAX_REG);
                        break;
                    case 2:
                        emit_mov_mem_reg16(buf, R13_REG, 0, RAX_REG);
                        break;
                    case 4:
                        emit_mov_mem_reg32(buf, R13_REG, 0, RAX_REG);
                        break;
                    default:
                        emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
                        break;
                    }
                }
            }
            else if (num_classes == 2) {
                // INTEGER eightbytes come from RAX then RDX, SSE eightbytes from XMM0
                // then XMM1; each class advances through its own sequence.
                if (classes[0] == INTEGER && classes[1] == INTEGER) {
                    emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
                    emit_mov_mem_reg(buf, R13_REG, 8, RDX_REG);
                }
                else if (classes[0] == SSE && classes[1] == SSE) {
                    emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                    emit_movsd_mem_xmm(buf, R13_REG, 8, XMM1_REG);
                }
                else if (classes[0] == INTEGER && classes[1] == SSE) {
                    emit_mov_mem_reg(buf, R13_REG, 0, RAX_REG);
                    emit_movsd_mem_xmm(buf, R13_REG, 8, XMM0_REG);
                }
                else {  // SSE, INTEGER
                    emit_movsd_mem_xmm(buf, R13_REG, 0, XMM0_REG);
                    emit_mov_mem_reg(buf, R13_REG, 8, RAX_REG);
                }
            }
        }
    }
    // Call Write-Back Handlers
    for (size_t i = 0; i < layout->num_args; ++i) {
        const infix_direct_arg_layout * arg = &layout->args[i];
        if (arg->handler->writeback_handler) {
            // Save return registers before call
            emit_push_reg(buf, RAX_REG);           // +8
            emit_push_reg(buf, RDX_REG);           // +8
            emit_sub_reg_imm32(buf, RSP_REG, 32);  // +32 (save area; only XMM0's low 64 bits are stored)
            // Total stack shift: +48 bytes
            emit_movsd_mem_xmm(buf, RSP_REG, 0, XMM0_REG);
            // Set up args for write-back call
            // Arg 1 (RDI): the language object for this argument.
            emit_mov_reg_mem(buf, RDI_REG, R14_REG, i * sizeof(void *));
            // Arg 2 (RSI): Pointer to the C data (in our scratch space)
            // Offsets are relative to the *original* RSP of the body.
            // Since we just pushed/subbed 48 bytes, we must add 48 to reach the original frame.
            int32_t temp_offset = (int32_t)arg->location.num_regs;
            emit_lea_reg_mem(buf, RSI_REG, RSP_REG, temp_offset + 48);
            // Arg 3 (RDX): the argument's type descriptor.
            emit_mov_reg_imm64(buf, RDX_REG, (uint64_t)arg->type);
            emit_mov_reg_imm64(buf, R10_REG, (uint64_t)arg->handler->writeback_handler);
            emit_call_reg(buf, R10_REG);
            // Restore return registers
            emit_movsd_xmm_mem(buf, XMM0_REG, RSP_REG, 0);
            emit_add_reg_imm32(buf, RSP_REG, 32);
            emit_pop_reg(buf, RDX_REG);
            emit_pop_reg(buf, RAX_REG);
        }
    }
    // Standard Epilogue: undo the prologue in reverse order.
    if (layout->total_stack_alloc > 0)
        emit_add_reg_imm32(buf, RSP_REG, (int32_t)layout->total_stack_alloc);
    emit_pop_reg(buf, R15_REG);
    emit_pop_reg(buf, R14_REG);
    emit_pop_reg(buf, R13_REG);
    emit_pop_reg(buf, R12_REG);
    emit_pop_reg(buf, RBP_REG);
    emit_ret(buf);
    return INFIX_SUCCESS;
}
( run in 1.154 second using v1.01-cache-2.11-cpan-5735350b133 )