Affix

 view release on metacpan or  search on metacpan

infix/src/jit/executor.c  view on Meta::CPAN

} UNWIND_CODE;

// Local declaration of the unwind-info header that precedes a function's unwind
// codes. The register-SEH code in this file fills one of these in by hand inside
// the metadata area of a JIT block.
// NOTE(review): field meanings below follow Microsoft's documented x64
// UNWIND_INFO layout — confirm against the SDK definition if the layout changes.
typedef struct _UNWIND_INFO {
    uint8_t Version : 3;        // Format version; this file writes 1.
    uint8_t Flags : 5;          // UNW_FLAG_* bits; this file sets UNW_FLAG_EHANDLER for safe forward trampolines.
    uint8_t SizeOfPrologue;     // Length of the function prologue in bytes.
    uint8_t CountOfCodes;       // Number of entries used in UnwindCode[].
    uint8_t FrameRegister : 4;  // Frame pointer register number; this file writes 5 (RBP).
    uint8_t FrameOffset : 4;    // Offset applied to the frame register; this file writes 0.
    UNWIND_CODE UnwindCode[1];  // Variable length array: CountOfCodes entries actually follow the header.
} UNWIND_INFO;

// We reserve INFIX_SEH_METADATA_SIZE (256) bytes at the end of every JIT block for SEH metadata.
#define INFIX_SEH_METADATA_SIZE 256
#elif defined(INFIX_OS_WINDOWS) && defined(INFIX_ARCH_AARCH64)
// First 32-bit header word of a Windows ARM64 unwind (.xdata) record, declared
// locally as bit-fields. Packing is forced to 1 so the struct is exactly the
// 32 bits described by the fields (18 + 2 + 1 + 1 + 5 + 5).
// NOTE(review): field meanings below follow Microsoft's documented ARM64
// exception-handling format — confirm against that document before relying on them.
#pragma pack(push, 1)
typedef struct _UNWIND_INFO_ARM64 {
    uint32_t FunctionLength : 18;  // Function length (documented as length in bytes divided by 4).
    uint32_t Version : 2;          // Record format version.
    uint32_t X : 1;                // Set when exception data (a handler) is present.
    uint32_t E : 1;                // Set when the single epilogue is described in the header itself.
    uint32_t EpilogueCount : 5;    // Number of epilogue scopes (or epilogue start index when E is set).
    uint32_t CodeWords : 5;        // Number of 32-bit words holding the unwind codes.
} UNWIND_INFO_ARM64;
#pragma pack(pop)
#define INFIX_SEH_METADATA_SIZE 256
#else
#define INFIX_SEH_METADATA_SIZE 0
#endif

// macOS JIT Security Hardening Logic
#if defined(INFIX_OS_MACOS)
/**
 * @internal
 * @brief macOS-specific function pointers and types for checking JIT entitlements.
 *
 * @details To support hardened runtimes on Apple platforms (especially Apple Silicon),
 * `infix` must use special APIs like `MAP_JIT` and `pthread_jit_write_protect_np`.
 * However, these are only effective if the host application has been granted the
 * `com.apple.security.cs.allow-jit` entitlement.
 *
 * This logic performs a runtime check for these APIs and the entitlement, gracefully
 * falling back to the legacy (but less secure) `mprotect` method if they are not
 * available. This provides maximum security for production apps while maintaining
 * maximum convenience for developers who may not have codesigned their test executables.
 */
// Minimal local stand-ins for the CoreFoundation / Security types used below, so
// the framework headers do not have to be included. They are only ever handled
// as opaque pointers in this file.
typedef const struct __CFString * CFStringRef;
typedef const void * CFTypeRef;
typedef struct __SecTask * SecTaskRef;
typedef struct __CFError * CFErrorRef;
// Encoding constant passed to CFStringCreateWithCString for UTF-8 input.
#define kCFStringEncodingUTF8 0x08000100
// A struct to hold dynamically loaded function pointers from macOS frameworks.
// Every member is filled in by initialize_macos_apis() via dlsym(); a member is
// left null when its symbol (or its framework) could not be found, so callers
// must null-check before use.
static struct {
    // --- CoreFoundation ---
    void (*CFRelease)(CFTypeRef);
    bool (*CFBooleanGetValue)(CFTypeRef boolean);
    CFStringRef (*CFStringCreateWithCString)(CFTypeRef allocator, const char * cStr, uint32_t encoding);
    // Value read from the exported `kCFAllocatorDefault` variable (not a function).
    CFTypeRef kCFAllocatorDefault;
    // --- Security ---
    SecTaskRef (*SecTaskCreateFromSelf)(CFTypeRef allocator);
    CFTypeRef (*SecTaskCopyValueForEntitlement)(SecTaskRef task, CFStringRef entitlement, CFErrorRef * error);
    // --- Looked up through RTLD_DEFAULT (already-loaded system libraries) ---
    void (*pthread_jit_write_protect_np)(int enabled);
    void (*sys_icache_invalidate)(void * start, size_t len);
} g_macos_apis;
/**
 * @internal
 * @brief One-time initialization to dynamically load macOS framework functions.
 * @details Uses `dlopen` and `dlsym` to find the necessary CoreFoundation and Security
 * framework functions at runtime. This avoids a hard link-time dependency,
 * making the library more portable and resilient if these frameworks change.
 *
 * The results are stored in `g_macos_apis`. Any symbol that cannot be resolved is
 * left null; if either framework cannot be opened, none of the framework entries
 * are populated (only the `RTLD_DEFAULT` lookups are attempted).
 *
 * On success the framework handles are deliberately kept open for the lifetime of
 * the process: the function pointers (and the allocator value) stored in
 * `g_macos_apis` point into those libraries, and `dlclose` would drop the only
 * reference this code holds on them, after which the pointers are no longer
 * guaranteed to be valid. This function is meant to run once, so the cost is two
 * handles per process.
 */
static void initialize_macos_apis(void) {
    // We don't need to link against these frameworks, which makes building simpler.
    void * cf = dlopen("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", RTLD_LAZY);
    void * sec = dlopen("/System/Library/Frameworks/Security.framework/Security", RTLD_LAZY);

    // Hardened Runtime helpers found in libSystem/libpthread
    g_macos_apis.pthread_jit_write_protect_np = dlsym(RTLD_DEFAULT, "pthread_jit_write_protect_np");
    g_macos_apis.sys_icache_invalidate = dlsym(RTLD_DEFAULT, "sys_icache_invalidate");

    if (!cf || !sec) {
        INFIX_DEBUG_PRINTF("Warning: Could not dlopen macOS frameworks. JIT security features will be degraded.");
        // No symbols were taken from either library yet, so releasing the one
        // handle that did open is safe.
        if (cf)
            dlclose(cf);
        if (sec)
            dlclose(sec);
        return;
    }

    // CoreFoundation entry points.
    g_macos_apis.CFRelease = dlsym(cf, "CFRelease");
    g_macos_apis.CFBooleanGetValue = dlsym(cf, "CFBooleanGetValue");
    g_macos_apis.CFStringCreateWithCString = dlsym(cf, "CFStringCreateWithCString");

    // kCFAllocatorDefault is an exported variable: dlsym yields its address, so
    // read the value through the pointer.
    void ** pAlloc = (void **)dlsym(cf, "kCFAllocatorDefault");
    if (pAlloc)
        g_macos_apis.kCFAllocatorDefault = *pAlloc;

    // Security framework entry points.
    g_macos_apis.SecTaskCreateFromSelf = dlsym(sec, "SecTaskCreateFromSelf");
    g_macos_apis.SecTaskCopyValueForEntitlement = dlsym(sec, "SecTaskCopyValueForEntitlement");

    // Intentionally no dlclose(cf) / dlclose(sec) here: the pointers stored above
    // must stay valid for as long as g_macos_apis is used.
}
/**
 * @internal
 * @brief Checks if the current process has the `com.apple.security.cs.allow-jit` entitlement.
 * @return `true` if the entitlement is present and set to true, `false` otherwise.
 */
static bool has_jit_entitlement(void) {
    // Use pthread_once to ensure the dynamic loading happens exactly once, thread-safely.
    static pthread_once_t init_once = PTHREAD_ONCE_INIT;
    pthread_once(&init_once, initialize_macos_apis);

    // Secure JIT path on macOS requires both the entitlement check and the toggle API.
    if (!g_macos_apis.pthread_jit_write_protect_np)
        return false;

    if (!g_macos_apis.SecTaskCopyValueForEntitlement || !g_macos_apis.CFStringCreateWithCString)
        return false;
    bool result = false;
    SecTaskRef task = g_macos_apis.SecTaskCreateFromSelf(g_macos_apis.kCFAllocatorDefault);
    if (!task)
        return false;
    CFStringRef key = g_macos_apis.CFStringCreateWithCString(
        g_macos_apis.kCFAllocatorDefault, "com.apple.security.cs.allow-jit", kCFStringEncodingUTF8);
    CFTypeRef value = nullptr;
    if (key) {

infix/src/jit/executor.c  view on Meta::CPAN

            munmap(exec.rw_ptr, size);
        if (exec.rx_ptr != MAP_FAILED)
            munmap(exec.rx_ptr, size);
        close(exec.shm_fd);
        _infix_set_system_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_EXECUTABLE_MEMORY_FAILURE, err, "mmap failed");
        return (infix_executable_t){.rx_ptr = nullptr, .rw_ptr = nullptr, .size = 0, .shm_fd = -1};
    }

    // The mmap mappings hold a reference to the shared memory object, so we don't
    // need the FD anymore. Keeping it open consumes a file descriptor per trampoline,
    // causing "shm_open failed" after ~1024 trampolines.
    close(exec.shm_fd);
    exec.shm_fd = -1;
#endif
    exec.size = size;
    INFIX_DEBUG_PRINTF("Allocated JIT memory. RW at %p, RX at %p", exec.rw_ptr, exec.rx_ptr);
    return exec;
}

#if defined(INFIX_OS_WINDOWS)
/**
 * @internal
 * @brief Language-specific exception handler attached to safe trampolines on Windows.
 *
 * @details During the unwind phase (`EXCEPTION_UNWINDING` or `EXCEPTION_EXIT_UNWIND`
 * set in the exception flags) the routine does nothing and lets the search continue.
 * Otherwise it records `INFIX_CODE_NATIVE_EXCEPTION` as the current error, reads a
 * 32-bit epilogue offset from the dispatcher context's `HandlerData`, adds it to
 * `ImageBase` to form the resume address, and calls `RtlUnwind` targeting that
 * address in the establisher frame.
 *
 * @param ExceptionRecord   The exception being dispatched.
 * @param EstablisherFrame  Frame of the trampoline that owns this handler.
 * @param ContextRecord     Unused.
 * @param DispatcherContext Actually a `PDISPATCHER_CONTEXT`; supplies `HandlerData`
 *                          and `ImageBase`.
 * @return `ExceptionContinueSearch`. After a successful `RtlUnwind` the return
 *         statement is not expected to be reached.
 */
static EXCEPTION_DISPOSITION _infix_seh_personality_routine(PEXCEPTION_RECORD ExceptionRecord,
                                                            void * EstablisherFrame,
                                                            c23_maybe_unused PCONTEXT ContextRecord,
                                                            void * DispatcherContext) {
    const bool unwind_in_progress =
        (ExceptionRecord->ExceptionFlags & (EXCEPTION_UNWINDING | EXCEPTION_EXIT_UNWIND)) != 0;

    if (!unwind_in_progress) {
        // First-pass dispatch: publish the error for the caller of the trampoline.
        _infix_set_error(INFIX_CATEGORY_ABI, INFIX_CODE_NATIVE_EXCEPTION, 0);

        // HandlerData holds the 4-byte epilogue offset written when the unwind
        // info was registered; it is relative to the dispatcher's ImageBase.
        PDISPATCHER_CONTEXT dispatch = (PDISPATCHER_CONTEXT)DispatcherContext;
        const uint32_t * handler_data = (const uint32_t *)dispatch->HandlerData;
        void * resume_at = (void *)(dispatch->ImageBase + handler_data[0]);

        // Non-local unwind straight to the trampoline's epilogue.
        RtlUnwind(EstablisherFrame, resume_at, ExceptionRecord, nullptr);
    }

    // Reached when already unwinding (or, in theory, if RtlUnwind returned).
    return ExceptionContinueSearch;
}

#if defined(INFIX_ARCH_X64)
// Internal: Populates and registers SEH metadata for a Windows x64 JIT block.
static void _infix_register_seh_windows_x64(infix_executable_t * exec,
                                            infix_executable_category_t category,
                                            uint32_t prologue_size,
                                            uint32_t epilogue_offset) {
    // metadata_ptr starts after the machine code.
    uint8_t * metadata_base = (uint8_t *)exec->rw_ptr + exec->size;

    // RUNTIME_FUNCTION (PDATA) - Must be 4-byte aligned.
    RUNTIME_FUNCTION * rf = (RUNTIME_FUNCTION *)_infix_align_up((size_t)metadata_base, 4);

    // UNWIND_INFO (XDATA) - Follows PDATA.
    UNWIND_INFO * ui = (UNWIND_INFO *)_infix_align_up((size_t)(rf + 1), 2);

    ui->Version = 1;
    ui->Flags = 0;
    if (category == INFIX_EXECUTABLE_SAFE_FORWARD)
        ui->Flags |= UNW_FLAG_EHANDLER;
    ui->FrameRegister = 5;  // RBP
    ui->FrameOffset = 0;
    ui->SizeOfPrologue = (uint8_t)prologue_size;

    if (category == INFIX_EXECUTABLE_REVERSE) {
        // Reverse Trampoline: push rbp, push rsi, push rdi, mov rbp, rsp, and rsp -mask, [sub rsp, alloc]
        ui->CountOfCodes = 4;
        ui->UnwindCode[0].CodeOffset = 6;  // After mov rbp, rsp
        ui->UnwindCode[0].UnwindOp = UWOP_SET_FPREG;
        ui->UnwindCode[0].OpInfo = 0;

        ui->UnwindCode[1].CodeOffset = 3;  // After push rdi
        ui->UnwindCode[1].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[1].OpInfo = 7;  // RDI

        ui->UnwindCode[2].CodeOffset = 2;  // After push rsi
        ui->UnwindCode[2].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[2].OpInfo = 6;  // RSI

        ui->UnwindCode[3].CodeOffset = 1;  // After push rbp
        ui->UnwindCode[3].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[3].OpInfo = 5;  // RBP
    }
    else {
        // Forward or Direct Trampoline: push rbp, push r12-r15, mov rbp, rsp, and rsp -16, [sub rsp, alloc]
        ui->CountOfCodes = 6;
        // Opcodes in reverse order:
        ui->UnwindCode[0].CodeOffset = 12;  // After mov rbp, rsp
        ui->UnwindCode[0].UnwindOp = UWOP_SET_FPREG;
        ui->UnwindCode[0].OpInfo = 0;

        ui->UnwindCode[1].CodeOffset = 9;  // After push r15
        ui->UnwindCode[1].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[1].OpInfo = 15;  // R15

        ui->UnwindCode[2].CodeOffset = 7;  // After push r14
        ui->UnwindCode[2].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[2].OpInfo = 14;  // R14

        ui->UnwindCode[3].CodeOffset = 5;  // After push r13
        ui->UnwindCode[3].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[3].OpInfo = 13;  // R13

        ui->UnwindCode[4].CodeOffset = 3;  // After push r12
        ui->UnwindCode[4].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[4].OpInfo = 12;  // R12

        ui->UnwindCode[5].CodeOffset = 1;  // After push rbp

infix/src/jit/executor.c  view on Meta::CPAN

        // are located at the end of the XDATA, which is 4-byte aligned.
        handler_info_ptr[0] = rva_offset + (uint32_t)(stub - (uint8_t *)exec->rx_ptr);
        handler_info_ptr[1] = epilogue_offset;
    }

    if (RtlAddFunctionTable(rf, 2, base_address)) {
        exec->seh_registration = rf;
        INFIX_DEBUG_PRINTF(
            "Registered SEH PDATA at %p (XDATA at %p, Stub at %p) for JIT code at %p", rf, ui, stub, exec->rx_ptr);
    }
    else {
        fprintf(stderr, "infix: RtlAddFunctionTable failed! GetLastError=%lu\n", GetLastError());
    }
}
#endif
#endif

#if defined(INFIX_OS_LINUX) && defined(INFIX_ARCH_X64)
/**
 * @internal
 * @brief Builds and registers a minimal `.eh_frame` image for one JIT block on Linux x64.
 * @details The heap-allocated image is laid out as
 *          [ CIE (32 bytes) | FDE (64 bytes) | 4-byte zero terminator ].
 *          The FDE covers `exec->size` bytes starting at `exec->rx_ptr`; its call-frame
 *          instructions are chosen by `category` (one sequence for reverse trampolines,
 *          another for every other category). The finished image is passed to
 *          `__register_frame` while holding `g_dwarf_mutex` and then stored in
 *          `exec->eh_frame_ptr`. If the allocation fails the function returns without
 *          registering anything.
 * @param exec     Executable block to describe; `eh_frame_ptr` is written on success.
 * @param category Selects which prologue description is emitted.
 */
static void _infix_register_eh_frame_linux_x64(infix_executable_t * exec, infix_executable_category_t category) {
    // Simplified .eh_frame layout: [ CIE | FDE | Terminator ]
    enum { CIE_BYTES = 32, FDE_BYTES = 64, TERMINATOR_BYTES = 4 };

    // CIE fields that follow the 4-byte length and 4-byte CIE id.
    static const uint8_t cie_payload[] = {
        1,                 // version
        '\0',              // augmentation string ("")
        1,                 // code alignment factor
        0x78,              // data alignment factor (-8)
        16,                // return address register (rip)
        0x0c, 0x07, 0x08,  // initial rule: CFA = rsp + 8
        0x90, 0x01,        // initial rule: rip saved at CFA - 8
    };

    // Reverse trampoline prologue: push rbp; mov rbp, rsp; push rsi; push rdi
    static const uint8_t reverse_cfi[] = {
        0x41, 0x0e, 16,    // loc +1 (after push rbp): def_cfa_offset 16
        0x86, 0x02,        // offset rbp (6), 2
        0x43, 0x0d, 0x06,  // loc +3 (after mov rbp, rsp): def_cfa_register rbp (6)
        0x41, 0x84, 0x03,  // loc +1 (after push rsi): offset rsi (4), 3
        0x41, 0x85, 0x04,  // loc +1 (after push rdi): offset rdi (5), 4
    };

    // Other trampolines: push rbp; mov rbp, rsp; push r12; push r13; push r14; push r15
    static const uint8_t forward_cfi[] = {
        0x41, 0x0e, 16,    // loc +1 (after push rbp): def_cfa_offset 16
        0x86, 0x02,        // offset rbp (6), 2
        0x43, 0x0d, 0x06,  // loc +3 (after mov rbp, rsp): def_cfa_register rbp (6)
        0x42, 0x8c, 0x03,  // loc +2 (after push r12): offset r12, 3
        0x42, 0x8d, 0x04,  // loc +2 (after push r13): offset r13, 4
        0x42, 0x8e, 0x05,  // loc +2 (after push r14): offset r14, 5
        0x42, 0x8f, 0x06,  // loc +2 (after push r15): offset r15, 6
    };

    const size_t image_bytes = CIE_BYTES + FDE_BYTES + TERMINATOR_BYTES;
    uint8_t * image = infix_malloc(image_bytes);
    if (!image)
        return;
    // Zero-fill up front: every byte not written below stays 0, which serves as
    // the padding of both records and as the terminator.
    infix_memset(image, 0, image_bytes);

    // --- CIE ---
    uint8_t * cie = image;
    *(uint32_t *)cie = (uint32_t)(CIE_BYTES - 4);  // length (excludes the length word itself)
    *(uint32_t *)(cie + 4) = 0;                    // CIE id
    for (size_t i = 0; i < sizeof cie_payload; ++i)
        cie[8 + i] = cie_payload[i];

    // --- FDE ---
    uint8_t * fde = image + CIE_BYTES;
    uint8_t * cie_link = fde + 4;
    *(uint32_t *)fde = (uint32_t)(FDE_BYTES - 4);       // length (excludes the length word itself)
    *(uint32_t *)cie_link = (uint32_t)(cie_link - image);  // back-offset from this field to the CIE
    *(void **)(fde + 8) = exec->rx_ptr;                 // start address of the covered code
    *(uint64_t *)(fde + 16) = (uint64_t)exec->size;     // number of bytes covered
    fde[24] = 0;                                        // aug data len

    // Call-frame instructions describing the trampoline prologue.
    const uint8_t * cfi;
    size_t cfi_len;
    if (category == INFIX_EXECUTABLE_REVERSE) {
        cfi = reverse_cfi;
        cfi_len = sizeof reverse_cfi;
    }
    else {
        cfi = forward_cfi;
        cfi_len = sizeof forward_cfi;
    }
    uint8_t * cfi_out = fde + 25;
    for (size_t i = 0; i < cfi_len; ++i)
        cfi_out[i] = cfi[i];

    // --- Terminator ---
    *(uint32_t *)(fde + FDE_BYTES) = 0;

    extern void __register_frame(void *);
    pthread_mutex_lock(&g_dwarf_mutex);
    __register_frame(image);
    pthread_mutex_unlock(&g_dwarf_mutex);

    exec->eh_frame_ptr = image;
    INFIX_DEBUG_PRINTF("Registered DWARF .eh_frame at %p for JIT code at %p", (void *)image, exec->rx_ptr);
}
#elif defined(INFIX_OS_LINUX) && defined(INFIX_ARCH_AARCH64)
/**
 * @internal
 * @brief Registers DWARF unwind information for a JIT-compiled block on ARM64 Linux.
 * @details This allows the C++ exception unwinder to correctly walk through
 *          JIT-compiled frames. We manually construct a Common Information Entry (CIE)
 *          and a Frame Description Entry (FDE) that match the stack behavior
 *          of our ARM64 trampolines.
 */
static void _infix_register_eh_frame_arm64(infix_executable_t * exec, infix_executable_category_t category) {
    // Simplified .eh_frame layout: [ CIE | FDE | Terminator ]
    const size_t cie_size = 32;
    const size_t fde_size = 64;
    const size_t total_size = cie_size + fde_size + 4;  // +4 for null terminator

    uint8_t * eh = infix_malloc(total_size);
    if (!eh)
        return;
    infix_memset(eh, 0, total_size);

    uint8_t * p = eh;

    // CIE (Common Information Entry)
    *(uint32_t *)p = (uint32_t)(cie_size - 4);
    p += 4;  // length
    *(uint32_t *)p = 0;
    p += 4;       // cie_id (0)
    *p++ = 1;     // version
    *p++ = '\0';  // augmentation string ("")
    *p++ = 4;     // code_alignment_factor (AArch64 instructions are 4 bytes)
    *p++ = 0x78;  // data_alignment_factor (-8 in SLEB128)
    *p++ = 30;    // return_address_register (30 = lr on arm64)

    // CIE Instructions: Initial state
    // DW_CFA_def_cfa sp, 0
    *p++ = 0x0c;
    *p++ = 31;
    *p++ = 0;
    while ((size_t)(p - eh) < cie_size)
        *p++ = 0;

    // FDE (Frame Description Entry)
    uint8_t * fde_start = eh + cie_size;
    p = fde_start;
    *(uint32_t *)p = (uint32_t)(fde_size - 4);
    p += 4;  // length
    *(uint32_t *)p = (uint32_t)(p - eh);
    p += 4;  // cie_pointer (back-offset)

    *(void **)p = exec->rx_ptr;
    p += 8;  // pc_begin (absolute)
    *(uint64_t *)p = (uint64_t)exec->size;
    p += 8;    // pc_range (absolute)
    *p++ = 0;  // aug data len

    // Instructions: match our trampoline prologue
    if (category == INFIX_EXECUTABLE_REVERSE) {
        // stp x29, x30, [sp, #-16]!; mov x29, sp
        *p++ = 0x41;  // loc +1 (4 bytes, after stp)
        *p++ = 0x0e;
        *p++ = 16;  // def_cfa_offset 16
        *p++ = 0x9d;
        *p++ = 2;  // offset r29 (x29), 2 (CFA - 16)
        *p++ = 0x9e;
        *p++ = 1;     // offset r30 (x30/lr), 1 (CFA - 8)
        *p++ = 0x41;  // loc +1 (4 bytes, after mov)
        *p++ = 0x0d;
        *p++ = 29;  // def_cfa_register r29
    }
    else {
        // stp x29, x30, [sp, #-16]!; stp x19, x20, ...; stp x21, x22, ...; mov x29, sp
        *p++ = 0x41;  // after stp x29, x30
        *p++ = 0x0e;
        *p++ = 16;
        *p++ = 0x9d;
        *p++ = 2;  // x29 at CFA - 16
        *p++ = 0x9e;
        *p++ = 1;     // x30 at CFA - 8
        *p++ = 0x41;  // after stp x19, x20
        *p++ = 0x0e;
        *p++ = 32;
        *p++ = 0x93;
        *p++ = 4;  // x19 at CFA - 32
        *p++ = 0x94;
        *p++ = 3;     // x20 at CFA - 24
        *p++ = 0x41;  // after stp x21, x22
        *p++ = 0x0e;
        *p++ = 48;
        *p++ = 0x95;
        *p++ = 6;  // x21 at CFA - 48
        *p++ = 0x96;
        *p++ = 5;     // x22 at CFA - 40
        *p++ = 0x41;  // after mov x29, sp
        *p++ = 0x0d;
        *p++ = 29;  // def_cfa_register x29 (offset remains 48)
    }

    while ((size_t)(p - eh) < (cie_size + fde_size))
        *p++ = 0;
    *(uint32_t *)p = 0;  // Terminator

    // Register the frame with the runtime.
    extern void __register_frame(void *);
    pthread_mutex_lock(&g_dwarf_mutex);

infix/src/jit/executor.c  view on Meta::CPAN

#else
    // Dual-mapping POSIX: protect and unmap both views.
    if (exec.eh_frame_ptr) {
        extern void __deregister_frame(void *);
        pthread_mutex_lock(&g_dwarf_mutex);
        __deregister_frame(exec.eh_frame_ptr);
        pthread_mutex_unlock(&g_dwarf_mutex);
        infix_free(exec.eh_frame_ptr);
    }
    if (exec.rx_ptr)
        mprotect(exec.rx_ptr, exec.size, PROT_NONE);
    if (exec.rw_ptr)
        munmap(exec.rw_ptr, exec.size);
    if (exec.rx_ptr && exec.rx_ptr != exec.rw_ptr)  // rw_ptr might be same as rx_ptr on some platforms
        munmap(exec.rx_ptr, exec.size);
    if (exec.shm_fd >= 0)
        close(exec.shm_fd);
#endif
}
/**
 * @internal
 * @brief Makes a block of JIT memory executable and flushes instruction caches.
 *
 * @details This function completes the W^X process.
 * - On single-mapping platforms, it changes the memory protection from RW to RX.
 * - On dual-mapping platforms, the RX mapping already exists, so no protection change
 *   is needed; instead the RW view is unmapped so the code can no longer be written.
 *
 * Crucially, it also handles flushing the CPU's instruction cache on architectures
 * that require it (like AArch64). This is necessary because the CPU may have cached
 * old (zeroed) data from the memory location, and it must be explicitly told to
 * re-read the newly written machine code instructions.
 *
 * @param exec The executable memory block.
 * @param category The category of the trampoline.
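 * @param prologue_size The size of the prologue.
 * @param epilogue_offset Offset of the trampoline's epilogue; forwarded to the Windows
 *        SEH registration routines.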
 * @return `true` on success, `false` on failure.
 */
c23_nodiscard bool infix_executable_make_executable(infix_executable_t * exec,
                                                    c23_maybe_unused infix_executable_category_t category,
                                                    c23_maybe_unused uint32_t prologue_size,
                                                    c23_maybe_unused uint32_t epilogue_offset) {
    if (exec->rw_ptr == nullptr || exec->size == 0)
        return false;

    // On AArch64 (and other RISC architectures), the instruction and data caches can be
    // separate. We must explicitly flush the D-cache (where the JIT wrote the code)
    // and invalidate the I-cache so the CPU fetches the new instructions.
    // We might as well do it on x64 too.
#if defined(INFIX_COMPILER_MSVC)
    // Use the Windows-specific API.
    FlushInstructionCache(GetCurrentProcess(), exec->rw_ptr, exec->size);
#elif defined(INFIX_OS_MACOS)
    // Use the Apple-specific API if available (required for Apple Silicon correctness)
    if (g_macos_apis.sys_icache_invalidate)
        g_macos_apis.sys_icache_invalidate(exec->rw_ptr, exec->size);
    else
        __builtin___clear_cache((char *)exec->rw_ptr, (char *)exec->rw_ptr + exec->size);
#elif defined(INFIX_ARCH_AARCH64)
    // Robust manual cache clearing for AArch64 Linux/BSD.
    // We clean the D-cache to point of unification and invalidate the I-cache.
    uintptr_t start = (uintptr_t)exec->rw_ptr;
    uintptr_t end = start + exec->size;
    uintptr_t ctr_el0;
    __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));

    // D-cache line size is in bits [19:16] as log2 of number of words.
    uintptr_t d_line_size = 4 << ((ctr_el0 >> 16) & 0xf);
    for (uintptr_t addr = start & ~(d_line_size - 1); addr < end; addr += d_line_size)
        __asm__ __volatile__("dc cvau, %0" ::"r"(addr) : "memory");
    __asm__ __volatile__("dsb ish" ::: "memory");

    // I-cache line size is in bits [3:0] as log2 of number of words.
    uintptr_t i_line_size = 4 << (ctr_el0 & 0xf);
    for (uintptr_t addr = start & ~(i_line_size - 1); addr < end; addr += i_line_size)
        __asm__ __volatile__("ic ivau, %0" ::"r"(addr) : "memory");
    __asm__ __volatile__("dsb ish\n\tisb" ::: "memory");
#else
    // Use the GCC/Clang built-in for other platforms.
    __builtin___clear_cache((char *)exec->rw_ptr, (char *)exec->rw_ptr + exec->size);
#endif

    bool result = false;
#if defined(INFIX_OS_WINDOWS)
    // On Windows, we register SEH unwind info before making the memory executable.
#if defined(INFIX_ARCH_X64)
    _infix_register_seh_windows_x64(exec, category, prologue_size, epilogue_offset);
#elif defined(INFIX_ARCH_AARCH64)
    _infix_register_seh_windows_arm64(exec, category, prologue_size, epilogue_offset);
#endif
    // Finalize permissions to Read+Execute.
    // We include the SEH metadata in the protected region.
    result = VirtualProtect(exec->rw_ptr, exec->size + INFIX_SEH_METADATA_SIZE, PAGE_EXECUTE_READ, &(DWORD){0});
    if (!result)
        _infix_set_system_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_PROTECTION_FAILURE, GetLastError(), nullptr);
#elif defined(INFIX_OS_MACOS)
    static bool g_use_secure_jit_path = false;
    static bool g_checked_jit_support = false;
    if (!g_checked_jit_support) {
        g_use_secure_jit_path = has_jit_entitlement();
        g_checked_jit_support = true;
    }

    if (g_use_secure_jit_path && g_macos_apis.pthread_jit_write_protect_np) {
        // Switch thread state to Execute allowed (enabled=1)
        g_macos_apis.pthread_jit_write_protect_np(1);
        result = true;
    }
    else {
        result = (mprotect(exec->rw_ptr, exec->size, PROT_READ | PROT_EXEC) == 0);
    }
    if (!result)
        _infix_set_system_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_PROTECTION_FAILURE, errno, nullptr);
#elif defined(INFIX_OS_ANDROID) || defined(INFIX_OS_OPENBSD) || defined(INFIX_OS_DRAGONFLY)
    // Other single-mapping POSIX platforms use mprotect.
    result = (mprotect(exec->rw_ptr, exec->size, PROT_READ | PROT_EXEC) == 0);
    if (!result)
        _infix_set_system_error(INFIX_CATEGORY_ALLOCATION, INFIX_CODE_PROTECTION_FAILURE, errno, nullptr);
#else
    // Dual-mapping POSIX (Linux, FreeBSD).
    // The RX mapping is already executable.
#if defined(INFIX_OS_LINUX) && defined(INFIX_ARCH_X64)
    _infix_register_eh_frame_linux_x64(exec, category);
#elif defined(INFIX_OS_LINUX) && defined(INFIX_ARCH_AARCH64)
    _infix_register_eh_frame_arm64(exec, category);
#endif
    // SECURITY CRITICAL: We MUST unmap the RW view now. If we leave it mapped,
    // an attacker with a heap disclosure could find it and overwrite the JIT code,
    // bypassing W^X.
    if (munmap(exec->rw_ptr, exec->size) == 0) {
        exec->rw_ptr = nullptr;  // Clear the pointer to prevent double-free or misuse.
        result = true;
    }
    else {
        _infix_set_system_error(



( run in 0.909 second using v1.01-cache-2.11-cpan-f56aa216473 )