Affix

 view release on metacpan or  search on metacpan

infix/src/jit/executor.c  view on Meta::CPAN

        ui->UnwindCode[0].CodeOffset = 12;  // After mov rbp, rsp
        ui->UnwindCode[0].UnwindOp = UWOP_SET_FPREG;
        ui->UnwindCode[0].OpInfo = 0;

        ui->UnwindCode[1].CodeOffset = 9;  // After push r15
        ui->UnwindCode[1].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[1].OpInfo = 15;  // R15

        ui->UnwindCode[2].CodeOffset = 7;  // After push r14
        ui->UnwindCode[2].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[2].OpInfo = 14;  // R14

        ui->UnwindCode[3].CodeOffset = 5;  // After push r13
        ui->UnwindCode[3].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[3].OpInfo = 13;  // R13

        ui->UnwindCode[4].CodeOffset = 3;  // After push r12
        ui->UnwindCode[4].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[4].OpInfo = 12;  // R12

        ui->UnwindCode[5].CodeOffset = 1;  // After push rbp
        ui->UnwindCode[5].UnwindOp = UWOP_PUSH_NONVOL;
        ui->UnwindCode[5].OpInfo = 5;  // RBP
    }

    // 3. Personality Routine Stub - Follows UNWIND_INFO.
    // The ExceptionHandler field is at offset: 4 + ((CountOfCodes + 1) & ~1) * 2
    uint32_t * eh_field_ptr = (uint32_t *)&ui->UnwindCode[(ui->CountOfCodes + 1) & ~1];

    // Position the stub AFTER the ExceptionHandler RVA and HandlerData (8 bytes total).
    uint8_t * stub = (uint8_t *)_infix_align_up((size_t)(eh_field_ptr + 2), 16);

    stub[0] = 0x48;
    stub[1] = 0xB8;  // mov rax, imm64
    *(uint64_t *)(stub + 2) = (uint64_t)_infix_seh_personality_routine;
    stub[10] = 0xFF;
    stub[11] = 0xE0;  // jmp rax

    // BaseAddress should be 64KB aligned for maximum compatibility.
    DWORD64 base_address = (DWORD64)exec->rx_ptr & ~0xFFFF;
    DWORD rva_offset = (DWORD)((uint8_t *)exec->rx_ptr - (uint8_t *)base_address);

    rf->BeginAddress = rva_offset;  // Relative to BaseAddress
    // EndAddress covers the entire code block.
    rf->EndAddress = rva_offset + (DWORD)exec->size;
    rf->UnwindData = rva_offset + (DWORD)((uint8_t *)ui - (uint8_t *)exec->rx_ptr);

    if (ui->Flags & UNW_FLAG_EHANDLER) {
        // ExceptionHandler RVA points to our absolute jump stub.
        eh_field_ptr[0] = rva_offset + (uint32_t)(stub - (uint8_t *)exec->rx_ptr);
        // HandlerData field stores our target epilogue offset.
        eh_field_ptr[1] = epilogue_offset;
    }

    if (RtlAddFunctionTable(rf, 1, base_address)) {
        exec->seh_registration = rf;
        INFIX_DEBUG_PRINTF(
            "Registered SEH PDATA at %p (XDATA at %p, Stub at %p) for JIT code at %p", rf, ui, stub, exec->rx_ptr);
    }
    else {
        fprintf(stderr, "infix: RtlAddFunctionTable failed! GetLastError=%lu\n", GetLastError());
    }
}
#elif defined(INFIX_ARCH_AARCH64)
/**
 * @internal
 * @brief Populates and registers SEH metadata for a Windows ARM64 JIT block.
 * @details The metadata is written directly behind the generated code, starting at
 *          `exec->rw_ptr + exec->size`, in this order:
 *
 *          [RUNTIME_FUNCTION x2] [XDATA header] [Epilogue Scope] [Unwind Codes + Padding]
 *          [Handler RVA] [Handler Data] ... [16-byte aligned personality stub]
 *
 *          All RVAs are expressed relative to `exec->rx_ptr` rounded down to a 64KB boundary,
 *          which is also the base address handed to RtlAddFunctionTable.
 *
 *          NOTE(review): offsets of the metadata are computed against `exec->rx_ptr` while the
 *          metadata itself is written through `exec->rw_ptr`; this presumes both views share
 *          the same layout (or are the same mapping) - confirm against the allocator.
 *
 * @param exec            Executable block; on success `exec->seh_registration` is set to the
 *                        registered function table.
 * @param category        Selects which prologue shape the unwind codes describe, and whether
 *                        an exception handler is attached (only for INFIX_EXECUTABLE_SAFE_FORWARD).
 * @param prologue_size   Currently unused by this function.
 * @param epilogue_offset Byte offset of the epilogue within the code block. Stored (in
 *                        instruction units) in the epilogue scope and (in bytes) as handler data.
 */
static void _infix_register_seh_windows_arm64(infix_executable_t * exec,
                                              infix_executable_category_t category,
                                              uint32_t prologue_size,
                                              uint32_t epilogue_offset) {
    (void)prologue_size;

    uint8_t * metadata_base = (uint8_t *)exec->rw_ptr + exec->size;

    // RUNTIME_FUNCTION (PDATA) - Must be 4-byte aligned.
    // On ARM64, we use two entries: one for the function and a sentinel for the end.
    RUNTIME_FUNCTION * rf = (RUNTIME_FUNCTION *)_infix_align_up((size_t)metadata_base, 4);

    // UNWIND_INFO (XDATA) - Follows PDATA.
    UNWIND_INFO_ARM64 * ui = (UNWIND_INFO_ARM64 *)_infix_align_up((size_t)(rf + 2), 4);
    infix_memset(ui, 0, sizeof(UNWIND_INFO_ARM64));

    ui->FunctionLength = (uint32_t)(exec->size / 4);  // Length in 4-byte instructions
    ui->Version = 0;
    ui->X = (category == INFIX_EXECUTABLE_SAFE_FORWARD);
    ui->E = 0;
    ui->EpilogueCount = 1;

    // Stage the unwind codes in a local buffer. They must NOT be staged inside the XDATA
    // itself: the first word after the header belongs to the epilogue scope, so writing the
    // scope would clobber codes staged there before they are copied to their final position.
    //
    // NOTE(review): per the public ARM64 unwind-code table, 0xC8, 0xD4 and 0xD2 look like the
    // lead bytes of two-byte codes (save_regp / save_reg_x / save_reg) rather than complete
    // single-byte codes for the `stp` instructions named below - verify these encodings
    // against the prologue the emitter actually generates.
    uint8_t staged_codes[8] = {0};
    uint32_t code_count = 0;

    if (category == INFIX_EXECUTABLE_REVERSE) {
        // Reverse Prologue: stp x29, x30, [sp, #-16]!; mov x29, sp; sub sp, sp, #alloc
        // Opcodes in REVERSE order:
        staged_codes[code_count++] = 0xE1;  // mov x29, sp
        staged_codes[code_count++] = 0xC8;  // stp x29, x30, [sp, #-16]!
        staged_codes[code_count++] = 0xE4;  // end
    }
    else {
        // Forward or Direct Prologue: stp x29, x30, [sp, #-16]!; stp x19, x20, ...; stp x21, x22, ...;
        // mov x29, sp; sub sp, sp, #alloc
        staged_codes[code_count++] = 0xE1;  // mov x29, sp
        staged_codes[code_count++] = 0xD4;  // stp x21, x22, [sp, #-16]!
        staged_codes[code_count++] = 0xD2;  // stp x19, x20, [sp, #-16]!
        staged_codes[code_count++] = 0xC8;  // stp x29, x30, [sp, #-16]!
        staged_codes[code_count++] = 0xE4;  // end
    }

    // Unwind codes occupy a whole number of 32-bit words.
    const uint32_t code_words = (code_count + 3) / 4;
    ui->CodeWords = code_words;

    // On ARM64, if X=1, the Exception Handler RVA and Handler Data follow the epilogue scopes
    // and unwind codes.
    // XDATA layout: [Header] [Epilogue Scopes] [Unwind Codes] [Padding] [Handler RVA] [Handler Data]

    // Each epilogue scope is 4 bytes. We have ui->EpilogueCount of them.
    uint32_t * epilogue_scopes = (uint32_t *)(ui + 1);
    // The scope word carries the epilogue start offset in instruction units in its low bits.
    // The remaining (upper) bits are left zero; per the .xdata epilog scope layout that would
    // make the epilogue start at unwind code index 0 - TODO confirm this sharing is intended.
    epilogue_scopes[0] = (epilogue_offset / 4);

    // Final home of the unwind codes: immediately after the epilogue scopes.
    uint8_t * unwind_codes_ptr = (uint8_t *)(epilogue_scopes + ui->EpilogueCount);
    // Zero the padded area first, then drop the staged codes in.
    infix_memset(unwind_codes_ptr, 0, code_words * 4);
    infix_memcpy(unwind_codes_ptr, staged_codes, code_count);

    // Handler info must follow unwind codes (which are already padded to 4 bytes by code_words).
    uint32_t * handler_info_ptr = (uint32_t *)(unwind_codes_ptr + code_words * 4);

    // The stub is placed after the two handler words, aligned to 16 bytes.
    uint8_t * stub = (uint8_t *)_infix_align_up((size_t)(handler_info_ptr + 2), 16);

    // stub:
    // ldr x9, personality_addr
    // br x9
    // personality_addr: .quad _infix_seh_personality_routine
    *(uint32_t *)stub = 0x58000049;        // ldr x9, #8
    *(uint32_t *)(stub + 4) = 0xD61F0120;  // br x9
    *(uint64_t *)(stub + 8) = (uint64_t)_infix_seh_personality_routine;

    // Base address is the code address rounded down to a 64KB boundary; every RVA below is
    // relative to it.
    DWORD64 base_address = (DWORD64)exec->rx_ptr & ~(DWORD64)0xFFFF;
    DWORD rva_offset = (DWORD)((uint8_t *)exec->rx_ptr - (uint8_t *)base_address);

    rf[0].BeginAddress = rva_offset;
    rf[0].UnwindData = rva_offset + (DWORD)((uint8_t *)ui - (uint8_t *)exec->rx_ptr);

    // Sentinel entry defines the end of the previous function
    rf[1].BeginAddress = rva_offset + (DWORD)exec->size;
    rf[1].UnwindData = 0;

    if (ui->X) {
        // According to the spec, the Exception Handler RVA and Handler Data
        // are located at the end of the XDATA, which is 4-byte aligned.
        handler_info_ptr[0] = rva_offset + (uint32_t)(stub - (uint8_t *)exec->rx_ptr);
        // Handler data carries the epilogue offset (in bytes) for the personality routine.
        handler_info_ptr[1] = epilogue_offset;
    }

    if (RtlAddFunctionTable(rf, 2, base_address)) {
        exec->seh_registration = rf;
        INFIX_DEBUG_PRINTF(
            "Registered SEH PDATA at %p (XDATA at %p, Stub at %p) for JIT code at %p", rf, ui, stub, exec->rx_ptr);
    }
    else {
        // Registration failure is reported but not propagated; the block simply has no unwind data.
        fprintf(stderr, "infix: RtlAddFunctionTable failed! GetLastError=%lu\n", GetLastError());
    }
}
#endif
#endif

#if defined(INFIX_OS_LINUX) && defined(INFIX_ARCH_X64)
/**
 * @internal
 * @brief Registers DWARF unwind information for a JIT-compiled block on Linux x64.
 * @details This allows the C++ exception unwinder to correctly walk through
 *          JIT-compiled frames. We manually construct a Common Information Entry (CIE)
 *          and a Frame Description Entry (FDE) that match the stack behavior
 *          of our trampolines (standard RBP-based frame).
 */
static void _infix_register_eh_frame_linux_x64(infix_executable_t * exec, infix_executable_category_t category) {
    // Simplified .eh_frame layout: [ CIE | FDE | Terminator ]
    const size_t cie_size = 32;
    const size_t fde_size = 64;
    const size_t total_size = cie_size + fde_size + 4;  // +4 for null terminator

    uint8_t * eh = infix_malloc(total_size);
    if (!eh)
        return;
    infix_memset(eh, 0, total_size);

    uint8_t * p = eh;

    // CIE
    *(uint32_t *)p = (uint32_t)(cie_size - 4);
    p += 4;
    *(uint32_t *)p = 0;
    p += 4;
    *p++ = 1;     // version
    *p++ = '\0';  // augmentation
    *p++ = 1;     // code align
    *p++ = 0x78;  // data align (-8)
    *p++ = 16;    // ret reg (rip)

    // Initial state: CFA = rsp + 8, rip at CFA - 8
    *p++ = 0x0c;
    *p++ = 0x07;
    *p++ = 0x08;
    *p++ = 0x90;
    *p++ = 0x01;
    while ((size_t)(p - eh) < cie_size)
        *p++ = 0;

    // FDE
    uint8_t * fde_start = eh + cie_size;
    p = fde_start;
    *(uint32_t *)p = (uint32_t)(fde_size - 4);
    p += 4;
    *(uint32_t *)p = (uint32_t)(p - eh);
    p += 4;  // back-offset

    *(void **)p = exec->rx_ptr;
    p += 8;
    *(uint64_t *)p = (uint64_t)exec->size;
    p += 8;
    *p++ = 0;  // aug data len



( run in 0.731 second using v1.01-cache-2.11-cpan-39bf76dae61 )