Inline-Lua
view release on metacpan or search on metacpan
ffi/target/release/build/mlua-sys-6a99a2ae50f12319/out/luajit-build/build/src/vm_x86.dasc view on Meta::CPAN
|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
|.macro ins_AB_; movzx RB, RCH; .endmacro
|.macro ins_A_C; movzx RC, RCL; .endmacro
|.macro ins_AND; not RDa; .endmacro
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|.macro ins_NEXT
| mov RC, [PC]
| movzx RA, RCH
| movzx OP, RCL
| add PC, 4
| shr RC, 16
|.if X64
| jmp aword [DISPATCH+OP*8]
|.else
| jmp aword [DISPATCH+OP*4]
|.endif
|.endmacro
|
|// Instruction footer.
|.if 1
| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
| .define ins_next, ins_NEXT
| .define ins_next_, ins_NEXT
|.else
| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
| // Affects only certain kinds of benchmarks (and only with -j off).
| // Around 10%-30% slower on Core2, a lot more slower on P4.
| .macro ins_next
| jmp ->ins_next
| .endmacro
| .macro ins_next_
| ->ins_next:
| ins_NEXT
| .endmacro
|.endif
|
|// Call decode and dispatch.
|.macro ins_callt
| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
| mov PC, LFUNC:RB->pc
| mov RA, [PC]
| movzx OP, RAL
| movzx RA, RAH
| add PC, 4
|.if X64
| jmp aword [DISPATCH+OP*8]
|.else
| jmp aword [DISPATCH+OP*4]
|.endif
|.endmacro
|
|.macro ins_call
| // BASE = new base, RB = LFUNC, RD = nargs+1
| mov [BASE-4], PC
| ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to test operand types.
|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
|
|// These operands must be used with movzx.
|.define PC_OP, byte [PC-4]
|.define PC_RA, byte [PC-3]
|.define PC_RB, byte [PC-1]
|.define PC_RC, byte [PC-2]
|.define PC_RD, word [PC-2]
|
|.macro branchPC, reg
| lea PC, [PC+reg*4-BCBIAS_J*4]
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// Decrement hashed hotcount and trigger trace recorder if zero.
|.macro hotloop, reg
| mov reg, PC
| shr reg, 1
| and reg, HOTCOUNT_PCMASK
| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
| jb ->vm_hotloop
|.endmacro
|
|.macro hotcall, reg
| mov reg, PC
| shr reg, 1
| and reg, HOTCOUNT_PCMASK
| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
| jb ->vm_hotcall
|.endmacro
|
|// Set current VM state.
|.macro set_vmstate, st
| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
|// x87 compares.
|.macro fcomparepp // Compare and pop st0 >< st1.
| fucomip st1
| fpop
|.endmacro
|
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
|.if X64
| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
|.else
| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
|.endif
( run in 0.704 second using v1.01-cache-2.11-cpan-39bf76dae61 )