Inline-Lua

 view release on metacpan or  search on metacpan

ffi/target/release/build/mlua-sys-6a99a2ae50f12319/out/luajit-build/build/src/vm_x86.dasc  view on Meta::CPAN

|// Operand decode helpers, expanded at the top of instruction handlers.
|// RCH and RCL are presumably the high/low byte aliases of RC (they are
|// defined outside this excerpt) -- confirm against the register defines.
|
|// Decode both byte operands: RB from RCH, then RC from RCL.
|// RB is extracted first because the second movzx overwrites RC.
|.macro ins_ABC
|  movzx RB, RCH
|  movzx RC, RCL
|.endmacro
|
|// Decode only the operand held in RCH into RB; RC is left untouched.
|.macro ins_AB_
|  movzx RB, RCH
|.endmacro
|
|// Decode only the operand held in RCL into RC (zero-extended).
|.macro ins_A_C
|  movzx RC, RCL
|.endmacro
|
|// Bitwise-invert RDa in place.
|.macro ins_AND
|  not RDa
|.endmacro
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|// Fetches the instruction word at [PC], extracts the opcode byte into OP and
|// the following byte into RA, advances PC past the instruction and jumps
|// through the DISPATCH table. On exit RC holds the upper 16 bits of the
|// instruction word (the bytes named PC_RC/PC_RB/PC_RD further below).
|// Writes RC, RA, OP and PC; add/shr also modify the flags.
|.macro ins_NEXT
|  mov RC, [PC]  // Load the whole instruction word.
|  movzx RA, RCH  // Byte 1 of the word -> RA (must precede the shift of RC).
|  movzx OP, RCL  // Byte 0 of the word -> OP, the dispatch table index.
|  add PC, 4  // PC now points just past the fetched instruction.
|  shr RC, 16  // Keep only the upper 16 bits in RC.
|.if X64
|  jmp aword [DISPATCH+OP*8]  // Table entries are 8 bytes wide on x64.
|.else
|  jmp aword [DISPATCH+OP*4]  // Table entries are 4 bytes wide on x86.
|.endif
|.endmacro
|
|// Instruction footer.
|// Two names are provided: ins_next and ins_next_. With replicated dispatch
|// both expand to a full copy of ins_NEXT. With common dispatch, ins_next is
|// only a jump to the shared ->ins_next label, and ins_next_ is the variant
|// that defines that label followed by the one real copy of ins_NEXT.
|// The condition is hard-wired to 1, so replicated dispatch is always used
|// unless this line is edited.
|.if 1
|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|  .define ins_next, ins_NEXT
|  .define ins_next_, ins_NEXT
|.else
|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|  // Affects only certain kinds of benchmarks (and only with -j off).
|  // Around 10%-30% slower on Core2, a lot slower on P4.
|  .macro ins_next
|    jmp ->ins_next
|  .endmacro
|  // NOTE(review): this defines the global label ->ins_next, so presumably it
|  // must be expanded exactly once in this configuration -- confirm.
|  .macro ins_next_
|  ->ins_next:
|    ins_NEXT
|  .endmacro
|.endif
|
|// Call decode and dispatch.
|// Enters a Lua function: loads PC from the function object in RB, fetches
|// the first instruction there, decodes only its opcode (OP) and second byte
|// (RA), advances PC and dispatches. Unlike ins_NEXT this macro never writes
|// RC/RD, so the nargs+1 value passed in RD is still intact at the target.
|.macro ins_callt
|  // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
|  mov PC, LFUNC:RB->pc  // PC = the pc field of the function object.
|  mov RA, [PC]  // Fetch the first instruction word into RA.
|  movzx OP, RAL  // Opcode byte first: the next line overwrites RA.
|  movzx RA, RAH  // Byte 1 of the word -> RA.
|  add PC, 4  // Step past the fetched instruction.
|.if X64
|  jmp aword [DISPATCH+OP*8]  // 8-byte dispatch table entries on x64.
|.else
|  jmp aword [DISPATCH+OP*4]  // 4-byte dispatch table entries on x86.
|.endif
|.endmacro
|
|// Call variant of ins_callt: first stores the current PC at [BASE-4], which
|// establishes the "[BASE-4] = PC" precondition listed in ins_callt, then
|// performs the same decode and dispatch.
|.macro ins_call
|  // BASE = new base, RB = LFUNC, RD = nargs+1
|  mov [BASE-4], PC  // Save the caller's PC in the slot below the new base.
|  ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to test operand types.
|// All of them address the dword at byte offset 4 of the 8-byte slot number
|// `reg` relative to BASE and compare it with a type constant. The compare
|// sets the flags; the check* wrappers branch to `target` on mismatch.
|
|// Compare the type dword of slot `reg` with `tp`. Only sets flags.
|.macro checktp, reg, tp
|  cmp dword [BASE+reg*8+4], tp
|.endmacro
|
|// Branch to target if the type dword is unsigned >= LJ_TISNUM.
|.macro checknum, reg, target
|  checktp reg, LJ_TISNUM
|  jae target
|.endmacro
|
|// Branch to target unless the type dword is exactly LJ_TISNUM.
|.macro checkint, reg, target
|  checktp reg, LJ_TISNUM
|  jne target
|.endmacro
|
|// Branch to target unless the type dword is exactly LJ_TSTR.
|.macro checkstr, reg, target
|  checktp reg, LJ_TSTR
|  jne target
|.endmacro
|
|// Branch to target unless the type dword is exactly LJ_TTAB.
|.macro checktab, reg, target
|  checktp reg, LJ_TTAB
|  jne target
|.endmacro
|
|// These operands must be used with movzx.
|// Memory operands for the fields of the instruction just before PC. The
|// offsets are negative because the dispatch macros advance PC by 4 after
|// fetching, so the instruction being executed starts at PC-4.
|.define PC_OP, byte [PC-4]  // Byte 0: the byte ins_NEXT loads into OP.
|.define PC_RA, byte [PC-3]  // Byte 1: the byte ins_NEXT loads into RA.
|.define PC_RB, byte [PC-1]  // Byte 3.
|.define PC_RC, byte [PC-2]  // Byte 2.
|.define PC_RD, word [PC-2]  // Bytes 2-3 as one 16-bit value (overlaps RC and RB).
|
|// Apply a branch to PC: PC = PC + (reg - BCBIAS_J) * 4.
|// Uses lea, so the flags are left unchanged; `reg` itself is not modified.
|.macro branchPC, reg
|  lea PC, [PC+reg*4-BCBIAS_J*4]
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
|// DISPATCH_GL(field): displacement to add to DISPATCH to address `field`
|// of global_State (GG_DISP2G plus the field offset).
|// DISPATCH_J(field): the same for a field of jit_State, based on GG_DISP2J.
#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
|
|// PC2PROTO(field): offset of `field` in GCproto minus sizeof(GCproto), i.e.
|// a negative displacement for any field inside the struct.
|// NOTE(review): presumably meant to be applied to a pointer located
|// sizeof(GCproto) bytes past the start of the GCproto -- confirm at the use sites.
#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// Decrement hashed hotcount and trigger trace recorder if zero.
|// The counter slot is selected by hashing PC: (PC >> 1) & HOTCOUNT_PCMASK is
|// used as a byte offset from DISPATCH+GG_DISP2HOT. HOTCOUNT_LOOP is then
|// subtracted from the 16-bit counter found there, and control transfers to
|// ->vm_hotloop when that subtraction borrows (counter was below
|// HOTCOUNT_LOOP). `reg` is a scratch register and is overwritten; the flags
|// are modified.
|.macro hotloop, reg
|  mov reg, PC
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
|  jb ->vm_hotloop  // Taken on unsigned underflow of the counter.
|.endmacro
|
|// Same hashed-counter scheme as hotloop, but subtracts HOTCOUNT_CALL and
|// branches to ->vm_hotcall when the 16-bit counter underflows.
|// `reg` is a scratch register and is overwritten; the flags are modified.
|.macro hotcall, reg
|  mov reg, PC
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK  // reg = (PC >> 1) & HOTCOUNT_PCMASK, the slot offset.
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
|  jb ->vm_hotcall  // Taken on unsigned underflow of the counter.
|.endmacro
|
|// Set current VM state.
|// `st` is the state name suffix: the `..` operator pastes it onto LJ_VMST_,
|// so `set_vmstate FOO` refers to the constant LJ_VMST_FOO. The value stored
|// into the vmstate field of global_State is the bitwise complement of that
|// constant, written as a 32-bit immediate. No registers are modified.
|.macro set_vmstate, st
|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
|// x87 compares.
|// fcomparepp compares st0 with st1, leaves the result in the CPU flags (so
|// it can be followed directly by a conditional jump), and removes both
|// operands from the x87 stack.
|.macro fcomparepp			// Compare and pop st0 >< st1.
|  fucomip st1  // Unordered compare st0 with st1, set EFLAGS, pop st0.
|  fpop  // Pop the remaining operand (fpop is not defined in this excerpt).
|.endmacro
|
|// Drop the value in st1 while keeping st0: fstp copies st0 into st1 and then
|// pops, so the old st0 value ends up as the new top of stack.
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
|.macro sseconst_abs, reg, tmp		// Synthesize abs mask.
|.if X64
|  mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
|.else
|  pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
|.endif



( run in 0.704 second using v1.01-cache-2.11-cpan-39bf76dae61 )