Inline-Lua
view release on metacpan or search on metacpan
ffi/target/release/build/mlua-sys-6a99a2ae50f12319/out/luajit-build/build/src/vm_x64.dasc view on Meta::CPAN
|.endif
|
|//-----------------------------------------------------------------------
|
|// Instruction headers.
|.macro ins_A; .endmacro
|.macro ins_AD; .endmacro
|.macro ins_AJ; .endmacro
|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
|.macro ins_AB_; movzx RBd, RCH; .endmacro
|.macro ins_A_C; movzx RCd, RCL; .endmacro
|.macro ins_AND; not RD; .endmacro
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|.macro ins_NEXT
| mov RCd, [PC]
| movzx RAd, RCH
| movzx OP, RCL
| add PC, 4
| shr RCd, 16
| jmp aword [DISPATCH+OP*8]
|.endmacro
|
|// Instruction footer.
|.if 1
| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
| .define ins_next, ins_NEXT
| .define ins_next_, ins_NEXT
|.else
| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
| // Affects only certain kinds of benchmarks (and only with -j off).
| // Around 10%-30% slower on Core2, a lot more slower on P4.
| .macro ins_next
| jmp ->ins_next
| .endmacro
| .macro ins_next_
| ->ins_next:
| ins_NEXT
| .endmacro
|.endif
|
|// Call decode and dispatch.
|.macro ins_callt
| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
| mov PC, LFUNC:RB->pc
| mov RAd, [PC]
| movzx OP, RAL
| movzx RAd, RAH
| add PC, 4
| jmp aword [DISPATCH+OP*8]
|.endmacro
|
|.macro ins_call
| // BASE = new base, RB = LFUNC, RD = nargs+1
| mov [BASE-8], PC
| ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to clear or set tags.
|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro
|.macro settp, reg, tp
| mov64 ITYPE, ((uint64_t)tp<<47)
| or reg, ITYPE
|.endmacro
|.macro settp, dst, reg, tp
| mov64 dst, ((uint64_t)tp<<47)
| or dst, reg
|.endmacro
|.macro setint, reg
| settp reg, LJ_TISNUM
|.endmacro
|.macro setint, dst, reg
| settp dst, reg, LJ_TISNUM
|.endmacro
|
|// Macros to test operand types.
|.macro checktp_nc, reg, tp, target
| mov ITYPE, reg
| sar ITYPE, 47
| cmp ITYPEd, tp
| jne target
|.endmacro
|.macro checktp, reg, tp, target
| mov ITYPE, reg
| cleartp reg
| sar ITYPE, 47
| cmp ITYPEd, tp
| jne target
|.endmacro
|.macro checktptp, src, tp, target
| mov ITYPE, src
| sar ITYPE, 47
| cmp ITYPEd, tp
| jne target
|.endmacro
|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
|
|.macro checknumx, reg, target, jump
| mov ITYPE, reg
| sar ITYPE, 47
| cmp ITYPEd, LJ_TISNUM
| jump target
|.endmacro
|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro
|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
|.macro checknumber, src, target; checknumx src, target, ja; .endmacro
|
|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro
|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro
|
|// These operands must be used with movzx.
|.define PC_OP, byte [PC-4]
|.define PC_RA, byte [PC-3]
|.define PC_RB, byte [PC-1]
|.define PC_RC, byte [PC-2]
|.define PC_RD, word [PC-2]
|
|.macro branchPC, reg
| lea PC, [PC+reg*4-BCBIAS_J*4]
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// Decrement hashed hotcount and trigger trace recorder if zero.
|.macro hotloop, reg
| mov reg, PCd
| shr reg, 1
| and reg, HOTCOUNT_PCMASK
( run in 2.562 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )