ISAL-Crypto
view release on metacpan or search on metacpan
isa-l_crypto/sha256_mb/sha256_opt_x1.asm view on Meta::CPAN
; ---------------------------------------------------------------------------
; Register aliases.
;
; %xdefine aliases are expanded at definition time, so they can be re-bound
; later (the a..h names are rotated by ROTATE_ARGS); %define aliases are
; plain textual substitutions.
; ---------------------------------------------------------------------------

; XMM scratch registers (XTMP0/XTMP1 are used as temporaries by
; FOUR_ROUNDS_AND_SCHED).
%xdefine XTMP0 xmm0
%xdefine XTMP1 xmm1
%xdefine XTMP2 xmm2
%xdefine XTMP3 xmm3
%xdefine XTMP4 xmm8
; NOTE(review): presumably holds the "constant + message word" transfer value
; that is spilled to the _XFER stack slot -- confirm against the round code.
%xdefine XFER xmm9
; XMM registers holding pshufb-style shuffle masks.
%define SHUF_00BA xmm10 ; shuffle xBxA -> 00BA
%define SHUF_DC00 xmm11 ; shuffle xDxC -> DC00
%define BYTE_FLIP_MASK xmm12
; Argument indices here start at 0 (arg0, arg1), whereas mgr_flush/submit
; count their arguments from 1.
%define MGR arg0 ; rdi or rcx
%define NBLK arg1 ; rsi or rdx
%define IDX r8 ; local variable -- must match the register the caller uses
%define NLANX4 r10 ; must match the caller, which expects r10
; TMGR, INP, SRND and TMP all alias r9, so only one of them can be live at a
; time; TMGR and INP have stack slots (_TMGR, _INP) to hold their values
; while r9 is used for something else.
%define TMGR r9 ; data pointer, kept on the stack in slot _TMGR
%define INP r9 ; data pointer, kept on the stack in slot _INP
%define SRND r9 ; clobbers INP
%define TMP r9 ; local variable -- helper for addressing the digest
; NOTE(review): presumably the pointer into the round-constant table -- confirm.
%xdefine TBL rbp
; Working variables a..h, 32 bits each. The name -> register mapping below is
; only the initial one: ROTATE_ARGS re-binds the names after every round.
; Note that c/d/e/f occupy ecx/esi/edx/edi, i.e. the registers that (per the
; comments on MGR/NBLK above) carry arg0/arg1, so the arguments must be
; consumed or saved before the working variables are loaded.
%xdefine c ecx
%xdefine d esi
%xdefine e edx
%xdefine a eax
%xdefine b ebx
%xdefine f edi
%xdefine g r12d
%xdefine h r11d
; 32-bit integer scratch registers used inside the round macros.
%xdefine y0 r13d
%xdefine y1 r14d
%xdefine y2 r15d
;; ---------------------------------------------------------------------------
;; Stack frame layout.
;;
;; Each slot has a *_SIZE symbol and an offset symbol; every offset is the
;; previous offset plus the previous size. With the values below:
;;
;;   offset  symbol         size  contents
;;   ------  -------------  ----  ------------------------------------------
;;        0  _STACK_ALIGN      8  alignment padding
;;        8  _INP_END          8  saved end-of-input pointer
;;       16  _INP              8  saved INP (INP shares r9 with other aliases)
;;       24  _TMGR             8  saved TMGR (TMGR shares r9 as well)
;;       32  _XFER            16  one xmm-sized transfer slot
;;       48  _XMM_SAVE         0  no xmm registers are saved in this frame
;;       48  _GPR_SAVE        72  nine 8-byte general-purpose register slots
;;      120  STACK_SIZE           total frame size
;;
;; The frame size (STACK_SIZE) plus any pushes must be an odd multiple of 8:
;; rsp is 8 mod 16 on function entry, so subtracting an odd multiple of 8
;; leaves rsp 16-byte aligned. _STACK_ALIGN_SIZE is the knob used to get
;; there (0 or 8, depending on how many pushes precede the frame).
;; NOTE(review): with a 16-byte aligned rsp, _XFER (offset 32) is 16-byte
;; aligned too -- presumably required by aligned SSE accesses to that slot;
;; confirm against the code that uses _XFER.
;; NOTE(review): _XMM_SAVE_SIZE is 0 although xmm8..xmm12 are used; on Win64
;; those are callee-saved, so presumably the caller preserves them -- confirm.
;;
;; Every computed symbol is parenthesised so that it expands as a single
;; value in any surrounding expression.
;; ---------------------------------------------------------------------------
%define _STACK_ALIGN_SIZE       8       ; 0 or 8 depends on pushes
%define _INP_END_SIZE           8
%define _INP_SIZE               8
%define _TMGR_SIZE              8
%define _XFER_SIZE              16
%define _XMM_SAVE_SIZE          0
%define _GPR_SAVE_SIZE          (8*9)   ; rbx, rdx, rbp, (rdi, rsi), r12~r15

%define _STACK_ALIGN            0
%define _INP_END                (_STACK_ALIGN + _STACK_ALIGN_SIZE)
%define _INP                    (_INP_END + _INP_END_SIZE)
%define _TMGR                   (_INP + _INP_SIZE)
%define _XFER                   (_TMGR + _TMGR_SIZE)
%define _XMM_SAVE               (_XFER + _XFER_SIZE)
%define _GPR_SAVE               (_XMM_SAVE + _XMM_SAVE_SIZE)
%define STACK_SIZE              (_GPR_SAVE + _GPR_SAVE_SIZE)
;; Input buffers are not assumed to be 16-byte aligned, so the load mnemonic
;; used for them (MOVDQ, see COPY_XMM_AND_BSWAP) is the unaligned form.
%define MOVDQ movdqu
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros
; addm  mem, reg
;
; Accumulate a memory operand and a register into each other:
;       reg = reg + mem
;       mem = reg
; On exit both operands hold the sum. Flags are left as set by the ADD.
;
;   %1  memory operand (read, then overwritten with the sum)
;   %2  register operand (overwritten with the sum)
%macro addm 2
%define %%MEM   %1
%define %%REG   %2
        add     %%REG, %%MEM            ; reg += mem
        mov     %%MEM, %%REG            ; store the sum back to mem
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; COPY_XMM_AND_BSWAP  dst_xmm, src_mem, mask_xmm
;
; Load 16 bytes from src_mem into dst_xmm with MOVDQ, then permute the bytes
; of dst_xmm in place with PSHUFB using mask_xmm. When the mask reverses the
; bytes within each dword, the net effect is a per-dword byte swap of the
; loaded data.
;
;   %1  destination xmm register (overwritten)
;   %2  source memory operand (read only)
;   %3  xmm register holding the shuffle mask (read only)
%macro COPY_XMM_AND_BSWAP 3
%define %%DST   %1
%define %%SRC   %2
%define %%MASK  %3
        MOVDQ   %%DST, %%SRC            ; load via the file-wide MOVDQ alias
        pshufb  %%DST, %%MASK           ; reorder bytes as the mask dictates
%endmacro
; rotate_Xs
; Rotate the bindings of the symbols X0...X3 by one position:
;       new X0 = old X1
;       new X1 = old X2
;       new X2 = old X3
;       new X3 = old X0
; This is a pure preprocessor operation: it emits no instructions and moves
; no data, it only changes which expansion each name stands for.
; X_ is a scratch symbol that holds the old X0 while the others are shifted;
; %xdefine is used so that each right-hand side is expanded at the moment of
; definition, which is why the order of the lines below matters.
; X0...X3 must already be defined when the macro is invoked.
%macro rotate_Xs 0
%xdefine X_ X0
%xdefine X0 X1
%xdefine X1 X2
%xdefine X2 X3
%xdefine X3 X_
%endmacro
; ROTATE_ARGS
; Rotate the bindings of the working-variable symbols a...h by one position:
;       new a = old h
;       new b = old a
;       new c = old b
;       new d = old c
;       new e = old d
;       new f = old e
;       new g = old f
;       new h = old g
; This is a pure preprocessor operation: it emits no instructions, it only
; changes which register each name expands to in the code that follows.
; TMP_ is a scratch symbol that holds the old h while the others are shifted;
; %xdefine expands each right-hand side at the moment of definition, so the
; lines below must stay in this order (h first, a last).
%macro ROTATE_ARGS 0
%xdefine TMP_ h
%xdefine h g
%xdefine g f
%xdefine f e
%xdefine e d
%xdefine d c
%xdefine c b
%xdefine b a
%xdefine a TMP_
%endmacro
%macro FOUR_ROUNDS_AND_SCHED 0
;; compute s0 four at a time and s1 two at a time
;; compute W[-16] + W[-7] 4 at a time
movdqa XTMP0, X3
mov y0, e ; y0 = e
ror y0, (25-11) ; y0 = e >> (25-11)
mov y1, a ; y1 = a
palignr XTMP0, X2, 4 ; XTMP0 = W[-7]
ror y1, (22-13) ; y1 = a >> (22-13)
xor y0, e ; y0 = e ^ (e >> (25-11))
mov y2, f ; y2 = f
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6))
movdqa XTMP1, X1
xor y1, a ; y1 = a ^ (a >> (22-13)
xor y2, g ; y2 = f^g
paddd XTMP0, X0 ; XTMP0 = W[-7] + W[-16]
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
and y2, e ; y2 = (f^g)&e
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2))
( run in 0.724 second using v1.01-cache-2.11-cpan-5b529ec07f3 )