Image-PNG-Simple

 view release on metacpan or  search on metacpan

zlib-1.2.8/contrib/masmx64/inffasx64.asm  view on Meta::CPAN

; inffasx64.asm is a hand tuned assembler version of inffast.c - fast decoding
; version for AMD64 on Windows using Microsoft C compiler
;
; inffasx64.asm was automatically converted from the AMD64 portion of inffas86.c
; inffasx64.asm is called by inffas8664.c, which contains more info.


; To compile this file, I use the options
;   ml64.exe /Flinffasx64 /c /Zi inffasx64.asm
;   with Microsoft Macro Assembler (x64) for AMD64
;

; This file compiles with Microsoft Macro Assembler (x64) for AMD64
;
;   ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK
;
;   (you can get Windows WDK with ml64 for AMD64 from
;      http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price)
;


.code
inffas8664fnc PROC

; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
;
; All registers must be preserved across the call, except for
;   rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.


	mov [rsp-8],rsi
	mov [rsp-16],rdi
	mov [rsp-24],r12
	mov [rsp-32],r13
	mov [rsp-40],r14
	mov [rsp-48],r15
	mov [rsp-56],rbx

	mov rax,rcx

	mov	[rax+8], rbp       ; /* save regs rbp and rsp */
	mov	[rax], rsp

	mov	rsp, rax          ; /* make rsp point to &ar */

	mov	rsi, [rsp+16]      ; /* rsi  = in */
	mov	rdi, [rsp+32]      ; /* rdi  = out */
	mov	r9, [rsp+24]       ; /* r9   = last */
	mov	r10, [rsp+48]      ; /* r10  = end */
	mov	rbp, [rsp+64]      ; /* rbp  = lcode */
	mov	r11, [rsp+72]      ; /* r11  = dcode */
	mov	rdx, [rsp+80]      ; /* rdx  = hold */
	mov	ebx, [rsp+88]      ; /* ebx  = bits */
	mov	r12d, [rsp+100]    ; /* r12d = lmask */
	mov	r13d, [rsp+104]    ; /* r13d = dmask */
                                          ; /* r14d = len */
                                          ; /* r15d = dist */


	cld
	cmp	r10, rdi
	je	L_one_time           ; /* if only one decode left */
	cmp	r9, rsi

    jne L_do_loop


L_one_time:
	mov	r8, r12           ; /* r8 = lmask */
	cmp	bl, 32
	ja	L_get_length_code_one_time



( run in 1.101 second using v1.01-cache-2.11-cpan-39bf76dae61 )