Alien-LibJIT

 view release on metacpan or  search on metacpan

libjit/jit/jit-apply-x86-64.c  view on Meta::CPAN

/*
 * jit-apply-x86-64.c - Apply support routines for x86_64.
 *
 * Copyright (C) 2008  Southern Storm Software, Pty Ltd.
 *
 * This file is part of the libjit library.
 *
 * The libjit library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * The libjit library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the libjit library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include "jit-internal.h"
#include "jit-apply-rules.h"
#include "jit-apply-func.h"

#if defined(__amd64) || defined(__amd64__) || defined(_x86_64) || defined(_x86_64__)

#include "jit-gen-x86-64.h"

/*
 * X86_64 argument types as specified in the X86_64 SysV ABI.
 */
#define X86_64_ARG_NO_CLASS		0x00
#define X86_64_ARG_INTEGER		0x01
#define X86_64_ARG_MEMORY		0x02
#define X86_64_ARG_SSE			0x11
#define X86_64_ARG_SSEUP		0x12
#define X86_64_ARG_X87			0x21
#define X86_64_ARG_X87UP		0x22

#define X86_64_ARG_IS_SSE(arg)	(((arg) & 0x10) != 0)
#define X86_64_ARG_IS_X87(arg)	(((arg) & 0x20) != 0)


void _jit_create_closure(unsigned char *buf, void *func,
                         void *closure, void *_type)
{
	jit_nint offset;
	jit_type_t signature = (jit_type_t)_type;

	/* Set up the local stack frame */
	x86_64_push_reg_size(buf, X86_64_RBP, 8);
	x86_64_mov_reg_reg_size(buf, X86_64_RBP, X86_64_RSP, 8);

	/* Create the apply argument block on the stack */
	x86_64_sub_reg_imm_size(buf, X86_64_RSP, 192, 8);

	/* fill the apply buffer */
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x08, X86_64_RDI, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x10, X86_64_RSI, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x18, X86_64_RDX, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x20, X86_64_RCX, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x28, X86_64_R8, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x30, X86_64_R9, 8);

	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x40, X86_64_XMM0);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x50, X86_64_XMM1);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x60, X86_64_XMM2);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x70, X86_64_XMM3);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x80, X86_64_XMM4);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x90, X86_64_XMM5);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0xA0, X86_64_XMM6);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0xB0, X86_64_XMM7);

	/* Now fill the arguments for the closure function */
	/* the closure function is #1 */
	x86_64_mov_reg_imm_size(buf, X86_64_RDI, (jit_nint)closure, 8);
	/* the apply buff is #2 */
	x86_64_mov_reg_reg_size(buf, X86_64_RSI, X86_64_RSP, 8);

	/* Call the closure handling function */
	offset = (jit_nint)func - ((jit_nint)buf + 5);
	if((offset < jit_min_int) || (offset > jit_max_int))
	{
		/* offset is outside the 32 bit offset range */
		/* so we have to do an indirect call */
		/* We use R11 here because it's the only temporary caller saved */
		/* register not used for argument passing. */
		x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)func, 8);
		x86_64_call_reg(buf, X86_64_R11);
	}
	else
	{
		x86_64_call_imm(buf, (jit_int)offset);
	}

	/* Pop the current stack frame */
	x86_64_mov_reg_reg_size(buf, X86_64_RSP, X86_64_RBP, 8);
	x86_64_pop_reg_size(buf, X86_64_RBP, 8);

	/* Return from the closure */
	x86_64_ret(buf);
}

void *_jit_create_redirector(unsigned char *buf, void *func,
							 void *user_data, int abi)
{
	jit_nint offset;
	void *start = (void *)buf;

	/* Save all registers used for argument passing */
	/* At this point RSP is not aligned on a 16 byte boundary because */
	/* the return address is pushed on the stack. */
	/* We need (7 * 8) + (8 * 16) bytes for the registers */
	x86_64_sub_reg_imm_size(buf, X86_64_RSP, 0xB8, 8);

	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xB0, X86_64_RAX, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xA8, X86_64_RDI, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xA0, X86_64_RSI, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x98, X86_64_RDX, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x90, X86_64_RCX, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x88, X86_64_R8, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x80, X86_64_R9, 8);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x70, X86_64_XMM0);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x60, X86_64_XMM1);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x50, X86_64_XMM2);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x40, X86_64_XMM3);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x30, X86_64_XMM4);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x20, X86_64_XMM5);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x10, X86_64_XMM6);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x00, X86_64_XMM7);

	/* Fill the pointer to the stack args */
	x86_64_lea_membase_size(buf, X86_64_RDI, X86_64_RSP, 0xD0, 8);
	x86_64_mov_regp_reg_size(buf, X86_64_RSP, X86_64_RDI, 8);

	/* Load the user data argument */
	x86_64_mov_reg_imm_size(buf, X86_64_RDI, (jit_nint)user_data, 8);

	/* Call "func" (the pointer result will be in RAX) */
	offset = (jit_nint)func - ((jit_nint)buf + 5);
	if((offset < jit_min_int) || (offset > jit_max_int))
	{
		/* offset is outside the 32 bit offset range */
		/* so we have to do an indirect call */
		/* We use R11 here because it's the only temporary caller saved */
		/* register not used for argument passing. */
		x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)func, 8);
		x86_64_call_reg(buf, X86_64_R11);
	}
	else
	{
		x86_64_call_imm(buf, (jit_int)offset);
	}

	/* store the returned address in R11 */
	x86_64_mov_reg_reg_size(buf, X86_64_R11, X86_64_RAX, 8);

libjit/jit/jit-apply-x86-64.c  view on Meta::CPAN

				else if(arg_class != arg_class2)
				{
					if(arg_class == X86_64_ARG_MEMORY ||
					   arg_class2 == X86_64_ARG_MEMORY)
					{
						arg_class = X86_64_ARG_MEMORY;
					}
					else if(arg_class == X86_64_ARG_INTEGER ||
					   arg_class2 == X86_64_ARG_INTEGER)
					{
						arg_class = X86_64_ARG_INTEGER;
					}
					else if(arg_class == X86_64_ARG_X87 ||
					   arg_class2 == X86_64_ARG_X87)
					{
						arg_class = X86_64_ARG_MEMORY;
					}
					else
					{
						arg_class = X86_64_ARG_SSE;
					}
				}
			}
		}
	}
	return arg_class;
}

/*
 * Helper for _jit_classify_struct: pass the parameter on the stack.
 * Marks it as JIT_ARG_CLASS_STACK and allocates its slot in the
 * argument passing frame.
 */
static void
_jit_struct_param_on_stack(jit_param_passing_t *passing,
						   _jit_param_t *param, jit_type_t param_type)
{
	/* Set the arg class to stack */
	param->arg_class = JIT_ARG_CLASS_STACK;

	/* Allocate the slot in the arg passing frame */
	_jit_alloc_param_slot(passing, param, param_type);
}

/*
 * Helper for _jit_classify_struct: pass the parameter in the next free
 * general purpose argument register, or on the stack if none is left.
 */
static void
_jit_struct_param_in_word_reg(jit_param_passing_t *passing,
							  _jit_param_t *param, jit_type_t param_type)
{
	if(passing->word_index < passing->max_word_regs)
	{
		/* Set the arg class to the number of registers used */
		param->arg_class = 1;

		/* Set the first register to the register used */
		param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
		param->un.reg_info[0].value = param->value;
		++(passing->word_index);
	}
	else
	{
		_jit_struct_param_on_stack(passing, param, param_type);
	}
}

/*
 * Helper for _jit_classify_struct: pass the parameter in the next free
 * floating point argument register, or on the stack if none is left.
 */
static void
_jit_struct_param_in_float_reg(jit_param_passing_t *passing,
							   _jit_param_t *param, jit_type_t param_type)
{
	if(passing->float_index < passing->max_float_regs)
	{
		/* Set the arg class to the number of registers used */
		param->arg_class = 1;

		/* Set the first register to the register used */
		param->un.reg_info[0].reg = passing->float_regs[passing->float_index];
		param->un.reg_info[0].value = param->value;
		++(passing->float_index);
	}
	else
	{
		_jit_struct_param_on_stack(passing, param, param_type);
	}
}

/*
 * Decide how a struct or union parameter is passed and record the
 * decision in "param".
 *
 * On return param->arg_class is either the number of registers used
 * (1 or 2) or JIT_ARG_CLASS_STACK.  The register counters in "passing"
 * are advanced for every register that is handed out.
 *
 *   size <= 8   One eightbyte: INTEGER goes into a general purpose
 *               register, SSE (or NO_CLASS, which is treated as SSE)
 *               into a floating point register, everything else onto
 *               the stack.
 *   size <= 16  Two eightbytes, classified separately:
 *                 SSE + SSE          one floating point register
 *                 MEMORY or X87 part stack
 *                 INTEGER + INTEGER  two general purpose registers
 *                 anything else      one general purpose and one
 *                                    floating point register, in the
 *                                    order of the eightbytes
 *   size > 16   stack
 *
 * Whenever the required registers are not available any more the whole
 * value is passed on the stack instead.  For the two register cases only
 * the registers are assigned here; reg_info[].value is left untouched.
 *
 * An X87 or X87UP eightbyte forces the value onto the stack, because the
 * SysV ABI passes X87 class arguments in memory.  Without this check such
 * a value would be handed one SSE and one general purpose register.
 *
 * NOTE(review): for two SSE eightbytes the SysV ABI assigns one vector
 * register per eightbyte; this code deliberately keeps using a single
 * register.  Confirm that the code consuming this classification relies
 * on that before changing it.
 *
 * Always returns 1.
 */
int
_jit_classify_struct(jit_param_passing_t *passing,
					_jit_param_t *param, jit_type_t param_type)
{
	jit_nuint size = (jit_nuint)jit_type_get_size(param_type);

	if(size <= 8)
	{
		int arg_class;

		arg_class = _jit_classify_structpart(param_type, 0, 0, size - 1);
		if(arg_class == X86_64_ARG_NO_CLASS)
		{
			arg_class = X86_64_ARG_SSE;
		}
		if(arg_class == X86_64_ARG_INTEGER)
		{
			_jit_struct_param_in_word_reg(passing, param, param_type);
		}
		else if(arg_class == X86_64_ARG_SSE)
		{
			_jit_struct_param_in_float_reg(passing, param, param_type);
		}
		else
		{
			_jit_struct_param_on_stack(passing, param, param_type);
		}
	}
	else if(size <= 16)
	{
		int arg_class1;
		int arg_class2;

		arg_class1 = _jit_classify_structpart(param_type, 0, 0, 7);
		arg_class2 = _jit_classify_structpart(param_type, 0, 8, size - 1);
		if(arg_class1 == X86_64_ARG_NO_CLASS)
		{
			arg_class1 = X86_64_ARG_SSE;
		}
		if(arg_class2 == X86_64_ARG_NO_CLASS)
		{
			arg_class2 = X86_64_ARG_SSE;
		}
		if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE)
		{
			/* We use only one sse register in this case */
			_jit_struct_param_in_float_reg(passing, param, param_type);
		}
		else if(arg_class1 == X86_64_ARG_MEMORY ||
				arg_class2 == X86_64_ARG_MEMORY ||
				X86_64_ARG_IS_X87(arg_class1) ||
				X86_64_ARG_IS_X87(arg_class2))
		{
			/* Memory class and X87 class values are passed on the stack */
			_jit_struct_param_on_stack(passing, param, param_type);
		}
		else if(arg_class1 == X86_64_ARG_INTEGER &&
				arg_class2 == X86_64_ARG_INTEGER)
		{
			/* We need two general purpose registers in this case */
			if((passing->word_index + 1) < passing->max_word_regs)
			{
				/* Set the arg class to the number of registers used */
				param->arg_class = 2;

				/* Assign the registers */
				param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
				++(passing->word_index);
				param->un.reg_info[1].reg = passing->word_regs[passing->word_index];
				++(passing->word_index);
			}
			else
			{
				_jit_struct_param_on_stack(passing, param, param_type);
			}
		}
		else
		{
			/* We need one xmm and one general purpose register */
			if((passing->word_index < passing->max_word_regs) &&
			   (passing->float_index < passing->max_float_regs))
			{
				/* Set the arg class to the number of registers used */
				param->arg_class = 2;

				/* The registers are assigned in the order of the eightbytes */
				if(arg_class1 == X86_64_ARG_INTEGER)
				{
					param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
					++(passing->word_index);
					param->un.reg_info[1].reg = passing->float_regs[passing->float_index];
					++(passing->float_index);
				}
				else
				{
					param->un.reg_info[0].reg = passing->float_regs[passing->float_index];
					++(passing->float_index);
					param->un.reg_info[1].reg = passing->word_regs[passing->word_index];
					++(passing->word_index);
				}
			}
			else
			{
				_jit_struct_param_on_stack(passing, param, param_type);
			}
		}
	}
	else
	{
		/* Everything larger than two eightbytes is passed on the stack */
		_jit_struct_param_on_stack(passing, param, param_type);
	}
	return 1;
}

/*
 * Classify one parameter and record in "param" where it is passed.
 *
 * Structs and unions are delegated to _jit_classify_struct.  For all
 * other types the class reported by _jit_classify_arg decides:
 *   INTEGER  next free general purpose register, otherwise stack
 *   SSE      next free floating point register, otherwise stack
 *   MEMORY   stack
 * Any other class leaves "param" and "passing" untouched.
 *
 * Returns the result of _jit_classify_struct for structs and unions
 * and 1 in all other cases.
 */
int
_jit_classify_param(jit_param_passing_t *passing,
					_jit_param_t *param, jit_type_t param_type)
{
	int kind;

	/* Aggregates have their own classification routine */
	if(is_struct_or_union(param_type))
	{
		return _jit_classify_struct(passing, param, param_type);
	}

	kind = _jit_classify_arg(param_type, 0);

	if((kind == X86_64_ARG_INTEGER) &&
	   (passing->word_index < passing->max_word_regs))
	{
		/* One general purpose register holds the value */
		param->arg_class = 1;
		param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
		param->un.reg_info[0].value = param->value;
		++(passing->word_index);
	}
	else if((kind == X86_64_ARG_SSE) &&
			(passing->float_index < passing->max_float_regs))
	{
		/* One floating point register holds the value */
		param->arg_class = 1;
		param->un.reg_info[0].reg = passing->float_regs[passing->float_index];
		param->un.reg_info[0].value = param->value;
		++(passing->float_index);
	}
	else if((kind == X86_64_ARG_INTEGER) ||
			(kind == X86_64_ARG_SSE) ||
			(kind == X86_64_ARG_MEMORY))
	{
		/* Memory class, or no register of the required kind is left: */
		/* the value lives in a slot of the arg passing frame */
		param->arg_class = JIT_ARG_CLASS_STACK;
		_jit_alloc_param_slot(passing, param, param_type);
	}

	return 1;
}

void
_jit_builtin_apply_add_struct(jit_apply_builder *builder,
							  void *value,
							  jit_type_t struct_type)
{
	unsigned int size = jit_type_get_size(struct_type);

	if(size <= 16)
	{
		if(size <= 8)
		{
			int arg_class;
	
			arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
			if(arg_class == X86_64_ARG_NO_CLASS)
			{
				arg_class = X86_64_ARG_SSE;
			}
			if((arg_class == X86_64_ARG_INTEGER) &&
			   (builder->word_used < JIT_APPLY_NUM_WORD_REGS))
			{
				/* The struct is passed in a general purpose register */
				jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
												value, size);
				++(builder->word_used);
			}
			else if((arg_class == X86_64_ARG_SSE) &&
					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
			{
				/* The struct is passed in one sse register */
				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
												value, size);
				++(builder->float_used);
			}
			else
			{
				unsigned int align = jit_type_get_alignment(struct_type);

				jit_apply_builder_add_struct(builder, value, size, align);
			}
		}
		else
		{
			int arg_class1;
			int arg_class2;

			arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
			arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
			if(arg_class1 == X86_64_ARG_NO_CLASS)
			{
				arg_class1 = X86_64_ARG_SSE;
			}



( run in 1.029 second using v1.01-cache-2.11-cpan-df04353d9ac )