Alien-LibJIT
view release on metacpan or search on metacpan
libjit/jit/jit-apply-x86-64.c view on Meta::CPAN
/*
* jit-apply-x86-64.c - Apply support routines for x86_64.
*
* Copyright (C) 2008 Southern Storm Software, Pty Ltd.
*
* This file is part of the libjit library.
*
* The libjit library is free software: you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation, either version 2.1 of
* the License, or (at your option) any later version.
*
* The libjit library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with the libjit library. If not, see
* <http://www.gnu.org/licenses/>.
*/
#include "jit-internal.h"
#include "jit-apply-rules.h"
#include "jit-apply-func.h"
/*
 * Everything below is only compiled for x86-64 targets.
 *
 * The canonical compiler-predefined spellings are __amd64 / __amd64__ and
 * __x86_64 / __x86_64__ (two leading underscores).  The single-underscore
 * spellings _x86_64 / _x86_64__ that were tested here originally are kept
 * so that any build that defines them by hand keeps working.
 */
#if defined(__amd64) || defined(__amd64__) || \
	defined(__x86_64) || defined(__x86_64__) || \
	defined(_x86_64) || defined(_x86_64__)
#include "jit-gen-x86-64.h"
/*
 * Argument classes used by the X86_64 SysV ABI parameter classification.
 *
 * The high nibble of each value identifies the register family:
 * 0x1x is an SSE class and 0x2x is an x87 class.  The two predicate
 * macros below test exactly those bits.
 */
#define X86_64_ARG_NO_CLASS		0x00	/* no class assigned */
#define X86_64_ARG_INTEGER		0x01	/* general purpose register class */
#define X86_64_ARG_MEMORY		0x02	/* passed in memory */
#define X86_64_ARG_SSE			0x11	/* SSE family */
#define X86_64_ARG_SSEUP		0x12	/* SSE family */
#define X86_64_ARG_X87			0x21	/* x87 family */
#define X86_64_ARG_X87UP		0x22	/* x87 family */

/* Non-zero if "arg" is one of the SSE classes (bit 0x10 set) */
#define X86_64_ARG_IS_SSE(arg)	(0 != ((arg) & 0x10))
/* Non-zero if "arg" is one of the x87 classes (bit 0x20 set) */
#define X86_64_ARG_IS_X87(arg)	(0 != ((arg) & 0x20))
/*
 * Emit the machine code of a closure entry stub at "buf".
 *
 * The emitted stub performs these steps when it is entered:
 *
 *   1. push RBP / mov RBP, RSP to set up a frame, then reserve a
 *      192 byte argument block below it;
 *   2. store the integer argument registers RDI, RSI, RDX, RCX, R8, R9
 *      at offsets 0x08..0x30 of the block and XMM0..XMM7 at offsets
 *      0x40..0xB0 (16 bytes each);
 *   3. call func(closure, <address of the argument block>);
 *   4. restore RSP/RBP and return.  No code is emitted between the call
 *      and the epilogue, so the return registers are left as "func"
 *      set them.
 *
 * Parameters:
 *   buf     - address at which the stub is written.  The pointer is
 *             passed by value, so the caller does not learn how many
 *             bytes were emitted.
 *   func    - the closure handling function the stub calls.
 *   closure - value loaded into RDI (first argument of "func").
 *   _type   - not used by this implementation.
 *
 * NOTE(review): offset 0x00 of the argument block is never written by
 * the stub.  Presumably that slot is meant to hold the pointer to the
 * stack arguments (the redirector stub fills such a slot explicitly) -
 * confirm against the apply-args layout.
 */
void _jit_create_closure(unsigned char *buf, void *func,
						 void *closure, void *_type)
{
	jit_nint offset;

	/* The signature is not needed to build the stub on this platform */
	(void)_type;

	/* Set up the local stack frame */
	x86_64_push_reg_size(buf, X86_64_RBP, 8);
	x86_64_mov_reg_reg_size(buf, X86_64_RBP, X86_64_RSP, 8);

	/* Create the apply argument block on the stack. */
	/* 192 is a multiple of 16, so the movaps slots below stay aligned */
	/* as long as RSP is 16 byte aligned after the push above. */
	x86_64_sub_reg_imm_size(buf, X86_64_RSP, 192, 8);

	/* Fill the apply buffer: the six integer argument registers ... */
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x08, X86_64_RDI, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x10, X86_64_RSI, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x18, X86_64_RDX, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x20, X86_64_RCX, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x28, X86_64_R8, 8);
	x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x30, X86_64_R9, 8);

	/* ... followed by the eight SSE argument registers */
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x40, X86_64_XMM0);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x50, X86_64_XMM1);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x60, X86_64_XMM2);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x70, X86_64_XMM3);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x80, X86_64_XMM4);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x90, X86_64_XMM5);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0xA0, X86_64_XMM6);
	x86_64_movaps_membase_reg(buf, X86_64_RSP, 0xB0, X86_64_XMM7);

	/* Now fill the arguments for the closure function. */
	/* This has to happen after the spills above because RDI and RSI */
	/* are overwritten here. */
	/* The closure is argument #1 */
	x86_64_mov_reg_imm_size(buf, X86_64_RDI, (jit_nint)closure, 8);
	/* The apply buffer is argument #2 */
	x86_64_mov_reg_reg_size(buf, X86_64_RSI, X86_64_RSP, 8);

	/* Call the closure handling function. */
	/* A relative call is 5 bytes long and its displacement is measured */
	/* from the end of the instruction, hence the "+ 5". */
	offset = (jit_nint)func - ((jit_nint)buf + 5);
	if((offset < jit_min_int) || (offset > jit_max_int))
	{
		/* The offset is outside the 32 bit displacement range */
		/* so we have to do an indirect call. */
		/* R11 is used because it is a caller saved scratch register */
		/* that is not used for argument passing. */
		x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)func, 8);
		x86_64_call_reg(buf, X86_64_R11);
	}
	else
	{
		x86_64_call_imm(buf, (jit_int)offset);
	}

	/* Pop the current stack frame */
	x86_64_mov_reg_reg_size(buf, X86_64_RSP, X86_64_RBP, 8);
	x86_64_pop_reg_size(buf, X86_64_RBP, 8);

	/* Return from the closure */
	x86_64_ret(buf);
}
void *_jit_create_redirector(unsigned char *buf, void *func,
void *user_data, int abi)
{
jit_nint offset;
void *start = (void *)buf;
/* Save all registers used for argument passing */
/* At this point RSP is not aligned on a 16 byte boundary because */
/* the return address is pushed on the stack. */
/* We need (7 * 8) + (8 * 16) bytes for the registers */
x86_64_sub_reg_imm_size(buf, X86_64_RSP, 0xB8, 8);
x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xB0, X86_64_RAX, 8);
x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xA8, X86_64_RDI, 8);
x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xA0, X86_64_RSI, 8);
x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x98, X86_64_RDX, 8);
x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x90, X86_64_RCX, 8);
x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x88, X86_64_R8, 8);
x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0x80, X86_64_R9, 8);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x70, X86_64_XMM0);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x60, X86_64_XMM1);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x50, X86_64_XMM2);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x40, X86_64_XMM3);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x30, X86_64_XMM4);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x20, X86_64_XMM5);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x10, X86_64_XMM6);
x86_64_movaps_membase_reg(buf, X86_64_RSP, 0x00, X86_64_XMM7);
/* Fill the pointer to the stack args */
x86_64_lea_membase_size(buf, X86_64_RDI, X86_64_RSP, 0xD0, 8);
x86_64_mov_regp_reg_size(buf, X86_64_RSP, X86_64_RDI, 8);
/* Load the user data argument */
x86_64_mov_reg_imm_size(buf, X86_64_RDI, (jit_nint)user_data, 8);
/* Call "func" (the pointer result will be in RAX) */
offset = (jit_nint)func - ((jit_nint)buf + 5);
if((offset < jit_min_int) || (offset > jit_max_int))
{
/* offset is outside the 32 bit offset range */
/* so we have to do an indirect call */
/* We use R11 here because it's the only temporary caller saved */
/* register not used for argument passing. */
x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)func, 8);
x86_64_call_reg(buf, X86_64_R11);
}
else
{
x86_64_call_imm(buf, (jit_int)offset);
}
/* store the returned address in R11 */
x86_64_mov_reg_reg_size(buf, X86_64_R11, X86_64_RAX, 8);
libjit/jit/jit-apply-x86-64.c view on Meta::CPAN
else if(arg_class != arg_class2)
{
if(arg_class == X86_64_ARG_MEMORY ||
arg_class2 == X86_64_ARG_MEMORY)
{
arg_class = X86_64_ARG_MEMORY;
}
else if(arg_class == X86_64_ARG_INTEGER ||
arg_class2 == X86_64_ARG_INTEGER)
{
arg_class = X86_64_ARG_INTEGER;
}
else if(arg_class == X86_64_ARG_X87 ||
arg_class2 == X86_64_ARG_X87)
{
arg_class = X86_64_ARG_MEMORY;
}
else
{
arg_class = X86_64_ARG_SSE;
}
}
}
}
}
return arg_class;
}
/*
 * Decide how a struct or union parameter is passed and record the
 * decision in "param".
 *
 * The struct is looked at in 8 byte parts, each classified with
 * _jit_classify_structpart (a part without a class is treated as SSE):
 *
 *   size <= 8   one part; INTEGER takes one word register, SSE takes
 *               one float register.
 *   size <= 16  two parts; SSE + SSE takes one float register,
 *               INTEGER + INTEGER takes two word registers, any other
 *               combination without a MEMORY part takes one word and
 *               one float register, assigned in part order.
 *   otherwise   the struct is passed on the stack.
 *
 * Whenever a MEMORY part is involved or the required registers are not
 * all available, the parameter is passed on the stack instead and a
 * slot is allocated with _jit_alloc_param_slot.
 *
 * On register passing param->arg_class holds the number of registers
 * used and param->un.reg_info[] the registers.  The "value" member of
 * reg_info is only filled in when a single register is used.
 *
 * The register indexes in "passing" are advanced for every register
 * that gets consumed.  The function always returns 1.
 *
 * NOTE(review): a size of 0 makes "struct_size - 1" wrap around because
 * the size is unsigned - confirm that empty structs cannot reach here.
 * NOTE(review): the SysV ABI appears to assign two SSE registers to a
 * struct with two SSE class parts; here only one is used - confirm that
 * this matches the code that loads and stores such parameters.
 */
int
_jit_classify_struct(jit_param_passing_t *passing,
					_jit_param_t *param, jit_type_t param_type)
{
	jit_nuint struct_size = (jit_nuint)jit_type_get_size(param_type);
	int in_registers = 0;

	if(struct_size <= 8)
	{
		/* The whole struct fits into one 8 byte part */
		int part_class = _jit_classify_structpart(param_type, 0, 0,
												  struct_size - 1);

		if(part_class == X86_64_ARG_NO_CLASS)
		{
			part_class = X86_64_ARG_SSE;
		}

		if(part_class == X86_64_ARG_INTEGER &&
		   passing->word_index < passing->max_word_regs)
		{
			/* One general purpose register */
			param->arg_class = 1;
			param->un.reg_info[0].reg =
				passing->word_regs[passing->word_index];
			param->un.reg_info[0].value = param->value;
			passing->word_index++;
			in_registers = 1;
		}
		else if(part_class == X86_64_ARG_SSE &&
				passing->float_index < passing->max_float_regs)
		{
			/* One sse register */
			param->arg_class = 1;
			param->un.reg_info[0].reg =
				passing->float_regs[passing->float_index];
			param->un.reg_info[0].value = param->value;
			passing->float_index++;
			in_registers = 1;
		}
	}
	else if(struct_size <= 16)
	{
		/* Two parts: bytes 0..7 and bytes 8..size-1 */
		int low_class = _jit_classify_structpart(param_type, 0, 0, 7);
		int high_class = _jit_classify_structpart(param_type, 0, 8,
												  struct_size - 1);

		if(low_class == X86_64_ARG_NO_CLASS)
		{
			low_class = X86_64_ARG_SSE;
		}
		if(high_class == X86_64_ARG_NO_CLASS)
		{
			high_class = X86_64_ARG_SSE;
		}

		if(low_class == X86_64_ARG_SSE && high_class == X86_64_ARG_SSE)
		{
			/* Both parts share a single sse register */
			if(passing->float_index < passing->max_float_regs)
			{
				param->arg_class = 1;
				param->un.reg_info[0].reg =
					passing->float_regs[passing->float_index];
				param->un.reg_info[0].value = param->value;
				passing->float_index++;
				in_registers = 1;
			}
		}
		else if(low_class != X86_64_ARG_MEMORY &&
				high_class != X86_64_ARG_MEMORY)
		{
			if(low_class == X86_64_ARG_INTEGER &&
			   high_class == X86_64_ARG_INTEGER)
			{
				/* Two general purpose registers, both must be free */
				if((passing->word_index + 1) < passing->max_word_regs)
				{
					param->arg_class = 2;
					param->un.reg_info[0].reg =
						passing->word_regs[passing->word_index++];
					param->un.reg_info[1].reg =
						passing->word_regs[passing->word_index++];
					in_registers = 1;
				}
			}
			else if((passing->word_index < passing->max_word_regs) &&
					(passing->float_index < passing->max_float_regs))
			{
				/* One general purpose and one xmm register, handed */
				/* out in the order of the parts */
				param->arg_class = 2;
				if(low_class == X86_64_ARG_INTEGER)
				{
					param->un.reg_info[0].reg =
						passing->word_regs[passing->word_index++];
					param->un.reg_info[1].reg =
						passing->float_regs[passing->float_index++];
				}
				else
				{
					param->un.reg_info[0].reg =
						passing->float_regs[passing->float_index++];
					param->un.reg_info[1].reg =
						passing->word_regs[passing->word_index++];
				}
				in_registers = 1;
			}
		}
	}

	if(!in_registers)
	{
		/* Too large, classified as memory or out of registers: */
		/* pass the struct in a slot of the arg passing frame */
		param->arg_class = JIT_ARG_CLASS_STACK;
		_jit_alloc_param_slot(passing, param, param_type);
	}
	return 1;
}
/*
 * Decide how a single parameter is passed and record the decision in
 * "param".
 *
 * Structs and unions are delegated to _jit_classify_struct.  Any other
 * type is classified with _jit_classify_arg:
 *
 *   INTEGER  next free word register, or the stack if none is left
 *   SSE      next free float register, or the stack if none is left
 *   MEMORY   always the stack
 *
 * Any other class leaves "param" and "passing" untouched.
 *
 * For register passing param->arg_class is set to 1 and reg_info[0]
 * receives the register and the parameter value; for stack passing
 * param->arg_class is JIT_ARG_CLASS_STACK and a slot is allocated with
 * _jit_alloc_param_slot.
 *
 * Returns the result of _jit_classify_struct for structs and unions,
 * and 1 otherwise.
 */
int
_jit_classify_param(jit_param_passing_t *passing,
					_jit_param_t *param, jit_type_t param_type)
{
	int kind;
	int use_stack = 0;

	if(is_struct_or_union(param_type))
	{
		return _jit_classify_struct(passing, param, param_type);
	}

	kind = _jit_classify_arg(param_type, 0);
	if(kind == X86_64_ARG_INTEGER)
	{
		if(passing->word_index < passing->max_word_regs)
		{
			/* Passed in the next general purpose register */
			param->arg_class = 1;
			param->un.reg_info[0].reg =
				passing->word_regs[passing->word_index];
			param->un.reg_info[0].value = param->value;
			passing->word_index++;
		}
		else
		{
			use_stack = 1;
		}
	}
	else if(kind == X86_64_ARG_SSE)
	{
		if(passing->float_index < passing->max_float_regs)
		{
			/* Passed in the next sse register */
			param->arg_class = 1;
			param->un.reg_info[0].reg =
				passing->float_regs[passing->float_index];
			param->un.reg_info[0].value = param->value;
			passing->float_index++;
		}
		else
		{
			use_stack = 1;
		}
	}
	else if(kind == X86_64_ARG_MEMORY)
	{
		use_stack = 1;
	}

	if(use_stack)
	{
		/* Pass the value in a slot of the arg passing frame */
		param->arg_class = JIT_ARG_CLASS_STACK;
		_jit_alloc_param_slot(passing, param, param_type);
	}
	return 1;
}
void
_jit_builtin_apply_add_struct(jit_apply_builder *builder,
void *value,
jit_type_t struct_type)
{
unsigned int size = jit_type_get_size(struct_type);
if(size <= 16)
{
if(size <= 8)
{
int arg_class;
arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
if(arg_class == X86_64_ARG_NO_CLASS)
{
arg_class = X86_64_ARG_SSE;
}
if((arg_class == X86_64_ARG_INTEGER) &&
(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
{
/* The struct is passed in a general purpose register */
jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
value, size);
++(builder->word_used);
}
else if((arg_class == X86_64_ARG_SSE) &&
(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
{
/* The struct is passed in one sse register */
jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
value, size);
++(builder->float_used);
}
else
{
unsigned int align = jit_type_get_alignment(struct_type);
jit_apply_builder_add_struct(builder, value, size, align);
}
}
else
{
int arg_class1;
int arg_class2;
arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
if(arg_class1 == X86_64_ARG_NO_CLASS)
{
arg_class1 = X86_64_ARG_SSE;
}
( run in 1.029 second using v1.01-cache-2.11-cpan-df04353d9ac )