AI-MXNetCAPI
view release on metacpan or search on metacpan
%module "AI::MXNetCAPI"
%rename("%(strip:[MX])s") "";
%include typemaps.i
%include mxnet_typemaps.i
%inline %{
#include <c_api.h>
// Adapted from http://cpansearch.perl.org/src/COLEMINOR/Games-EternalLands-Binary-Float16-0.01/Float16.xs
// (local change: inputs in [2^-25, 2^-24) now round to the smallest half
// subnormal instead of being flushed to zero).
/* This method is faster than the OpenEXR implementation (very often
 * used, e.g. in Ogre), with the additional benefit of rounding, inspired
 * by James Tursa's half-precision code.
 *
 * Converts the raw bits of an IEEE 754 binary32 value into the raw bits of
 * a binary16 value. Rounding looks at a single extra mantissa bit, so ties
 * are rounded away from zero.
 *
 * \param x bit pattern of the 32-bit float
 * \return bit pattern of the 16-bit half; overflow yields +/-Inf, any NaN
 *         yields Inf-exponent with the lowest mantissa bit set, and values
 *         below 2^-25 in magnitude yield signed zero
 */
static inline uint16_t _float_to_half(uint32_t x) {
    uint16_t bits = (x >> 16) & 0x8000;  /* sign, already at the half position */
    uint16_t m = (x >> 12) & 0x07ff;     /* 10 kept mantissa bits + 1 rounding bit */
    unsigned int e = (x >> 23) & 0xff;   /* biased float exponent */

    /* Zero, float denormals and magnitudes below 2^-25 become signed zero.
     * e == 102 (magnitudes in [2^-25, 2^-24)) is deliberately NOT flushed:
     * those values are at least half of the smallest half subnormal, so the
     * subnormal path below rounds them up to 0x0001. */
    if (e < 102) {
        return bits;
    }

    /* Exponent too large for a half: Inf, or NaN when the source was NaN. */
    if (e > 142) {
        bits |= 0x7c00u;
        /* A NaN must keep a non-zero mantissa, otherwise it would read as Inf. */
        bits |= e == 255 && (x & 0x007fffffu);
        return bits;
    }

    /* Result is a half subnormal: make the implicit leading 1 explicit, then
     * shift it into place and add the first dropped bit as rounding. The
     * rounding carry may produce exponent 1 / mantissa 0, which is correct. */
    if (e < 113) {
        m |= 0x0800u;
        bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
        return bits;
    }

    /* Normal number: rebias the exponent (127 -> 15) and drop the rounding bit. */
    bits |= ((e - 112) << 10) | (m >> 1);
    /* Rounding; a carry out of the mantissa bumps the exponent, which is correct
     * (and turns the largest finite value into Inf). */
    bits += m & 1;
    return bits;
}
/* Lookup table for the subnormal path of _half_to_float. It is indexed by
 * the top five bits of (smeared mantissa * shiftmagic) and yields the left
 * shift that moves the highest set bit of a 10-bit mantissa up to bit 23.
 * Only the ten slots reachable from a 10-bit mantissa are populated. */
static int const shifttable[32] = {
    23, 14, 22, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 20, 0,
    15, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 17, 0, 18, 19, 0,
};
/* Multiplier that hashes a smeared mantissa into a shifttable index. */
static uint32_t const shiftmagic = 0x07c4acddu;

/* Widens the raw bits of an IEEE 754 binary16 value to the raw bits of the
 * equal binary32 value. Similar to the OpenEXR implementation, but the
 * subnormal case is normalised without a loop: slower than a full table,
 * friendlier to the cache for occasional use.
 *
 * \param x bit pattern of the 16-bit half
 * \return bit pattern of the 32-bit float; every NaN input maps to the
 *         same quiet NaN (sign preserved)
 */
static inline uint32_t _half_to_float(uint16_t x) {
    const uint32_t sign = (uint32_t)(x & 0x8000u) << 16;
    const uint32_t exponent = x & 0x7c00u;
    const uint32_t mantissa = x & 0x03ffu;

    if (exponent == 0x7c00u) {
        /* Inf keeps an empty mantissa, NaN gets the quiet-NaN pattern. */
        return sign | (mantissa != 0 ? 0x7fc00000u : 0x7f800000u);
    }

    if (exponent != 0) {
        /* Normal number: rebias 15 -> 127 and widen the mantissa 10 -> 23 bits. */
        return sign | (((exponent >> 10) + 112) << 23) | (mantissa << 13);
    }

    if (mantissa == 0) {
        /* Signed zero. */
        return sign;
    }

    /* Subnormal: smear the highest set bit downwards so the value becomes
     * 2^k - 1, then hash it to find how far the mantissa must be shifted. */
    uint32_t smear = mantissa | (mantissa >> 1);
    smear |= smear >> 2;
    smear |= smear >> 4;
    smear |= smear >> 8;
    const uint32_t shift = (uint32_t)shifttable[(smear * shiftmagic) >> 27];

    /* Adding (rather than or-ing) lets the shifted leading 1 at bit 23 carry
     * into the exponent field and act as the implicit bit. */
    return sign | (((125 - shift) << 23) + (mantissa << shift));
}
/*!
* \brief Executor forward method
*
* \param handle executor handle
* \param is_train bool value (passed as int) to indicate whether the forward pass is for evaluation
*        NOTE(review): the parameter name suggests non-zero means training,
*        which reads as the opposite of "for evaluation" -- confirm against c_api.h.
* \return 0 when success, -1 when failure happens
*/
int MXExecutorForward(ExecutorHandle handle, int is_train);
/*!
* \brief Executor run backward
*
* \param handle executor handle
* \param len length (presumably the number of entries in head_grads -- confirm)
* \param head_grads NDArray handles for the heads' gradients
*        (declared below as `in`)
*
* \return 0 when success, -1 when failure happens
*/
int MXExecutorBackward(ExecutorHandle handle,
mx_uint len,
NDArrayHandle *in);
/*!
* \brief Get executor's head NDArray
*
* \param handle executor handle
* \param out_size output ndarray vector size
* \param out output ndarray handles (declared below as `out_array`)
* \return 0 when success, -1 when failure happens
*/
int MXExecutorOutputs(ExecutorHandle handle,
mx_uint *out_size,
NDArrayHandle **out_array);
/*!
* \brief Generate Executor from symbol
*
* The array parameters documented here by their descriptive names are all
* declared below as `in`; the trailing comments give the mapping, which
* follows the order of this list.
* NOTE(review): the `in`/`out` names are presumably what the typemaps in
* mxnet_typemaps.i match on -- do not rename without checking.
*
* \param symbol_handle symbol handle
* \param dev_type device type
* \param dev_id device id
* \param len length
* \param in_args in args array
* \param arg_grad_store arg grads handle array
* \param grad_req_type grad req array
* \param aux_states_len length of auxiliary states
* \param aux_states auxiliary states array
* \param out output executor handle
* \return 0 when success, -1 when failure happens
*/
int MXExecutorBind(SymbolHandle symbol_handle,
int dev_type,
int dev_id,
mx_uint len,
NDArrayHandle *in, // in_args
NDArrayHandle *in, // arg_grad_store
mx_uint *in, // grad_req_type
mx_uint aux_states_len,
NDArrayHandle *in, // aux_states
ExecutorHandle *out);
/*!
* \brief Generate Executor from symbol.
* This is an advanced function that allows specifying a group2ctx map.
* The user can annotate the "ctx_group" attribute to name each group.
*
* The array parameters documented here by their descriptive names are all
* declared below as `in`; the trailing comments give the mapping, which
* follows the order of this list.
* NOTE(review): the `in`/`out` names are presumably what the typemaps in
* mxnet_typemaps.i match on -- do not rename without checking.
*
* \param symbol_handle symbol handle
* \param dev_type device type of default context
* \param dev_id device id of default context
* \param num_map_keys size of group2ctx map
* \param map_keys keys of group2ctx map
* \param map_dev_types device type of group2ctx map
* \param map_dev_ids device id of group2ctx map
* \param len length
* \param in_args in args array
* \param arg_grad_store arg grads handle array
* \param grad_req_type grad req array
* \param aux_states_len length of auxiliary states
* \param aux_states auxiliary states array
* \param out output executor handle
* \return 0 when success, -1 when failure happens
*/
int MXExecutorBindX(SymbolHandle symbol_handle,
int dev_type,
int dev_id,
mx_uint num_map_keys,
const char** in, // map_keys
const int* in, // map_dev_types
const int* in, // map_dev_ids
mx_uint len,
NDArrayHandle *in, // in_args
NDArrayHandle *in, // arg_grad_store
mx_uint *in, // grad_req_type
mx_uint aux_states_len,
NDArrayHandle *in, // aux_states
ExecutorHandle *out);
/*!
* \brief Generate Executor from symbol.
* This is an advanced function that allows specifying a group2ctx map.
* The user can annotate the "ctx_group" attribute to name each group.
*
* The array parameters documented here by their descriptive names are all
* declared below as `in`; the trailing comments give the mapping, which
* follows the order of this list.
* NOTE(review): the `in`/`out` names are presumably what the typemaps in
* mxnet_typemaps.i match on -- do not rename without checking.
*
* \param symbol_handle symbol handle
* \param dev_type device type of default context
* \param dev_id device id of default context
* \param num_map_keys size of group2ctx map
* \param map_keys keys of group2ctx map
* \param map_dev_types device type of group2ctx map
* \param map_dev_ids device id of group2ctx map
* \param len length
* \param in_args in args array
* \param arg_grad_store arg grads handle array
* \param grad_req_type grad req array
* \param aux_states_len length of auxiliary states
* \param aux_states auxiliary states array
* \param shared_exec input executor handle for memory sharing
* \param out output executor handle
* \return 0 when success, -1 when failure happens
*/
int MXExecutorBindEX(SymbolHandle symbol_handle,
int dev_type,
int dev_id,
mx_uint num_map_keys,
const char** in, // map_keys
const int* in, // map_dev_types
const int* in, // map_dev_ids
mx_uint len,
NDArrayHandle *in, // in_args
NDArrayHandle *in, // arg_grad_store
mx_uint *in, // grad_req_type
mx_uint aux_states_len,
NDArrayHandle *in, // aux_states
ExecutorHandle shared_exec,
ExecutorHandle *out);
/*!
* \brief Simple-bind entry point for creating an executor from a symbol.
*
* No documentation accompanies this declaration in this file. Every array
* parameter declared as `in` carries a trailing comment with its
* descriptive name; each is preceded by the count parameter for its group
* (g2c keys, provided grad reqs, provided arg shapes, provided arg dtypes,
* shared arg names).
* NOTE(review): the remaining pointer parameters (shared buffer lists,
* num_in_args/in_args/arg_grads, num_aux_states/aux_states, out) look like
* in/out or output arguments -- confirm against c_api.h and the typemaps in
* mxnet_typemaps.i before relying on that.
*
* \param symbol_handle symbol handle
* \param dev_type device type
* \param dev_id device id
* \param shared_exec_handle executor handle (presumably for memory sharing,
*        as in MXExecutorBindEX -- confirm)
* \param out executor handle pointer
* \return presumably 0 when success, -1 when failure happens, like the
*         other executor calls -- confirm
*/
int MXExecutorSimpleBind(SymbolHandle symbol_handle,
int dev_type,
int dev_id,
const mx_uint num_g2c_keys,
const char** in, // g2c_keys,
const int* in, // g2c_dev_types,
const int* in, // g2c_dev_ids,
const mx_uint provided_grad_req_list_len,
const char** in, // provided_grad_req_names,
const char** in, // provided_grad_req_types,
const mx_uint num_provided_arg_shapes,
const char** in, // provided_arg_shape_names,
const mx_uint* in, // provided_arg_shape_data,
const mx_uint* in, // provided_arg_shape_idx,
const mx_uint num_provided_arg_dtypes,
const char** in, // provided_arg_dtype_names,
const int* in, // provided_arg_dtypes,
const mx_uint num_shared_arg_names,
const char** in, // shared_arg_name_list,
//------------
int* shared_buffer_len,
const char** shared_buffer_name_list,
NDArrayHandle* shared_buffer_handle_list,
const char*** updated_shared_buffer_name_list,
NDArrayHandle** updated_shared_buffer_handle_list,
//------------------
mx_uint* num_in_args,
NDArrayHandle** in_args,
NDArrayHandle** arg_grads,
//-----------------
mx_uint* num_aux_states,
NDArrayHandle** aux_states,
//----------
ExecutorHandle shared_exec_handle,
ExecutorHandle* out
);
/*!
* \brief Set a callback to notify the completion of operation
*
* \param handle executor handle
* \param callback the callback to install
* \param callback_handle opaque pointer (presumably handed back to the
*        callback when it is invoked -- confirm against c_api.h)
* \return presumably 0 when success, -1 when failure happens, like the
*         other executor calls -- confirm
*/
int MXExecutorSetMonitorCallback(ExecutorHandle handle,
ExecutorMonitorCallback callback,
void* callback_handle);
//--------------------------------------------
// Part 5: IO Interface
//--------------------------------------------
/*!
( run in 0.726 second using v1.01-cache-2.11-cpan-140bd7fdf52 )