Generic SIMD Intrinsic Library API  0.6
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
Classes | Macros | Typedefs | Functions
gsimd_utility.h File Reference
#include <stdint.h>
#include <iostream>

Go to the source code of this file.

Classes

struct  invalid_template_arguments< Lanes, T >
 
union  BitcastUnion
 

Macros

#define DUMP(v)   std::cout << #v << ":" << (v) << std::endl
 
#define IS32BIT
 
#define INC_STATS_NAME(stat, inc, opname)
 
#define INC_STATS(stat, inc)
 
#define NOT_IMPLEMENTED(msg)
 
#define FORCEINLINE   inline __attribute__((always_inline))
 
#define PRE_ALIGN(x)
 
#define POST_ALIGN(x)   __attribute__ ((aligned(x)))
 
#define DEFINE_TYPE_NAME(type, name)   template<> FORCEINLINE const char *iu_get_type_name<type>(){return name;} \
 
#define SUBSCRIPT_FUNC_DECL(STYPE)
 macros to define an intrinsic-based subscript operator More...
 
#define SUBSCRIPT_FUNC_BOOL_DECL(STYPE)
 
#define COUT_FUNC_BOOL_DECL()
 
#define COUT_FUNC_CHAR_DECL(STYPE)
 
#define COUT_FUNC_DECL(STYPE)
 
#define VEC_CMP_DECL(STYPE)
 macros to define compare methods == and != are available for all the types. More...
 
#define VEC_UNARY_DECL(STYPE)
 macros for unary. note "-" means neg or complement More...
 
#define VEC_BIN_DECL(STYPE)
 macros for binary operations. More...
 
#define SVEC_BOOL_CLASS_METHOD_DECL()
 macros for svec<N,bool> class's class method More...
 
#define VEC_CLASS_METHOD_DECL(STYPE)
 macros for non-mask i8 - double types's method More...
 
#define VEC_INT_CLASS_METHOD_DECL(STYPE, USTYPE)
 macros method definition for integer vector only Note: shift's operator can only be unsigned vector More...
 
#define VEC_FLOAT_CLASS_METHOD_DECL(STYPE)
 
#define INSERT_EXTRACT(STYPE)
 macros for svec's insert extract method implementation The implementation is based on vector type's subscript operator More...
 
#define LOAD_STORE(STYPE)
 
#define SELECT(STYPE)
 macros for svec's select by mask vector method generic implementation More...
 
#define SELECT_BOOLCOND(STYPE)
 macros for svec's select by bool scalar method implementation More...
 
#define BROADCAST(STYPE)
 macro for broadcast method implementation All broadcast are slow implementation More...
 
#define BROADCAST_L4(STYPE)
 macro for broadcast method implementation for lanes4 All broadcast are slow implementation More...
 
#define ROTATE(STYPE)
 macro for rotate method implementation More...
 
#define ROTATE_L4(STYPE)
 macro for rotate method implementation More...
 
#define SHUFFLES(STYPE)
 macro for shuffle/shuffle2 methods implementation More...
 
#define SHUFFLES_L4(STYPE)
 macro for shuffle/shuffle2 methods implementation More...
 
#define ZERO(STYPE, NAME)
 macro for setzero method implementation More...
 
#define LOAD_CONST(STYPE)
 
#define GATHER_GENERAL(STYPE, PSTYPE)
 slow implementation of gather general Must use template to specify the return type More...
 
#define GATHER_GENERAL_L4(STYPE, PSTYPE)
 slow implementation of gather general Must use template to specify the return type More...
 
#define GATHER_BASE_OFFSETS(STYPE, OSTYPE)
 
#define GATHER_BASE_OFFSETS_L4(STYPE, OSTYPE)
 
#define GATHER_STRIDE(STYPE, OSTYPE)
 macros for general impl of gather base step More...
 
#define GATHER_STRIDE_L4(STYPE, OSTYPE)
 macros for fast impl of gather base step More...
 
#define SCATTER_STRIDE(STYPE, OSTYPE)
 
#define SCATTER_STRIDE_L4(STYPE, OSTYPE)
 
#define SCATTER_GENERAL(STYPE, PSTYPE)
 
#define SCATTER_GENERAL_L4(STYPE, PSTYPE)
 
#define SCATTER_BASE_OFFSETS(STYPE, OSTYPE)
 
#define SCATTER_BASE_OFFSETS_L4(STYPE, OSTYPE)
 
#define MASKED_LOAD_STORE_L4(STYPE)
 
#define MASKED_LOAD_STORE_L8(STYPE)
 
#define UNARY_OP(STYPE, NAME, OP)
 
#define UNARY_OP_L4(STYPE, NAME, OP)
 
#define BINARY_OP(STYPE, NAME, OP)
 macros for generic slow impl of binary operation More...
 
#define BINARY_OP2(STYPE, STYPE2, NAME, OP)
 
#define BINARY_OP_FUNC(STYPE, NAME, FUNC)
 
#define BINARY_OP_L4(STYPE, NAME, OP)
 macros for generic slow impl of binary operation More...
 
#define BINARY_OP2_L4(STYPE, STYPE2, NAME, OP)
 macros for generic slow impl of binary operation, style 2 More...
 
#define BINARY_OP_FUNC_L4(STYPE, NAME, FUNC)
 
#define BINARY_OP_SCALAR_L4(STYPE, STYPE2, NAME, OP)
 macros for binary: vector op scalar More...
 
#define BINARY_OP_SCALAR(STYPE, NAME, OP)
 macros for binary: vector op scalar More...
 
#define BINARY_SHT_SCALAR(STYPE, SHTTYPE, NAME, OP)
 macros for binary shift: vector op intvalue More...
 
#define BINARY_SCALAR_OP(STYPE, NAME, OP)
 macros for binary: scalar op vector More...
 
#define TERNERY(STYPE)
 
#define TERNERY_L4(STYPE)
 
#define BINARY_OP_REDUCE_FUNC(STYPE, NAME, FUNC)
 
#define BINARY_OP_REDUCE_FUNC_L4(STYPE, NAME, FUNC)
 
#define CMP_OP(STYPE, NAME, OP)
 macros for compare: vector op vector More...
 
#define CMP_OP_L4(STYPE, NAME, OP)
 
#define CMP_MASKED_OP(STYPE, NAME, OP)
 
#define CMP_ALL_NOMASK_OP(STYPE)
 
#define CMP_ALL_NOMASK_OP_L4(STYPE)
 
#define CMP_ALL_MASKED_OP(STYPE)
 
#define CMP_ALL_OP(STYPE)
 
#define CAST(SFROM, STO)
 
#define CAST_L4(SFROM, STO)
 
#define CAST_BITS(SFROM, FROM_F, STO, TO_F)
 
#define SUBSCRIPT_FUNC_IMPL(STYPE)
 
#define VEC_CMP_IMPL(STYPE)
 
#define VEC_UNARY_IMPL(STYPE)
 
#define VEC_BIN_IMPL(STYPE)
 
#define MVEC_CLASS_METHOD_IMPL(STYPE)
 mask class's class method impl More...
 
#define VEC_CLASS_METHOD_IMPL(STYPE)
 
#define VEC_INT_CLASS_METHOD_IMPL(STYPE, STYPE2)
 
#define VEC_FLOAT_CLASS_METHOD_IMPL(STYPE)
 

Typedefs

typedef uint8_t svec1_u8
 
typedef int8_t svec1_i8
 
typedef uint16_t svec1_u16
 
typedef int16_t svec1_i16
 
typedef uint32_t svec1_u32
 
typedef int32_t svec1_i32
 
typedef uint64_t svec1_u64
 
typedef int64_t svec1_i64
 
typedef float svec1_f
 
typedef double svec1_d
 

Functions

template<typename T >
const char * iu_get_type_name ()
 
template<>
const char * iu_get_type_name< int8_t > ()
 
template<>
const char * iu_get_type_name< uint8_t > ()
 
template<>
const char * iu_get_type_name< int16_t > ()
 
template<>
const char * iu_get_type_name< uint16_t > ()
 
template<>
const char * iu_get_type_name< int32_t > ()
 
template<>
const char * iu_get_type_name< uint32_t > ()
 
template<>
const char * iu_get_type_name< int64_t > ()
 
template<>
const char * iu_get_type_name< uint64_t > ()
 
template<>
const char * iu_get_type_name< float > ()
 
template<>
const char * iu_get_type_name< double > ()
 
template<typename T >
void stdout_scalar (std::ostream &out, T v)
 
template<>
void stdout_scalar< int8_t > (std::ostream &out, int8_t v)
 
template<>
void stdout_scalar< uint8_t > (std::ostream &out, uint8_t v)
 
template<int N>
const bool check_lanes (int n)
 
template<>
const bool check_lanes< 2 > (int n)
 
template<>
const bool check_lanes< 4 > (int n)
 
template<>
const bool check_lanes< 8 > (int n)
 
template<>
const bool check_lanes< 16 > (int n)
 

Macro Definition Documentation

#define BINARY_OP (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = a[i] OP b[i]; } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for generic slow impl of binary operation

#define BINARY_OP2 (   STYPE,
  STYPE2,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE2> b) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = a[i] OP b[i]; } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define BINARY_OP2_L4 (   STYPE,
  STYPE2,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE2> b) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret(svec_extract(a, 0) OP svec_extract(b, 0),\
svec_extract(a, 1) OP svec_extract(b, 1),\
svec_extract(a, 2) OP svec_extract(b, 2),\
svec_extract(a, 3) OP svec_extract(b, 3));\
return ret; \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for generic slow impl of binary operation, style 2

#define BINARY_OP_FUNC (   STYPE,
  NAME,
  FUNC 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = FUNC(a[i], b[i]); } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define BINARY_OP_FUNC_L4 (   STYPE,
  NAME,
  FUNC 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret(FUNC(svec_extract(a, 0), svec_extract(b, 0)),\
FUNC(svec_extract(a, 1), svec_extract(b, 1)),\
FUNC(svec_extract(a, 2), svec_extract(b, 2)),\
FUNC(svec_extract(a, 3), svec_extract(b, 3))); \
return ret; \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define BINARY_OP_L4 (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret(svec_extract(a, 0) OP svec_extract(b, 0),\
svec_extract(a, 1) OP svec_extract(b, 1),\
svec_extract(a, 2) OP svec_extract(b, 2),\
svec_extract(a, 3) OP svec_extract(b, 3));\
return ret; \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for generic slow impl of binary operation

#define BINARY_OP_REDUCE_FUNC (   STYPE,
  NAME,
  FUNC 
)
Value:
static FORCEINLINE STYPE NAME(svec<LANES,STYPE> a) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "reduce"); \
STYPE r = a[0]; \
for(int i = 1; i < LANES; ++i) { r = FUNC(r, a[i]); } \
return r; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define BINARY_OP_REDUCE_FUNC_L4 (   STYPE,
  NAME,
  FUNC 
)
Value:
static FORCEINLINE STYPE NAME(svec<LANES,STYPE> a) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "reduce"); \
return FUNC(FUNC(FUNC(a[0], a[1]), a[2]), a[3]); \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define BINARY_OP_SCALAR (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, STYPE s) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = a[i] OP s; } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for binary: vector op scalar

#define BINARY_OP_SCALAR_L4 (   STYPE,
  STYPE2,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, STYPE2 s) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret(svec_extract(a, 0) OP s,\
svec_extract(a, 1) OP s,\
svec_extract(a, 2) OP s,\
svec_extract(a, 3) OP s);\
return ret; \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for binary: vector op scalar

#define BINARY_SCALAR_OP (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(STYPE s, svec<LANES,STYPE> a) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = s OP a[i]; }\
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for binary: scalar op vector

#define BINARY_SHT_SCALAR (   STYPE,
  SHTTYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, SHTTYPE s) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = a[i] OP s; } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for binary shift: vector op intvalue

#define BROADCAST (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_broadcast(svec<LANES,STYPE> v, int index) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "broadcast"); \
STYPE bval = v[index]; \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = bval;} \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macro for broadcast method implementation All broadcast are slow implementation

#define BROADCAST_L4 (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_broadcast(svec<LANES,STYPE> v, int index) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "broadcast"); \
STYPE bval = v[index]; \
svec<LANES,STYPE> ret(bval,bval,bval,bval); \
return ret; \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macro for broadcast method implementation for lanes4 All broadcast are slow implementation

#define CAST (   SFROM,
  STO 
)
Value:
template <class T> static T svec_cast(svec<LANES,SFROM> val); \ \
template <> FORCEINLINE svec<LANES,STO> svec_cast<svec<LANES,STO> >(svec<LANES,SFROM> val) { \
INC_STATS_NAME(STATS_CAST_SLOW, 1, "svec<LANES,"#SFROM">-svec<LANES,"#STO">"); \
svec<LANES,STO> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = (STO)val[i]; } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define CAST_BITS (   SFROM,
  FROM_F,
  STO,
  TO_F 
)
Value:
template <class T> static T svec_cast_bits(svec<LANES,SFROM> val); \
template <> FORCEINLINE svec<LANES,STO> svec_cast_bits<svec<LANES,STO> >(svec<LANES,SFROM> val) { \
INC_STATS_NAME(STATS_CAST_SLOW, 1, "svec<LANES,"#SFROM">-svec<LANES,"#STO">"); \
BitcastUnion u[LANES]; \
svec<LANES,STO> ret; \
for(int i = 0; i < LANES; ++i) {u[i].FROM_F = val[i]; ret[i] = u[i].TO_F;} \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define CAST_L4 (   SFROM,
  STO 
)
Value:
template <class T> static T svec_cast(svec<LANES,SFROM> val); \ \
template <> FORCEINLINE svec<LANES,STO> svec_cast<svec<LANES,STO> >(svec<LANES,SFROM> val) { \
INC_STATS_NAME(STATS_CAST_SLOW, 1, "svec<LANES,"#SFROM">-svec<LANES,"#STO">"); \
return svec<LANES,STO>((STO)val[0],(STO)val[1],(STO)val[2],(STO)val[3]); \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define CMP_ALL_MASKED_OP (   STYPE)
Value:
CMP_MASKED_OP(STYPE, equal, ==) \
CMP_MASKED_OP(STYPE, not_equal, !=) \
CMP_MASKED_OP(STYPE, less_than, <) \
CMP_MASKED_OP(STYPE, less_equal, <=) \
CMP_MASKED_OP(STYPE, greater_than, >) \
CMP_MASKED_OP(STYPE, greater_equal, >=)
#define CMP_MASKED_OP(STYPE, NAME, OP)
Definition: gsimd_utility.h:1070
#define CMP_ALL_NOMASK_OP (   STYPE)
Value:
CMP_OP(STYPE, equal, ==) \
CMP_OP(STYPE, not_equal, !=) \
CMP_OP(STYPE, less_than, <) \
CMP_OP(STYPE, less_equal, <=) \
CMP_OP(STYPE, greater_than, >) \
CMP_OP(STYPE, greater_equal, >=)
#define CMP_OP(STYPE, NAME, OP)
macros for compare: vector op vector
Definition: gsimd_utility.h:1049
#define CMP_ALL_NOMASK_OP_L4 (   STYPE)
Value:
CMP_OP_L4(STYPE, equal, ==) \
CMP_OP_L4(STYPE, not_equal, !=) \
CMP_OP_L4(STYPE, less_than, <) \
CMP_OP_L4(STYPE, less_equal, <=) \
CMP_OP_L4(STYPE, greater_than, >) \
CMP_OP_L4(STYPE, greater_equal, >=)
#define CMP_OP_L4(STYPE, NAME, OP)
Definition: gsimd_utility.h:1057
#define CMP_ALL_OP (   STYPE)
Value:
#define CMP_ALL_MASKED_OP(STYPE)
Definition: gsimd_utility.h:1099
#define CMP_ALL_NOMASK_OP(STYPE)
Definition: gsimd_utility.h:1083
#define CMP_MASKED_OP (   STYPE,
  NAME,
  OP 
)
Value:
FORCEINLINE svec<LANES,bool> svec_masked_##NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b, \
svec<LANES,bool> mask) { \
return svec_and(svec_##NAME(a,b) , mask); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175

Macros for masked operation based on fast operation

#define CMP_OP (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,bool> svec_##NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
INC_STATS_NAME(STATS_BINARY_SLOW, 1, #NAME); \
svec<LANES,bool> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = a[i] OP b[i]; } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for compare: vector op vector

#define CMP_OP_L4 (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,bool> svec_##NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
INC_STATS_NAME(STATS_COMPARE_SLOW, 1, #NAME); \
uint32_t r0 = (a[0] OP b[0]); \
uint32_t r1 = (a[1] OP b[1]); \
uint32_t r2 = (a[2] OP b[2]); \
uint32_t r3 = (a[3] OP b[3]); \
return svec<LANES,bool>(r0,r1,r2,r3); \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define COUT_FUNC_BOOL_DECL ( )
Value:
friend std::ostream& operator<< (std::ostream &out, const svec<LANES,bool> &v) { \
out << "svec<" << LANES << ",bool> " << "[" << (v[0]?1:0); \
for(int i = 1; i < LANES ; i++) { out << ", " << (v[i]?1:0);} \
out << "]"; \
return out; \
} \
#define LANES
Definition: power_vsx4.h:119
#define COUT_FUNC_CHAR_DECL (   STYPE)
Value:
friend std::ostream& operator<< (std::ostream &out, const svec<LANES,STYPE> &v) { \
out << "svec<" << LANES << "," << #STYPE <<"> [" << short(v[0]); \
for(int i = 1; i < LANES ; i++) { out << ", " << short(v[i]);} \
out << "]"; \
return out; \
} \
#define LANES
Definition: power_vsx4.h:119
#define COUT_FUNC_DECL (   STYPE)
Value:
friend std::ostream& operator<< (std::ostream &out, const svec<LANES,STYPE> &v) { \
out << "svec<" << LANES << "," << #STYPE <<"> [" << v[0]; \
for(int i = 1; i < LANES ; i++) { out << ", " << v[i];} \
out << "]"; \
return out; \
} \
#define LANES
Definition: power_vsx4.h:119
#define DEFINE_TYPE_NAME (   type,
  name 
)    template<> FORCEINLINE const char *iu_get_type_name<type>(){return name;} \
#define DUMP (   v)    std::cout << #v << ":" << (v) << std::endl

Copyright 2012 the Generic SIMD Intrinsic Library project authors. All rights reserved.

Copyright IBM Corp. 2013, 2013. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

 Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.
 Redistributions in binary form must reproduce the above
 copyright notice, this list of conditions and the following
 disclaimer in the documentation and/or other materials provided
 with the distribution.
 Neither the name of IBM Corp. nor the names of its contributors may be
 used to endorse or promote products derived from this software
 without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The original source code covered by the above license above has been modified significantly by IBM Corp. Copyright 2013 the Generic SIMD Intrinsic Library project authors. All rights reserved.

Copyright (c) 2010-2012, Intel Corporation All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. gsimd_utility.h

Created on: Jun 12, 2013

Author
: Haichuan Wang (haich.nosp@m.uan@.nosp@m.us.ib.nosp@m.m.co.nosp@m.m, hwang.nosp@m.154@.nosp@m.illin.nosp@m.ois..nosp@m.edu)

Contains the utility functions for intrinsics implementation. Should be included in all intrinsics header files

#define FORCEINLINE   inline __attribute__((always_inline))
#define GATHER_BASE_OFFSETS (   STYPE,
  OSTYPE 
)
Value:
FORCEINLINE svec<LANES,STYPE> svec_gather_base_offsets(STYPE* b, uint32_t scale, svec<LANES,OSTYPE> offsets, svec<LANES,bool> mask) { \
svec<LANES,STYPE> ret;\
for(int i = 0; i < LANES; ++i) {if(mask[i]){ret[i] = *(STYPE*)((uint8_t*)b + scale * offsets[i]);} }\
INC_STATS_NAME(STATS_GATHER_SLOW,1, "Gather offset with select"); \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define GATHER_BASE_OFFSETS_L4 (   STYPE,
  OSTYPE 
)
Value:
FORCEINLINE svec<LANES,STYPE> svec_gather_base_offsets(STYPE* b, uint32_t scale, svec<LANES,OSTYPE> offsets, svec<LANES,bool> mask) { \
return lGatherBaseOffsets<svec<LANES,STYPE>, STYPE, svec<LANES,OSTYPE>, svec<LANES,bool> >((uint8_t*)b, scale, offsets, mask); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175

@ macros for generic impl of gather base offsets

#define GATHER_GENERAL (   STYPE,
  PSTYPE 
)
Value:
template<> \
FORCEINLINE svec<LANES,STYPE> svec_gather<svec<LANES,STYPE> >(svec<LANES,PSTYPE> ptrs, svec<LANES,bool> mask) { \
svec<LANES,STYPE> ret;\
for(int i = 0; i < LANES; ++i) {if(mask[i]){ret[i] = *(STYPE*)(ptrs[i]); } }\
INC_STATS_NAME(STATS_GATHER_SLOW, 1, "Gather genera"); \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

slow implementation of gather general Must use template to specify the return type

Parameters
mask
Returns
#define GATHER_GENERAL_L4 (   STYPE,
  PSTYPE 
)
Value:
template<> \
FORCEINLINE svec<LANES,STYPE> svec_gather<svec<LANES,STYPE> >(svec<LANES,PSTYPE> ptrs, svec<LANES,bool> mask) { \
return lGatherGeneral<svec<LANES,STYPE>, STYPE, svec<LANES,PSTYPE>, svec<LANES,bool> >(ptrs, mask); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175

slow implementation of gather general Must use template to specify the return type

Parameters
mask
Returns
#define GATHER_STRIDE (   STYPE,
  OSTYPE 
)
Value:
template <class RetVecType> static RetVecType svec_gather_stride(STYPE* b, OSTYPE o, OSTYPE s); \
template<> \
FORCEINLINE svec<LANES,STYPE> svec_gather_stride<svec<LANES,STYPE> >(STYPE* b, OSTYPE o, OSTYPE s) { \
svec<LANES,STYPE> ret; \
b += o; \
for(int i = 0; i < LANES; ++i, b+=s) { \
ret[i] = *b; \
} \
INC_STATS_NAME(STATS_GATHER_SLOW,1, "Gather Steps"); \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for general impl of gather base step

#define GATHER_STRIDE_L4 (   STYPE,
  OSTYPE 
)
Value:
template <class RetVecType> static RetVecType svec_gather_stride(STYPE* b, OSTYPE o, OSTYPE s); \
template<> \
FORCEINLINE svec<LANES,STYPE> svec_gather_stride<svec<LANES,STYPE> >(STYPE* b, OSTYPE o, OSTYPE s) { \
int64_t off = (int64_t)o; int64_t stride = (int64_t)s;\
OSTYPE stride2 = stride * 2; \
STYPE v0 = *(b + off); \
STYPE v1 = *(b + off + stride); \
STYPE v2 = *(b + off + stride2); \
STYPE v3 = *(b + off + stride2 + stride); \
return svec<LANES,STYPE>(v0, v1, v2, v3); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for fast impl of gather base step

#define INC_STATS (   stat,
  inc 
)
#define INC_STATS_NAME (   stat,
  inc,
  opname 
)
#define INSERT_EXTRACT (   STYPE)
Value:
static FORCEINLINE STYPE svec_extract(svec<LANES,STYPE> v, int index) { \
return v.v[index]; \
} \
static FORCEINLINE void svec_insert(svec<LANES,STYPE> *v, int index, STYPE val) { \
v->v[index] = val; \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for svec's insert extract method implementation The implementation is based on vector type's subscript operator

#define IS32BIT
#define LOAD_CONST (   STYPE)
Value:
template <class RetVecType> static RetVecType svec_load_const(const STYPE* p); \
template<> \
FORCEINLINE svec<LANES,STYPE> svec_load_const<svec<LANES,STYPE> >(const STYPE* p) { \
svec<LANES,STYPE> ret; \
INC_STATS_NAME(STATS_LOAD_SLOW, 1, "load const"); \
for (int i = 0; i < LANES; ++i) { ret[i] = *p; }\
return ret; \
} \
template <class RetVecType> static RetVecType svec_load_and_splat(STYPE* p); \
template<> \
FORCEINLINE svec<LANES,STYPE> svec_load_and_splat<svec<LANES,STYPE> >(STYPE* p) { \
svec<LANES,STYPE> ret; \
INC_STATS_NAME(STATS_LOAD_SLOW, 1, "load const"); \
for (int i = 0; i < LANES; ++i) { ret[i] = *p; }\
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define LOAD_STORE (   STYPE)
Value:
\
static FORCEINLINE svec<LANES,STYPE> svec_load(const svec<LANES,STYPE> *p) { \
STYPE *ptr = (STYPE *)p; \
svec<LANES,STYPE> ret; \
INC_STATS_NAME(STATS_LOAD_SLOW, 1, "load:svec_"#STYPE); \
for (int i = 0; i < LANES; ++i) {ret[i] = ptr[i];} \
return ret; \
} \ \
static FORCEINLINE void svec_store(svec<LANES,STYPE> *p, svec<LANES,STYPE> v) { \
STYPE *ptr = (STYPE *)p; \
INC_STATS_NAME(STATS_STORE_SLOW, 1, "store:svec_"#STYPE); \
for (int i = 0; i < LANES; ++i) { ptr[i] = v[i]; } \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define MASKED_LOAD_STORE_L4 (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_masked_load(svec<LANES,STYPE> *p, svec<LANES,bool> mask) { \
return svec_gather_base_offsets((STYPE*)p, sizeof(STYPE), svec<LANES,int32_t>(0,1,2,3), mask); \
} \
static FORCEINLINE void svec_masked_store(svec<LANES,STYPE> *p, svec<LANES,STYPE> v, svec<LANES,bool> mask) { \
svec_scatter_base_offsets((STYPE*)p, sizeof(STYPE), svec<LANES,int32_t>(0,1,2,3), v, mask); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define MASKED_LOAD_STORE_L8 (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_masked_load(svec<LANES,STYPE> *p, svec<LANES,bool> mask) { \
return svec_gather_base_offsets((STYPE*)p, sizeof(STYPE), svec<LANES,int32_t>(0,1,2,3,4,5,6,7), mask); \
} \
static FORCEINLINE void svec_masked_store(svec<LANES,STYPE> *p, svec<LANES,STYPE> v, svec<LANES,bool> mask) { \
svec_scatter_base_offsets((STYPE*)p, sizeof(STYPE), svec<LANES,int32_t>(0,1,2,3,4,5,6,7), v, mask); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define MVEC_CLASS_METHOD_IMPL (   STYPE)
Value:
FORCEINLINE svec<LANES,STYPE> svec<LANES,STYPE>::load(svec<LANES,STYPE>* p){ return svec_load(p); } \\
FORCEINLINE void svec<LANES,STYPE>::store(svec<LANES,STYPE>* p){ svec_store(p, *this); }
#define FORCEINLINE
Definition: gsimd_utility.h:175

mask class's class method impl

#define NOT_IMPLEMENTED (   msg)
Value:
std::cout << "WARNING: operation " << msg << " is not implemented yet" << std::endl; \
assert(false);
#define POST_ALIGN (   x)    __attribute__ ((aligned(x)))
#define PRE_ALIGN (   x)
#define ROTATE (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_rotate(svec<LANES,STYPE> v, int index) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "rotate"); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = v[(i+index) & (LANES-1)];} \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macro for rotate method implementation

#define ROTATE_L4 (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_rotate(svec<LANES,STYPE> v, int index) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "rotate"); \
svec<LANES,STYPE> ret (v[(0+index) & 0x3], \
v[(1+index) & 0x3], \
v[(2+index) & 0x3], \
v[(3+index) & 0x3]); \
return ret; \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macro for rotate method implementation

#define SCATTER_BASE_OFFSETS (   STYPE,
  OSTYPE 
)
Value:
FORCEINLINE void svec_scatter_base_offsets(STYPE* b, uint32_t scale, svec<LANES,OSTYPE> offsets, svec<LANES,STYPE> val, svec<LANES,bool> mask) { \
for(int i=0;i<LANES;++i){if(mask[i]){*(STYPE*)((uint8_t*)b + scale * offsets[i]) = val[i];}}\
INC_STATS_NAME(STATS_SCATTER_SLOW,1,"scatter offset svec<LANES,"#STYPE">"); \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

@ macros for generic impl of scatter base offsets

#define SCATTER_BASE_OFFSETS_L4 (   STYPE,
  OSTYPE 
)
Value:
FORCEINLINE void svec_scatter_base_offsets(STYPE* b, uint32_t scale, svec<LANES,OSTYPE> offsets, svec<LANES,STYPE> val, svec<LANES,bool> mask) { \
lScatterBaseOffsets<STYPE, svec<LANES,OSTYPE>, svec<LANES,STYPE>, svec<LANES,bool> >((uint8_t*)b, scale, offsets, val, mask); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175

@ macros for generic impl of scatter base offsets

#define SCATTER_GENERAL (   STYPE,
  PSTYPE 
)
Value:
static FORCEINLINE void svec_scatter(svec<LANES,PSTYPE> ptrs, svec<LANES,STYPE> val, svec<LANES,bool> mask) { \
for(int i = 0; i < LANES; ++i) { if(mask[i]){ *((STYPE*)ptrs[i]) = val[i];} } \
INC_STATS_NAME(STATS_SCATTER_SLOW,1, "scatter general svec<LANES,"#STYPE">"); \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define SCATTER_GENERAL_L4 (   STYPE,
  PSTYPE 
)
Value:
static FORCEINLINE void svec_scatter(svec<LANES,PSTYPE> ptrs, svec<LANES,STYPE> val, svec<LANES,bool> mask) { \
lScatterGeneral<STYPE, svec<LANES,PSTYPE>, svec<LANES,STYPE>, svec<LANES,bool> >(ptrs, val, mask); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define SCATTER_STRIDE (   STYPE,
  OSTYPE 
)
Value:
FORCEINLINE void svec_scatter_stride(STYPE* b, OSTYPE o, OSTYPE s, svec<LANES,STYPE> val) { \
b += o; \
for(int i = 0; i < LANES; ++i, b+=s) { \
*b = svec_extract(val, i); \
}\
INC_STATS_NAME(STATS_SCATTER_SLOW,1, "scatter stride general svec<LANES,"#STYPE">"); \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define SCATTER_STRIDE_L4 (   STYPE,
  OSTYPE 
)
Value:
FORCEINLINE void svec_scatter_stride(STYPE* b, OSTYPE o, OSTYPE s, svec<LANES,STYPE> val) { \
int64_t off = (int64_t)o; int64_t stride = (int64_t)s;\
OSTYPE stride2 = stride * 2; \
*(b + off) = svec_extract(val, 0); \
*(b + off + stride) = svec_extract(val, 1); \
*(b + off + stride2) = svec_extract(val, 2); \
*(b + off + stride2 + stride) = svec_extract(val, 3); \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define SELECT (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_select(svec<LANES,bool> mask, svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
svec<LANES,STYPE> ret; \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "select:svec_"#STYPE); \
for (int i = 0; i < LANES; ++i) {ret[i] = mask[i] ? a[i] : b[i];} \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
svec< 4, bool > svec_select(svec< 4, bool > mask, svec< 4, bool > a, svec< 4, bool > b)
construct c by selecting elements from two input vectors according to the mask
Definition: power_vsx4.h:1126
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for svec's select by mask vector method generic implementation

#define SELECT_BOOLCOND (   STYPE)
Value:
FORCEINLINE svec<LANES,STYPE> svec_select(bool cond, svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
return cond ? a : b; \
}
svec< 4, bool > svec_select(svec< 4, bool > mask, svec< 4, bool > a, svec< 4, bool > b)
construct c by selecting elements from two input vectors according to the mask
Definition: power_vsx4.h:1126
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for svec's select by bool scalar method implementation

#define SHUFFLES (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_shuffle(svec<LANES,STYPE> v, svec<LANES,int32_t> index) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "shuffle"); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = v[index[i] & (LANES-1)]; }\
return ret; \
} \
static FORCEINLINE svec<LANES,STYPE> svec_shuffle2(svec<LANES,STYPE> v0, svec<LANES,STYPE> v1, svec<LANES,int32_t> index) { \
svec<LANES,STYPE> ret; \
NOT_IMPLEMENTED("shuffle 2"); \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define NOT_IMPLEMENTED(msg)
Definition: gsimd_utility.h:160
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macro for shuffle/shuffle2 methods implementation

#define SHUFFLES_L4 (   STYPE)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_shuffle(svec<LANES,STYPE> v, svec<LANES,int32_t> index) { \
INC_STATS_NAME(STATS_OTHER_SLOW, 1, "shuffle"); \
svec<LANES,STYPE> ret (v[index[0] & 0x3], \
v[index[1] & 0x3], \
v[index[2] & 0x3], \
v[index[3] & 0x3] ); \
return ret; \
} \
static FORCEINLINE svec<LANES,STYPE> svec_shuffle2(svec<LANES,STYPE> v0, svec<LANES,STYPE> v1, svec<LANES,int32_t> index) { \
svec<LANES,STYPE> ret; \
NOT_IMPLEMENTED("shuffle 2"); \
return ret; \
}
#define NOT_IMPLEMENTED(msg)
Definition: gsimd_utility.h:160
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175

macro for shuffle/shuffle2 methods implementation

#define SUBSCRIPT_FUNC_BOOL_DECL (   STYPE)
Value:
\
struct Helper { \
int m_index; svec<LANES,bool> *m_self; \
FORCEINLINE Helper(svec<LANES,bool> *p_vec, int index): m_self(p_vec), m_index(index) {} \
FORCEINLINE void operator=(STYPE value); \
FORCEINLINE void operator=(Helper helper); \
FORCEINLINE operator STYPE() const; \
}; \
FORCEINLINE Helper operator[](int index) { return Helper(this, index);} \
const FORCEINLINE STYPE operator[](int index) const;
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define SUBSCRIPT_FUNC_DECL (   STYPE)
Value:
FORCEINLINE STYPE& operator[](int index); \
const FORCEINLINE STYPE operator[](int index) const;
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros to define an intrinsic-based subscript operator

#define SUBSCRIPT_FUNC_IMPL (   STYPE)
Value:
FORCEINLINE STYPE& svec<LANES,STYPE>::operator[](int index) { \
INC_STATS_NAME(STATS_INSERT, 1, "insert "#STYPE); \
return v[index]; \
} \
const FORCEINLINE STYPE svec<LANES,STYPE>::operator[](int index) const { \
return v[index]; \
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define SVEC_BOOL_CLASS_METHOD_DECL ( )
Value:
FORCEINLINE svec<LANES,bool> operator==(svec<LANES,bool> a); \
FORCEINLINE svec<LANES,bool> operator!=(svec<LANES,bool> a); \
static FORCEINLINE svec<LANES,bool> load(svec<LANES,bool>* p); \
FORCEINLINE void store(svec<LANES,bool>* p); \
FORCEINLINE bool any_true(); \
FORCEINLINE bool all_true(); \
FORCEINLINE bool none_true(); \
FORCEINLINE svec<LANES,bool> operator|(svec<LANES,bool>); \
FORCEINLINE svec<LANES,bool> operator&(svec<LANES,bool> a); \
FORCEINLINE svec<LANES,bool> operator^(svec<LANES,bool> a); \
FORCEINLINE svec<LANES,bool> operator~(); \
FORCEINLINE svec<LANES,bool> operator!(); \
FORCEINLINE svec<LANES,bool> operator&&(svec<LANES,bool> a); \
FORCEINLINE svec<LANES,bool> operator||(svec<LANES,bool> a);
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for svec<N,bool> class's class method

#define TERNERY (   STYPE)
Value:
\
FORCEINLINE svec<LANES,STYPE> svec_madd(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
svec<LANES,STYPE> res; \
for(int i = 0; i < LANES; ++i) { res[i] = a[i]*b[i]+c[i]; } \
return res; \
} \ \
FORCEINLINE svec<LANES,STYPE> svec_msub(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
svec<LANES,STYPE> res; \
for(int i = 0; i < LANES; ++i) { res[i] = a[i]*b[i]-c[i]; } \
return res; \
} \ \
FORCEINLINE svec<LANES,STYPE> svec_nmsub(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
svec<LANES,STYPE> res; \
for(int i = 0; i < LANES; ++i) { res[i] = -(a[i]*b[i]-c[i]); } \
return res; \
}
#define LANES
Definition: power_vsx4.h:119
svec< 4, int32_t > svec_madd(svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
vector multiply and add operation. return a * b + c.
Definition: power_vsx4.h:2802
svec< 4, int32_t > svec_nmsub(svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
vector negated multiply-subtract operation. return -(a * b - c).
Definition: power_vsx4.h:2802
svec< 4, int32_t > svec_msub(svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
vector multiply-subtract operation. return a * b - c.
Definition: power_vsx4.h:2802
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define TERNERY_L4 (   STYPE)
Value:
\
FORCEINLINE svec<LANES,STYPE> svec_madd(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
svec<LANES,STYPE> ret(svec_extract(a, 0) * svec_extract(b, 0) + svec_extract(c, 0),\
svec_extract(a, 1) * svec_extract(b, 1) + svec_extract(c, 1),\
svec_extract(a, 2) * svec_extract(b, 2) + svec_extract(c, 2),\
svec_extract(a, 3) * svec_extract(b, 3) + svec_extract(c, 3));\
return ret; \
} \ \
FORCEINLINE svec<LANES,STYPE> svec_msub(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
svec<LANES,STYPE> ret(svec_extract(a, 0) * svec_extract(b, 0) - svec_extract(c, 0),\
svec_extract(a, 1) * svec_extract(b, 1) - svec_extract(c, 1),\
svec_extract(a, 2) * svec_extract(b, 2) - svec_extract(c, 2),\
svec_extract(a, 3) * svec_extract(b, 3) - svec_extract(c, 3));\
return ret; \
} \ \
FORCEINLINE svec<LANES,STYPE> svec_nmsub(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
svec<LANES,STYPE> ret(- (svec_extract(a, 0) * svec_extract(b, 0) - svec_extract(c, 0)),\
- (svec_extract(a, 1) * svec_extract(b, 1) - svec_extract(c, 1)),\
- (svec_extract(a, 2) * svec_extract(b, 2) - svec_extract(c, 2)),\
- (svec_extract(a, 3) * svec_extract(b, 3) - svec_extract(c, 3)));\
return ret; \
}
svec< 4, int32_t > svec_madd(svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
vector multiply and add operation. return a * b + c.
Definition: power_vsx4.h:2802
svec< 4, int32_t > svec_nmsub(svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
vector negated multiply-subtract operation. return -(a * b - c).
Definition: power_vsx4.h:2802
svec< 4, int32_t > svec_msub(svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
vector multiply-subtract operation. return a * b - c.
Definition: power_vsx4.h:2802
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define UNARY_OP (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> v) { \
INC_STATS_NAME(STATS_UNARY_SLOW, 1, #OP); \
svec<LANES,STYPE> ret; \
for (int i = 0; i < LANES; ++i) { ret[i] = OP(v[i]); } \
return ret; \
}
#define LANES
Definition: power_vsx4.h:119
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define UNARY_OP_L4 (   STYPE,
  NAME,
  OP 
)
Value:
static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> v) { \
INC_STATS_NAME(STATS_UNARY_SLOW, 1, #OP); \
return svec<LANES,STYPE>(OP(svec_extract(v, 0)),\
OP(svec_extract(v, 1)),\
OP(svec_extract(v, 2)),\
OP(svec_extract(v, 3)));\
}
#define INC_STATS_NAME(stat, inc, opname)
Definition: gsimd_utility.h:156
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define VEC_BIN_DECL (   STYPE)
Value:
FORCEINLINE svec<LANES,STYPE> operator+(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,STYPE> operator+(STYPE s); \
FORCEINLINE svec<LANES,STYPE> operator-(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,STYPE> operator-(STYPE s); \
FORCEINLINE svec<LANES,STYPE> operator*(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,STYPE> operator*(STYPE s); \
FORCEINLINE svec<LANES,STYPE> operator/(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,STYPE> operator/(STYPE s);
svec< 4, int8_t > operator/(int8_t s, svec< 4, int8_t > a)
Divide a scalar by a vector.
Definition: power_vsx4.h:3813
svec< 4, int8_t > operator*(int8_t s, svec< 4, int8_t > a)
Multiply a scalar and a vector.
Definition: power_vsx4.h:3813
svec< 4, int8_t > operator+(int8_t s, svec< 4, int8_t > a)
Add a scalar and a vector.
Definition: power_vsx4.h:3813
svec< 4, int8_t > operator-(int8_t s, svec< 4, int8_t > a)
Sub a scalar and a vector.
Definition: power_vsx4.h:3813
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for binary operations.

#define VEC_BIN_IMPL (   STYPE)
#define VEC_CLASS_METHOD_DECL (   STYPE)
Value:
VEC_BIN_DECL(STYPE);\
static FORCEINLINE svec<LANES,STYPE> load(svec<LANES,STYPE>* p); \
FORCEINLINE void store(svec<LANES,STYPE>* p); \
static FORCEINLINE svec<LANES,STYPE> masked_load(svec<LANES,STYPE>* p, svec<LANES,bool> mask); \
FORCEINLINE void masked_store(svec<LANES,STYPE>* p, svec<LANES,bool> mask); \
static FORCEINLINE svec<LANES,STYPE> load_const(const STYPE* p); \
static FORCEINLINE svec<LANES,STYPE> load_and_splat(STYPE* p); \
static FORCEINLINE svec<LANES,STYPE> gather(svec<LANES,void*> ptrs, svec<LANES,bool> mask);\
FORCEINLINE void scatter(svec<LANES,void*> ptrs, svec<LANES,bool> mask); \
static FORCEINLINE svec<LANES,STYPE> gather_base_offsets(STYPE* b, uint32_t scale, svec<LANES,int32_t> offsets, svec<LANES,bool> mask);\
static FORCEINLINE svec<LANES,STYPE> gather_base_offsets(STYPE* b, uint32_t scale, svec<LANES,int64_t> offsets, svec<LANES,bool> mask);\
FORCEINLINE void scatter_base_offsets(STYPE* b, uint32_t scale, svec<LANES,int32_t> offsets, svec<LANES,bool> mask); \
FORCEINLINE void scatter_base_offsets(STYPE* b, uint32_t scale, svec<LANES,int64_t> offsets, svec<LANES,bool> mask); \
static FORCEINLINE svec<LANES,STYPE> gather_stride(STYPE* b, int32_t off, int32_t stride);\
static FORCEINLINE svec<LANES,STYPE> gather_stride(STYPE* b, int64_t off, int64_t stride);\
FORCEINLINE void scatter_stride(STYPE* b, int32_t off, int32_t stride); \
FORCEINLINE void scatter_stride(STYPE* b, int64_t off, int64_t stride); \
FORCEINLINE svec<LANES,STYPE> broadcast(int32_t index); \
FORCEINLINE svec<LANES,STYPE> rotate(int32_t index); \
FORCEINLINE svec<LANES,STYPE> shuffle(svec<LANES, int32_t> index); \
FORCEINLINE svec<LANES,STYPE> abs();
#define VEC_UNARY_DECL(STYPE)
macros for unary. note "-" means neg or complement
Definition: gsimd_utility.h:307
#define VEC_CMP_DECL(STYPE)
macros to define compare methods == and != are available for all the types.
Definition: gsimd_utility.h:296
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define VEC_BIN_DECL(STYPE)
macros for binary operations.
Definition: gsimd_utility.h:316

macros for non-mask i8 - double types's method

#define VEC_CLASS_METHOD_IMPL (   STYPE)
#define VEC_CMP_DECL (   STYPE)
Value:
FORCEINLINE svec<LANES,bool> operator==(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,bool> operator!=(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,bool> operator<(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,bool> operator<=(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,bool> operator>(svec<LANES,STYPE> a); \
FORCEINLINE svec<LANES,bool> operator>=(svec<LANES,STYPE> a); \
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros to define compare methods == and != are available for all the types.

#define VEC_CMP_IMPL (   STYPE)

Below I use macros to declare all vector operators

#define VEC_FLOAT_CLASS_METHOD_DECL (   STYPE)
Value:
FORCEINLINE svec<LANES,STYPE> round(); \
FORCEINLINE svec<LANES,STYPE> floor(); \
FORCEINLINE svec<LANES,STYPE> ceil(); \
FORCEINLINE svec<LANES,STYPE> sqrt(); \
FORCEINLINE svec<LANES,STYPE> rcp(); \
FORCEINLINE svec<LANES,STYPE> rsqrt(); \
FORCEINLINE svec<LANES,STYPE> exp(); \
FORCEINLINE svec<LANES,STYPE> log(); \
FORCEINLINE svec<LANES,STYPE> pow(svec<LANES,STYPE> a);
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for float/double math unary operations

#define VEC_FLOAT_CLASS_METHOD_IMPL (   STYPE)
#define VEC_INT_CLASS_METHOD_DECL (   STYPE,
  USTYPE 
)
Value:
FORCEINLINE svec<LANES, STYPE> operator|(svec<LANES, STYPE> a); \
FORCEINLINE svec<LANES, STYPE> operator&(svec<LANES, STYPE> a); \
FORCEINLINE svec<LANES, STYPE> operator^(svec<LANES, STYPE> a); \
FORCEINLINE svec<LANES, STYPE> operator<<(svec<LANES, USTYPE> a); \
FORCEINLINE svec<LANES, STYPE> operator<<(int32_t s); \
FORCEINLINE svec<LANES, STYPE> operator>>(svec<LANES, USTYPE> a); \
FORCEINLINE svec<LANES, STYPE> operator>>(int32_t s); \
FORCEINLINE svec<LANES, STYPE> operator%(svec<LANES, STYPE> a); \
FORCEINLINE svec<LANES, STYPE> operator%(STYPE s);
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for method definitions for integer vectors only. Note: the shift-amount operand of the shift operators must be an unsigned vector

#define VEC_INT_CLASS_METHOD_IMPL (   STYPE,
  STYPE2 
)
#define VEC_UNARY_DECL (   STYPE)
Value:
FORCEINLINE svec<LANES,STYPE> operator-(); \
FORCEINLINE STYPE reduce_add(); \
FORCEINLINE STYPE reduce_max(); \
FORCEINLINE STYPE reduce_min();
svec< 4, int8_t > operator-(int8_t s, svec< 4, int8_t > a)
Sub a scalar and a vector.
Definition: power_vsx4.h:3813
#define FORCEINLINE
Definition: gsimd_utility.h:175

macros for unary. note "-" means neg or complement

#define VEC_UNARY_IMPL (   STYPE)
Value:
FORCEINLINE svec<LANES,STYPE> svec<LANES,STYPE>::operator-() {return svec_neg(*this); } \\
FORCEINLINE STYPE svec<LANES,STYPE>::reduce_add() {return svec_reduce_add(*this); } \\
FORCEINLINE STYPE svec<LANES,STYPE>::reduce_max() {return svec_reduce_max(*this); } \\
FORCEINLINE STYPE svec<LANES,STYPE>::reduce_min() {return svec_reduce_min(*this); }
svec< 4, int8_t > operator-(int8_t s, svec< 4, int8_t > a)
Sub a scalar and a vector.
Definition: power_vsx4.h:3813
#define FORCEINLINE
Definition: gsimd_utility.h:175
#define ZERO (   STYPE,
  NAME 
)
Value:
static FORCEINLINE svec<LANES,STYPE> svec_zero(svec<LANES,STYPE>) { \
svec<LANES,STYPE> ret(0,0,0,0); \
return ret; \
}
#define FORCEINLINE
Definition: gsimd_utility.h:175

macro for setzero method implementation

Typedef Documentation

typedef double svec1_d
typedef float svec1_f
typedef int16_t svec1_i16
typedef int32_t svec1_i32
typedef int64_t svec1_i64
typedef int8_t svec1_i8
typedef uint16_t svec1_u16
typedef uint32_t svec1_u32
typedef uint64_t svec1_u64
typedef uint8_t svec1_u8

LANES=1 short vector definition for SIMD Generic Interfaces

Function Documentation

template<int N>
const bool check_lanes ( int  n)
template<>
const bool check_lanes< 16 > ( int  n)
template<>
const bool check_lanes< 2 > ( int  n)
template<>
const bool check_lanes< 4 > ( int  n)
template<>
const bool check_lanes< 8 > ( int  n)
template<typename T >
const char* iu_get_type_name ( )
template<>
const char* iu_get_type_name< double > ( )
template<>
const char* iu_get_type_name< float > ( )
template<>
const char* iu_get_type_name< int16_t > ( )
template<>
const char* iu_get_type_name< int32_t > ( )
template<>
const char* iu_get_type_name< int64_t > ( )
template<>
const char* iu_get_type_name< int8_t > ( )
template<>
const char* iu_get_type_name< uint16_t > ( )
template<>
const char* iu_get_type_name< uint32_t > ( )
template<>
const char* iu_get_type_name< uint64_t > ( )
template<>
const char* iu_get_type_name< uint8_t > ( )
template<typename T >
void stdout_scalar ( std::ostream &  out,
v 
)
template<>
void stdout_scalar< int8_t > ( std::ostream &  out,
int8_t  v 
)
template<>
void stdout_scalar< uint8_t > ( std::ostream &  out,
uint8_t  v 
)