SIMD LANES=4 interfaces implemented with SSE intrinsics. More...

#include <stdint.h>
#include <math.h>
#include <assert.h>
#include <iostream>
#include <smmintrin.h>
#include <nmmintrin.h>
#include "gsimd_utility.h"

Classes
struct	sse::svec< Lanes, T >

struct	sse::svec< 4, bool >
	Data representation and operations on a vector of 4 boolean values. This is used in predicated vector operations. Specifically, the ith value of svec<4,bool> indicates whether the ith lane of a predicated vector operation is enabled or not. More...

struct	sse::svec< 4, bool >::Helper
	A special helper class to support customized subscript[] operations. More...

struct	sse::svec< 4, int8_t >
	data representation and operations on a vector of 4 signed chars. More...

struct	sse::svec< 4, uint8_t >
	data representation and operations on a vector of 4 unsigned chars. More...

struct	sse::svec< 4, int16_t >
	data representation and operations on a vector of 4 signed short. More...

struct	sse::svec< 4, uint16_t >
	data representation and operations on a vector of 4 unsigned short. More...

struct	sse::svec< 4, int32_t >
	data representation and operations on a vector of 4 signed int. More...

struct	sse::svec< 4, uint32_t >
	data representation and operations on a vector of 4 unsigned int. More...

struct	sse::svec< 4, int64_t >
	data representation and operations on a vector of 4 signed long long. More...

struct	sse::svec< 4, uint64_t >
	data representation and operations on a vector of 4 unsigned long long. More...

struct	sse::svec< 4, float >
	data representation and operations on a vector of 4 float. More...

struct	sse::svec< 4, double >
	data representation and operations on a vector of 4 double. More...

struct	sse::svec< 4, void * >
	data representation and operations on a vector of 4 pointers. This is only used in gather and scatter. More...

Namespaces
	sse

Macros
#define	LANES 4

#define	INSERT_EXTRACT_SSE(STYPE)
	macros for svec's insert/extract method implementation. The implementation is based on the vector type's subscript operator. More...

#define	INSERT_EXTRACT_SSEOPT(STYPE, FUNC)

#define	INSERT_EXTRACT_SSEOPT64(STYPE, FUNC)

#define	LOAD_CONST_SSE(STYPE)

#define	UNARY_OP_OPT(STYPE, NAME, OP)

#define	UNARY_OP_OPT64(STYPE, NAME, OP)
	macros for 64bit object, i64/u64/double More...

#define	BINARY_OP_OPT_FUNC(STYPE, STYPE2, NAME, FUNC)

#define	BINARY_OP_OPT_FUNC64(STYPE, STYPE2, NAME, FUNC)

#define	BIN_VEC_SCAL(STYPE)

#define	INT_BINARY_OP_METHODS(STYPE)

#define	INT_BINARY_OP_METHODS64(STYPE)

#define	TERNERY_OPT(STYPE)

#define	MAX_MIN_REDUCE_METHODS(STYPE)

#define	CAST_OPT(SFROM, STO)
	cast by directly changing the __mm object type. More...

#define	CAST_OPT64(SFROM, STO)
	cast by directly changing the __vector type. More...

#define	CAST_BITS_OPT(SFROM, STO, func)
	cast by directly changing the __vector type. More...

#define	CAST_BITS_OPT64(SFROM, STO, func)
	cast by directly changing the __vector type. More...

#define	SUBSCRIPT_FUNC_IMPL_SSE(STYPE)
	this macro uses SSE-specific intrinsics to do extract and insert. More...

Functions
svec< 4, bool >	sse::svec_select (svec< 4, bool > mask, svec< 4, bool > a, svec< 4, bool > b)
	construct c by selecting elements from two input vectors according to the mask More...

svec< 4, int8_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, int8_t > a, svec< 4, int8_t > b)
	select of svec<4,int8_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, uint8_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, uint8_t > a, svec< 4, uint8_t > b)
	select of svec<4,uint8_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, int16_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, int16_t > a, svec< 4, int16_t > b)
	select of svec<4,int16_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, uint16_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, uint16_t > a, svec< 4, uint16_t > b)
	select of svec<4,uint16_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, int32_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, int32_t > a, svec< 4, int32_t > b)
	select of svec<4,int32_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, uint32_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, uint32_t > a, svec< 4, uint32_t > b)
	select of svec<4,uint32_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, int64_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, int64_t > a, svec< 4, int64_t > b)
	select of svec<4,int64_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, uint64_t >	sse::svec_select (svec< 4, bool > mask, svec< 4, uint64_t > a, svec< 4, uint64_t > b)
	select of svec<4,uint64_t> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, float >	sse::svec_select (svec< 4, bool > mask, svec< 4, float > a, svec< 4, float > b)
	select of svec<4,float> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, double >	sse::svec_select (svec< 4, bool > mask, svec< 4, double > a, svec< 4, double > b)
	select of svec<4,double> vectors by a mask vector; see svec_select(svec<4,bool> mask, svec<4,bool> a, svec<4,bool> b). More...

svec< 4, bool >	sse::svec_select (bool cond, svec< 4, bool > a, svec< 4, bool > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, int8_t >	sse::svec_select (bool cond, svec< 4, int8_t > a, svec< 4, int8_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, uint8_t >	sse::svec_select (bool cond, svec< 4, uint8_t > a, svec< 4, uint8_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, int16_t >	sse::svec_select (bool cond, svec< 4, int16_t > a, svec< 4, int16_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, uint16_t >	sse::svec_select (bool cond, svec< 4, uint16_t > a, svec< 4, uint16_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, int32_t >	sse::svec_select (bool cond, svec< 4, int32_t > a, svec< 4, int32_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, uint32_t >	sse::svec_select (bool cond, svec< 4, uint32_t > a, svec< 4, uint32_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, int64_t >	sse::svec_select (bool cond, svec< 4, int64_t > a, svec< 4, int64_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, uint64_t >	sse::svec_select (bool cond, svec< 4, uint64_t > a, svec< 4, uint64_t > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, float >	sse::svec_select (bool cond, svec< 4, float > a, svec< 4, float > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

svec< 4, double >	sse::svec_select (bool cond, svec< 4, double > a, svec< 4, double > b)
	Select two TYPE vectors by a bool scalar. The same as cond ? a: b. More...

template<>
svec< 4, int8_t >	sse::svec_load_const< svec< 4, int8_t > > (const int8_t *p)

template<>
svec< 4, int8_t >	sse::svec_load_and_splat< svec< 4, int8_t > > (int8_t *p)

template<>
svec< 4, uint8_t >	sse::svec_load_const< svec< 4, uint8_t > > (const uint8_t *p)

template<>
svec< 4, uint8_t >	sse::svec_load_and_splat< svec< 4, uint8_t > > (uint8_t *p)

template<>
svec< 4, int16_t >	sse::svec_load_const< svec< 4, int16_t > > (const int16_t *p)

template<>
svec< 4, int16_t >	sse::svec_load_and_splat< svec< 4, int16_t > > (int16_t *p)

template<>
svec< 4, uint16_t >	sse::svec_load_const< svec< 4, uint16_t > > (const uint16_t *p)

template<>
svec< 4, uint16_t >	sse::svec_load_and_splat< svec< 4, uint16_t > > (uint16_t *p)

template<>
svec< 4, int32_t >	sse::svec_load_const< svec< 4, int32_t > > (const int32_t *p)

template<>
svec< 4, int32_t >	sse::svec_load_and_splat< svec< 4, int32_t > > (int32_t *p)

template<>
svec< 4, uint32_t >	sse::svec_load_const< svec< 4, uint32_t > > (const uint32_t *p)

template<>
svec< 4, uint32_t >	sse::svec_load_and_splat< svec< 4, uint32_t > > (uint32_t *p)

template<>
svec< 4, int64_t >	sse::svec_load_const< svec< 4, int64_t > > (const int64_t *p)

template<>
svec< 4, int64_t >	sse::svec_load_and_splat< svec< 4, int64_t > > (int64_t *p)

template<>
svec< 4, uint64_t >	sse::svec_load_const< svec< 4, uint64_t > > (const uint64_t *p)

template<>
svec< 4, uint64_t >	sse::svec_load_and_splat< svec< 4, uint64_t > > (uint64_t *p)

template<>
svec< 4, float >	sse::svec_load_const< svec< 4, float > > (const float *p)

template<>
svec< 4, float >	sse::svec_load_and_splat< svec< 4, float > > (float *p)

template<>
svec< 4, double >	sse::svec_load_const< svec< 4, double > > (const double *p)

template<>
svec< 4, double >	sse::svec_load_and_splat< svec< 4, double > > (double *p)

svec< 4, int32_t >	sse::svec_madd (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
	vector multiply and add operation. return a * b + c. More...

svec< 4, int32_t >	sse::svec_msub (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
	vector multiply and subtract operation. return a * b - c. More...

svec< 4, int32_t >	sse::svec_nmsub (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, int32_t > c)
	vector negated multiply and subtract operation. return -(a * b - c). More...

svec< 4, uint32_t >	sse::svec_madd (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, uint32_t > c)
	vector multiply and add operation. return a * b + c. More...

svec< 4, uint32_t >	sse::svec_msub (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, uint32_t > c)
	vector multiply and subtract operation. return a * b - c. More...

svec< 4, uint32_t >	sse::svec_nmsub (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, uint32_t > c)
	vector negated multiply and subtract operation. return -(a * b - c). More...

svec< 4, int64_t >	sse::svec_madd (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, int64_t > c)
	vector multiply and add operation. return a * b + c. More...

svec< 4, int64_t >	sse::svec_msub (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, int64_t > c)
	vector multiply and subtract operation. return a * b - c. More...

svec< 4, int64_t >	sse::svec_nmsub (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, int64_t > c)
	vector negated multiply and subtract operation. return -(a * b - c). More...

svec< 4, uint64_t >	sse::svec_madd (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, uint64_t > c)
	vector multiply and add operation. return a * b + c. More...

svec< 4, uint64_t >	sse::svec_msub (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, uint64_t > c)
	vector multiply and subtract operation. return a * b - c. More...

svec< 4, uint64_t >	sse::svec_nmsub (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, uint64_t > c)
	vector negated multiply and subtract operation. return -(a * b - c). More...

svec< 4, float >	sse::svec_madd (svec< 4, float > a, svec< 4, float > b, svec< 4, float > c)
	vector multiply and add operation. return a * b + c. More...

svec< 4, float >	sse::svec_msub (svec< 4, float > a, svec< 4, float > b, svec< 4, float > c)
	vector multiply and subtract operation. return a * b - c. More...

svec< 4, float >	sse::svec_nmsub (svec< 4, float > a, svec< 4, float > b, svec< 4, float > c)
	vector negated multiply and subtract operation. return -(a * b - c). More...

svec< 4, double >	sse::svec_madd (svec< 4, double > a, svec< 4, double > b, svec< 4, double > c)
	vector multiply and add operation. return a * b + c. More...

svec< 4, double >	sse::svec_msub (svec< 4, double > a, svec< 4, double > b, svec< 4, double > c)
	vector multiply and subtract operation. return a * b - c. More...

svec< 4, double >	sse::svec_nmsub (svec< 4, double > a, svec< 4, double > b, svec< 4, double > c)
	vector negated multiply and subtract operation. return -(a * b - c). More...

svec< 4,float >	sse::svec_preduce_add (svec< 4, float > v0, svec< 4, float > v1, svec< 4, float > v2, svec< 4, float > v3)

svec< 4, double >	sse::svec_preduce_add (svec< 4, double > v0, svec< 4, double > v1, svec< 4, double > v2, svec< 4, double > v3)

svec< 4,bool >	sse::svec_masked_equal (svec< 4, int8_t > a, svec< 4, int8_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, int8_t > a, svec< 4, int8_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, int8_t > a, svec< 4, int8_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, int8_t > a, svec< 4, int8_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, int8_t > a, svec< 4, int8_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, int8_t > a, svec< 4, int8_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, uint8_t > a, svec< 4, uint8_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, uint8_t > a, svec< 4, uint8_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, uint8_t > a, svec< 4, uint8_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, uint8_t > a, svec< 4, uint8_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, uint8_t > a, svec< 4, uint8_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, uint8_t > a, svec< 4, uint8_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, int16_t > a, svec< 4, int16_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, int16_t > a, svec< 4, int16_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, int16_t > a, svec< 4, int16_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, int16_t > a, svec< 4, int16_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, int16_t > a, svec< 4, int16_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, int16_t > a, svec< 4, int16_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, uint16_t > a, svec< 4, uint16_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, uint16_t > a, svec< 4, uint16_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, uint16_t > a, svec< 4, uint16_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, uint16_t > a, svec< 4, uint16_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, uint16_t > a, svec< 4, uint16_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, uint16_t > a, svec< 4, uint16_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, int32_t > a, svec< 4, int32_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, uint32_t > a, svec< 4, uint32_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, int64_t > a, svec< 4, int64_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, uint64_t > a, svec< 4, uint64_t > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, float > a, svec< 4, float > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, float > a, svec< 4, float > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, float > a, svec< 4, float > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, float > a, svec< 4, float > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, float > a, svec< 4, float > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, float > a, svec< 4, float > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_equal (svec< 4, double > a, svec< 4, double > b, svec< 4, bool > mask)
	Do equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_not_equal (svec< 4, double > a, svec< 4, double > b, svec< 4, bool > mask)
	Do not_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_than (svec< 4, double > a, svec< 4, double > b, svec< 4, bool > mask)
	Do less_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_less_equal (svec< 4, double > a, svec< 4, double > b, svec< 4, bool > mask)
	Do less_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_than (svec< 4, double > a, svec< 4, double > b, svec< 4, bool > mask)
	Do greater_than operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

svec< 4,bool >	sse::svec_masked_greater_equal (svec< 4, double > a, svec< 4, double > b, svec< 4, bool > mask)
	Do greater_equal operation on a and b with mask. If mask is true, return the compare result; otherwise return false. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,int8_t> type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,uint8_t> type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,int16_t> type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,uint16_t> type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,int32_t> type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,uint32_t> type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,int64_t> type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,uint64_t> type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,float> type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, bool > val)
	cast val from svec<4,bool> type to svec<4,double> type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, int8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, uint8_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, int16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, uint16_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, int32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, int32_t > val)
	cast val from svec<4,int32_t> type to svec<4,float> type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, int32_t > val)
	cast val from svec<4,int32_t> type to svec<4,double> type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, uint32_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, int64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, uint64_t > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, float > val)
	cast val from svec<4,float> type to svec<4,int32_t> type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, float > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast< svec< 4, double > > (svec< 4, float > val)
	cast val from svec<4,float> type to svec<4,double> type. More...

template<>
svec< 4, bool >	sse::svec_cast< svec< 4, bool > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int8_t >	sse::svec_cast< svec< 4, int8_t > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint8_t >	sse::svec_cast< svec< 4, uint8_t > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int16_t >	sse::svec_cast< svec< 4, int16_t > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint16_t >	sse::svec_cast< svec< 4, uint16_t > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast< svec< 4, int32_t > > (svec< 4, double > val)
	cast val from svec<4,double> type to svec<4,int32_t> type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast< svec< 4, uint32_t > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast< svec< 4, int64_t > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast< svec< 4, uint64_t > > (svec< 4, double > val)
	cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast< svec< 4, float > > (svec< 4, double > val)
	cast val from svec<4,double> type to svec<4,float> type. More...

template<>
svec< 4, float >	sse::svec_cast_bits< svec< 4, float > > (svec< 4, int32_t > val)
	bit cast val from FROM type to TO type. More...

template<>
svec< 4, float >	sse::svec_cast_bits< svec< 4, float > > (svec< 4, uint32_t > val)
	bit cast val from FROM type to TO type. More...

template<>
svec< 4, int32_t >	sse::svec_cast_bits< svec< 4, int32_t > > (svec< 4, float > val)
	bit cast val from FROM type to TO type. More...

template<>
svec< 4, uint32_t >	sse::svec_cast_bits< svec< 4, uint32_t > > (svec< 4, float > val)
	bit cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast_bits< svec< 4, double > > (svec< 4, int64_t > val)
	bit cast val from FROM type to TO type. More...

template<>
svec< 4, double >	sse::svec_cast_bits< svec< 4, double > > (svec< 4, uint64_t > val)
	bit cast val from FROM type to TO type. More...

template<>
svec< 4, int64_t >	sse::svec_cast_bits< svec< 4, int64_t > > (svec< 4, double > val)
	bit cast val from FROM type to TO type. More...

template<>
svec< 4, uint64_t >	sse::svec_cast_bits< svec< 4, uint64_t > > (svec< 4, double > val)
	bit cast val from FROM type to TO type. More...

svec< 4, int8_t >	sse::operator+ (int8_t s, svec< 4, int8_t > a)
	Add a scalar and a vector. More...

svec< 4, int8_t >	sse::operator- (int8_t s, svec< 4, int8_t > a)
	Subtract a vector from a scalar. More...

svec< 4, int8_t >	sse::operator* (int8_t s, svec< 4, int8_t > a)
	Multiply a scalar and a vector. More...

svec< 4, int8_t >	sse::operator/ (int8_t s, svec< 4, int8_t > a)
	Divide a scalar by a vector. More...

svec< 4, uint8_t >	sse::operator+ (uint8_t s, svec< 4, uint8_t > a)
	Add a scalar and a vector. More...

svec< 4, uint8_t >	sse::operator- (uint8_t s, svec< 4, uint8_t > a)
	Subtract a vector from a scalar. More...

svec< 4, uint8_t >	sse::operator* (uint8_t s, svec< 4, uint8_t > a)
	Multiply a scalar and a vector. More...

svec< 4, uint8_t >	sse::operator/ (uint8_t s, svec< 4, uint8_t > a)
	Divide a scalar by a vector. More...

svec< 4, int16_t >	sse::operator+ (int16_t s, svec< 4, int16_t > a)
	Add a scalar and a vector. More...

svec< 4, int16_t >	sse::operator- (int16_t s, svec< 4, int16_t > a)
	Subtract a vector from a scalar. More...

svec< 4, int16_t >	sse::operator* (int16_t s, svec< 4, int16_t > a)
	Multiply a scalar and a vector. More...

svec< 4, int16_t >	sse::operator/ (int16_t s, svec< 4, int16_t > a)
	Divide a scalar by a vector. More...

svec< 4, uint16_t >	sse::operator+ (uint16_t s, svec< 4, uint16_t > a)
	Add a scalar and a vector. More...

svec< 4, uint16_t >	sse::operator- (uint16_t s, svec< 4, uint16_t > a)
	Subtract a vector from a scalar. More...

svec< 4, uint16_t >	sse::operator* (uint16_t s, svec< 4, uint16_t > a)
	Multiply a scalar and a vector. More...

svec< 4, uint16_t >	sse::operator/ (uint16_t s, svec< 4, uint16_t > a)
	Divide a scalar by a vector. More...

svec< 4, int32_t >	sse::operator+ (int32_t s, svec< 4, int32_t > a)
	Add a scalar and a vector. More...

svec< 4, int32_t >	sse::operator- (int32_t s, svec< 4, int32_t > a)
	Subtract a vector from a scalar. More...

svec< 4, int32_t >	sse::operator* (int32_t s, svec< 4, int32_t > a)
	Multiply a scalar and a vector. More...

svec< 4, int32_t >	sse::operator/ (int32_t s, svec< 4, int32_t > a)
	Divide a scalar by a vector. More...

svec< 4, uint32_t >	sse::operator+ (uint32_t s, svec< 4, uint32_t > a)
	Add a scalar and a vector. More...

svec< 4, uint32_t >	sse::operator- (uint32_t s, svec< 4, uint32_t > a)
	Subtract a vector from a scalar. More...

svec< 4, uint32_t >	sse::operator* (uint32_t s, svec< 4, uint32_t > a)
	Multiply a scalar and a vector. More...

svec< 4, uint32_t >	sse::operator/ (uint32_t s, svec< 4, uint32_t > a)
	Divide a scalar by a vector. More...

svec< 4, int64_t >	sse::operator+ (int64_t s, svec< 4, int64_t > a)
	Add a scalar and a vector. More...

svec< 4, int64_t >	sse::operator- (int64_t s, svec< 4, int64_t > a)
	Subtract a vector from a scalar. More...

svec< 4, int64_t >	sse::operator* (int64_t s, svec< 4, int64_t > a)
	Multiply a scalar and a vector. More...

svec< 4, int64_t >	sse::operator/ (int64_t s, svec< 4, int64_t > a)
	Divide a scalar by a vector. More...

svec< 4, uint64_t >	sse::operator+ (uint64_t s, svec< 4, uint64_t > a)
	Add a scalar and a vector. More...

svec< 4, uint64_t >	sse::operator- (uint64_t s, svec< 4, uint64_t > a)
	Subtract a vector from a scalar. More...

svec< 4, uint64_t >	sse::operator* (uint64_t s, svec< 4, uint64_t > a)
	Multiply a scalar and a vector. More...

svec< 4, uint64_t >	sse::operator/ (uint64_t s, svec< 4, uint64_t > a)
	Divide a scalar by a vector. More...

svec< 4, float >	sse::operator+ (float s, svec< 4, float > a)
	Add a scalar and a vector. More...

svec< 4, float >	sse::operator- (float s, svec< 4, float > a)
	Subtract a vector from a scalar. More...

svec< 4, float >	sse::operator* (float s, svec< 4, float > a)
	Multiply a scalar and a vector. More...

svec< 4, float >	sse::operator/ (float s, svec< 4, float > a)
	Divide a scalar by a vector. More...

svec< 4, double >	sse::operator+ (double s, svec< 4, double > a)
	Add a scalar and a vector. More...

svec< 4, double >	sse::operator- (double s, svec< 4, double > a)
	Subtract a vector from a scalar. More...

svec< 4, double >	sse::operator* (double s, svec< 4, double > a)
	Multiply a scalar and a vector. More...

svec< 4, double >	sse::operator/ (double s, svec< 4, double > a)
	Divide a scalar by a vector. More...

Detailed Description

SIMD LANES=4 interfaces implemented by scalar.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

 Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.
 Redistributions in binary form must reproduce the above
 copyright notice, this list of conditions and the following
 disclaimer in the documentation and/or other materials provided
 with the distribution.
 Neither the name of IBM Corp. nor the names of its contributors may be
 used to endorse or promote products derived from this software
 without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Date: July 20, 2013

Author

Haichuan Wang (haichuan@us.ibm.com, hwang154@illinois.edu)

The source file is organized as follows:

I. All type definitions, with class functions incorporated

II. Data operation interfaces

load/store
select
broadcast/rotate/shuffle/smear/setzero
gather/scatter
load const, smear const, load and splat
masked load/masked store

III. Mask type (i1) interfaces

mask construction
bit operations

IV. General data operation interfaces

Unary
Math unary
Binary
Ternary
Compare
Max/Min
Reduce
Cast

The current implementation is based on Intel ISPC SIMD intrinsics. ISPC: http://ispc.github.io/

Macro Definition Documentation

#define BIN_VEC_SCAL ( STYPE )

Value:

static FORCEINLINE svec<LANES,STYPE> svec_add_scalar(svec<LANES,STYPE> a, STYPE s) { \
  return svec_add(a, svec<LANES,STYPE>(s)); \
} \
static FORCEINLINE svec<LANES,STYPE> svec_scalar_add(STYPE s, svec<LANES,STYPE> a) { \
  return svec_add(svec<LANES,STYPE>(s), a); \
} \
static FORCEINLINE svec<LANES,STYPE> svec_sub_scalar(svec<LANES,STYPE> a, STYPE s) { \
  return svec_sub(a, svec<LANES,STYPE>(s)); \
} \
static FORCEINLINE svec<LANES,STYPE> svec_scalar_sub(STYPE s, svec<LANES,STYPE> a) { \
  return svec_sub(svec<LANES,STYPE>(s), a); \
} \
static FORCEINLINE svec<LANES,STYPE> svec_mul_scalar(svec<LANES,STYPE> a, STYPE s) { \
  return svec_mul(a, svec<LANES,STYPE>(s)); \
} \
static FORCEINLINE svec<LANES,STYPE> svec_scalar_mul(STYPE s, svec<LANES,STYPE> a) { \
  return svec_mul(svec<LANES,STYPE>(s), a); \
} \
static FORCEINLINE svec<LANES,STYPE> svec_div_scalar(svec<LANES,STYPE> a, STYPE s) { \
  return svec_div(a, svec<LANES,STYPE>(s)); \
} \
static FORCEINLINE svec<LANES,STYPE> svec_scalar_div(STYPE s, svec<LANES,STYPE> a) { \
  return svec_div(svec<LANES,STYPE>(s), a); \
} \

#define BINARY_OP_OPT_FUNC	(	STYPE,
		STYPE2,
		NAME,
		FUNC
	)

Value:

static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE2> b) { \
  return svec<LANES,STYPE>(FUNC(a.v, b.v)); \
}

#define BINARY_OP_OPT_FUNC64	(	STYPE,
		STYPE2,
		NAME,
		FUNC
	)

Value:

static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a, svec<LANES,STYPE> b) { \
  return svec<LANES,STYPE>(FUNC(a.v[0], b.v[0]), FUNC(a.v[1], b.v[1])); \
}

#define CAST_BITS_OPT	(	SFROM,
		STO,
		func
	)

Value:

template <class T> static T svec_cast_bits(svec<LANES,SFROM> val);     \ \
template <> FORCEINLINE svec<LANES,STO> svec_cast_bits<svec<LANES,STO> >(svec<LANES,SFROM> val) {      \
    return svec<LANES,STO>(func(val.v)); \
}

Cast based on directly changing the __vector type.

#define CAST_BITS_OPT64	(	SFROM,
		STO,
		func
	)

Value:

template <class T> static T svec_cast_bits(svec<LANES,SFROM> val);     \ \
template <> FORCEINLINE svec<LANES,STO> svec_cast_bits<svec<LANES,STO> >(svec<LANES,SFROM> val) {      \
    return svec<LANES,STO>(func(val.v[0]), func(val.v[1])); \
}

Cast based on directly changing the __vector type.

#define CAST_OPT	(	SFROM,
		STO
	)

Value:

template <class T> static T svec_cast(svec<LANES,SFROM> val);     \ \
template <> FORCEINLINE svec<LANES,STO> svec_cast<svec<LANES,STO> >(svec<LANES,SFROM> val) {      \
    return svec<LANES,STO>((val.v)); \
}

Cast based on directly changing the __mm object type.

Here we provide the full set of cast combinations. Some may have a fast implementation.

#define CAST_OPT64	(	SFROM,
		STO
	)

Value:

template <class T> static T svec_cast(svec<LANES,SFROM> val);     \ \
template <> FORCEINLINE svec<LANES,STO> svec_cast<svec<LANES,STO> >(svec<LANES,SFROM> val) {      \
    return svec<LANES,STO>((val.v[0]),(val.v[1])); \
}

Cast based on directly changing the __vector type.

#define INSERT_EXTRACT_SSE ( STYPE )

Value:

static FORCEINLINE STYPE svec_extract(svec<LANES,STYPE> v, int index) {    \
    return ((STYPE*)&v)[index];                      \
  }                                          \
  static FORCEINLINE void svec_insert(svec<LANES,STYPE> *v, int index, STYPE val) { \
    ((STYPE*)v)[index] = val;                      \
  }

Macros for implementing svec's insert and extract methods. The implementation is based on the vector type's subscript operator.

#define INSERT_EXTRACT_SSEOPT	(	STYPE,
		FUNC
	)

Value:

static FORCEINLINE STYPE svec_extract(svec<LANES,STYPE> v, int index) {    \
    if(__builtin_constant_p(index) && index >=0 && index < 4) { \
      return (STYPE)_mm_extract_##FUNC(v.v, index);                \
    } else { \
      return ((STYPE*)&v)[index];  \
    } \
  }                                     \
  static FORCEINLINE void svec_insert(svec<LANES,STYPE> *v, int index, STYPE val) { \
    if(__builtin_constant_p(index) && index >=0 && index < 4) { \
      v->v = _mm_insert_##FUNC(v->v, val, index);                      \
    } else {\
      ((STYPE*)v)[index] = val;               \
    } \
  }

#define INSERT_EXTRACT_SSEOPT64	(	STYPE,
		FUNC
	)

Value:

static FORCEINLINE STYPE svec_extract(svec<LANES,STYPE> v, int index) {    \
    if(__builtin_constant_p(index) && index >=0 && index < 4) { \
      return (STYPE)_mm_extract_##FUNC(v.v[index>>1], index%2);    \
    } else { \
      return ((STYPE*)&v)[index];  \
    } \
  }                                          \
  static FORCEINLINE void svec_insert(svec<LANES,STYPE> *v, int index, STYPE val) { \
    if(__builtin_constant_p(index) && index >=0 && index < 4) { \
      v->v[index>>1] = _mm_insert_##FUNC(v->v[index>>1], val, index%2);      \
    } else { \
      ((STYPE*)v)[index] = val;               \
    } \
  }

#define INT_BINARY_OP_METHODS ( STYPE )

Value:

BINARY_OP_OPT_FUNC(STYPE, STYPE, svec_or, _mm_or_si128); \
BINARY_OP_OPT_FUNC(STYPE, STYPE, svec_and, _mm_and_si128); \
BINARY_OP_OPT_FUNC(STYPE, STYPE, svec_xor, _mm_xor_si128); \
BINARY_OP_L4(STYPE, svec_rem, %); \
BINARY_OP_SCALAR_L4(STYPE, STYPE, svec_rem, %);

#define INT_BINARY_OP_METHODS64 ( STYPE )

Value:

BINARY_OP_OPT_FUNC64(STYPE, STYPE, svec_or, _mm_or_si128); \
BINARY_OP_OPT_FUNC64(STYPE, STYPE, svec_and, _mm_and_si128); \
BINARY_OP_OPT_FUNC64(STYPE, STYPE, svec_xor, _mm_xor_si128); \
BINARY_OP_L4(STYPE, svec_rem, %); \
BINARY_OP_SCALAR_L4(STYPE, STYPE, svec_rem, %);

#define LANES 4

#define LOAD_CONST_SSE ( STYPE )

Value:

template <class RetVecType> static RetVecType svec_load_const(const STYPE* p); \
template<> \
  FORCEINLINE svec<LANES,STYPE> svec_load_const<svec<LANES,STYPE> >(const STYPE* p) { \
    return svec<LANES,STYPE>(*p); \
} \
template <class RetVecType> static RetVecType svec_load_and_splat(STYPE* p); \
template<> \
FORCEINLINE svec<LANES,STYPE> svec_load_and_splat<svec<LANES,STYPE> >(STYPE* p) { \
  return svec<LANES,STYPE>(*p);\
}

#define MAX_MIN_REDUCE_METHODS ( STYPE )

Value:

BINARY_OP_REDUCE_FUNC(STYPE, svec_reduce_add, add<STYPE>); \
BINARY_OP_REDUCE_FUNC(STYPE, svec_reduce_max, max<STYPE>); \
BINARY_OP_REDUCE_FUNC(STYPE, svec_reduce_min, min<STYPE>); \

#define SUBSCRIPT_FUNC_IMPL_SSE ( STYPE )

Value:

FORCEINLINE STYPE& svec<LANES,STYPE>::operator[](int index) { \
  return ((STYPE *)&v)[index];   \
} \
const FORCEINLINE STYPE  svec<LANES,STYPE>::operator[](int index) const { \
  return svec_extract(*this, index); \
}

This macro uses SSE-specific intrinsics to perform extract and insert.

#define TERNERY_OPT ( STYPE )

Value:

 \
FORCEINLINE svec<LANES,STYPE> svec_madd(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
  return a * b + c;\
} \ \
FORCEINLINE svec<LANES,STYPE> svec_msub(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
  return a * b - c;\
} \ \
FORCEINLINE svec<LANES,STYPE> svec_nmsub(svec<LANES,STYPE> a, svec<LANES,STYPE> b, svec<LANES,STYPE> c) { \
  return c - a * b ;\
}

#define UNARY_OP_OPT	(	STYPE,
		NAME,
		OP
	)

Value:

static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a) { \
  return OP(a.v); \
}

#define UNARY_OP_OPT64	(	STYPE,
		NAME,
		OP
	)

Value:

static FORCEINLINE svec<LANES,STYPE> NAME(svec<LANES,STYPE> a) { \
  return  svec<LANES,STYPE>(OP(a.v[0]), OP(a.v[1]));  \
}

Macros for 64-bit objects: i64/u64/double.

Classes

Namespaces

Macros

Functions

Detailed Description

Macro Definition Documentation