|
Generic SIMD Intrinsic Library API
0.6
|
#include <stdint.h> | #include <iostream> | Go to the source code of this file.
Classes | |
| struct | invalid_template_arguments< Lanes, T > |
| union | BitcastUnion |
Macros | |
| #define | DUMP(v) std::cout << #v << ":" << (v) << std::endl |
| #define | IS32BIT |
| #define | INC_STATS_NAME(stat, inc, opname) |
| #define | INC_STATS(stat, inc) |
| #define | NOT_IMPLEMENTED(msg) |
| #define | FORCEINLINE inline __attribute__((always_inline)) |
| #define | PRE_ALIGN(x) |
| #define | POST_ALIGN(x) __attribute__ ((aligned(x))) |
| #define | DEFINE_TYPE_NAME(type, name) template<> FORCEINLINE const char *iu_get_type_name<type>(){return name;} \ |
| #define | SUBSCRIPT_FUNC_DECL(STYPE) |
| macros to define an intrinsic-based subscript operator More... | |
| #define | SUBSCRIPT_FUNC_BOOL_DECL(STYPE) |
| #define | COUT_FUNC_BOOL_DECL() |
| #define | COUT_FUNC_CHAR_DECL(STYPE) |
| #define | COUT_FUNC_DECL(STYPE) |
| #define | VEC_CMP_DECL(STYPE) |
| macros to define compare methods == and != are available for all the types. More... | |
| #define | VEC_UNARY_DECL(STYPE) |
| macros for unary operations. Note: "-" means negation or bitwise complement More... | |
| #define | VEC_BIN_DECL(STYPE) |
| macros for binary operations. More... | |
| #define | SVEC_BOOL_CLASS_METHOD_DECL() |
| macros for svec<N,bool> class's class method More... | |
| #define | VEC_CLASS_METHOD_DECL(STYPE) |
| macros for non-mask i8 - double types's method More... | |
| #define | VEC_INT_CLASS_METHOD_DECL(STYPE, USTYPE) |
| macro for method definitions for integer vector types only. Note: shift operators are only available on unsigned vectors More... | |
| #define | VEC_FLOAT_CLASS_METHOD_DECL(STYPE) |
| #define | INSERT_EXTRACT(STYPE) |
| macros for svec's insert extract method implementation The implementation is based on vector type's subscript operator More... | |
| #define | LOAD_STORE(STYPE) |
| #define | SELECT(STYPE) |
| macros for svec's select by mask vector method generic implementation More... | |
| #define | SELECT_BOOLCOND(STYPE) |
| macros for svec's select by bool scalar method implementation More... | |
| #define | BROADCAST(STYPE) |
| macro for broadcast method implementation All broadcast are slow implementation More... | |
| #define | BROADCAST_L4(STYPE) |
| macro for broadcast method implementation for lanes4 All broadcast are slow implementation More... | |
| #define | ROTATE(STYPE) |
| macro for rotate method implementation More... | |
| #define | ROTATE_L4(STYPE) |
| macro for rotate method implementation More... | |
| #define | SHUFFLES(STYPE) |
| macro for shuffle/shuffle2 methods implementation More... | |
| #define | SHUFFLES_L4(STYPE) |
| macro for shuffle/shuffle2 methods implementation More... | |
| #define | ZERO(STYPE, NAME) |
| macro for setzero method implementation More... | |
| #define | LOAD_CONST(STYPE) |
| #define | GATHER_GENERAL(STYPE, PSTYPE) |
| slow implementation of gather general Must use template to specify the return type More... | |
| #define | GATHER_GENERAL_L4(STYPE, PSTYPE) |
| slow implementation of gather general Must use template to specify the return type More... | |
| #define | GATHER_BASE_OFFSETS(STYPE, OSTYPE) |
| #define | GATHER_BASE_OFFSETS_L4(STYPE, OSTYPE) |
| #define | GATHER_STRIDE(STYPE, OSTYPE) |
| macros for general impl of gather base step More... | |
| #define | GATHER_STRIDE_L4(STYPE, OSTYPE) |
| macros for fast impl of gather base step More... | |
| #define | SCATTER_STRIDE(STYPE, OSTYPE) |
| #define | SCATTER_STRIDE_L4(STYPE, OSTYPE) |
| #define | SCATTER_GENERAL(STYPE, PSTYPE) |
| #define | SCATTER_GENERAL_L4(STYPE, PSTYPE) |
| #define | SCATTER_BASE_OFFSETS(STYPE, OSTYPE) |
| #define | SCATTER_BASE_OFFSETS_L4(STYPE, OSTYPE) |
| #define | MASKED_LOAD_STORE_L4(STYPE) |
| #define | MASKED_LOAD_STORE_L8(STYPE) |
| #define | UNARY_OP(STYPE, NAME, OP) |
| #define | UNARY_OP_L4(STYPE, NAME, OP) |
| #define | BINARY_OP(STYPE, NAME, OP) |
| macros for generic slow impl of binary operation More... | |
| #define | BINARY_OP2(STYPE, STYPE2, NAME, OP) |
| #define | BINARY_OP_FUNC(STYPE, NAME, FUNC) |
| #define | BINARY_OP_L4(STYPE, NAME, OP) |
| macros for generic slow impl of binary operation More... | |
| #define | BINARY_OP2_L4(STYPE, STYPE2, NAME, OP) |
| macros for generic slow impl of binary operation, style 2 More... | |
| #define | BINARY_OP_FUNC_L4(STYPE, NAME, FUNC) |
| #define | BINARY_OP_SCALAR_L4(STYPE, STYPE2, NAME, OP) |
| macros for binary: vector op scalar More... | |
| #define | BINARY_OP_SCALAR(STYPE, NAME, OP) |
| macros for binary: vector op scalar More... | |
| #define | BINARY_SHT_SCALAR(STYPE, SHTTYPE, NAME, OP) |
| macros for binary shift: vector op intvalue More... | |
| #define | BINARY_SCALAR_OP(STYPE, NAME, OP) |
| macros for binary: scalar op vector More... | |
| #define | TERNERY(STYPE) |
| #define | TERNERY_L4(STYPE) |
| #define | BINARY_OP_REDUCE_FUNC(STYPE, NAME, FUNC) |
| #define | BINARY_OP_REDUCE_FUNC_L4(STYPE, NAME, FUNC) |
| #define | CMP_OP(STYPE, NAME, OP) |
| macros for binary: vector op scalar More... | |
| #define | CMP_OP_L4(STYPE, NAME, OP) |
| #define | CMP_MASKED_OP(STYPE, NAME, OP) |
| #define | CMP_ALL_NOMASK_OP(STYPE) |
| #define | CMP_ALL_NOMASK_OP_L4(STYPE) |
| #define | CMP_ALL_MASKED_OP(STYPE) |
| #define | CMP_ALL_OP(STYPE) |
| #define | CAST(SFROM, STO) |
| #define | CAST_L4(SFROM, STO) |
| #define | CAST_BITS(SFROM, FROM_F, STO, TO_F) |
| #define | SUBSCRIPT_FUNC_IMPL(STYPE) |
| #define | VEC_CMP_IMPL(STYPE) |
| #define | VEC_UNARY_IMPL(STYPE) |
| #define | VEC_BIN_IMPL(STYPE) |
| #define | MVEC_CLASS_METHOD_IMPL(STYPE) |
| mask class's class method impl More... | |
| #define | VEC_CLASS_METHOD_IMPL(STYPE) |
| #define | VEC_INT_CLASS_METHOD_IMPL(STYPE, STYPE2) |
| #define | VEC_FLOAT_CLASS_METHOD_IMPL(STYPE) |
Typedefs | |
| typedef uint8_t | svec1_u8 |
| typedef int8_t | svec1_i8 |
| typedef uint16_t | svec1_u16 |
| typedef int16_t | svec1_i16 |
| typedef uint32_t | svec1_u32 |
| typedef int32_t | svec1_i32 |
| typedef uint64_t | svec1_u64 |
| typedef int64_t | svec1_i64 |
| typedef float | svec1_f |
| typedef double | svec1_d |
Functions | |
| template<typename T > | |
| const char * | iu_get_type_name () |
| template<> | |
| const char * | iu_get_type_name< int8_t > () |
| template<> | |
| const char * | iu_get_type_name< uint8_t > () |
| template<> | |
| const char * | iu_get_type_name< int16_t > () |
| template<> | |
| const char * | iu_get_type_name< uint16_t > () |
| template<> | |
| const char * | iu_get_type_name< int32_t > () |
| template<> | |
| const char * | iu_get_type_name< uint32_t > () |
| template<> | |
| const char * | iu_get_type_name< int64_t > () |
| template<> | |
| const char * | iu_get_type_name< uint64_t > () |
| template<> | |
| const char * | iu_get_type_name< float > () |
| template<> | |
| const char * | iu_get_type_name< double > () |
| template<typename T > | |
| void | stdout_scalar (std::ostream &out, T v) |
| template<> | |
| void | stdout_scalar< int8_t > (std::ostream &out, int8_t v) |
| template<> | |
| void | stdout_scalar< uint8_t > (std::ostream &out, uint8_t v) |
| template<int N> | |
| const bool | check_lanes (int n) |
| template<> | |
| const bool | check_lanes< 2 > (int n) |
| template<> | |
| const bool | check_lanes< 4 > (int n) |
| template<> | |
| const bool | check_lanes< 8 > (int n) |
| template<> | |
| const bool | check_lanes< 16 > (int n) |
| #define BINARY_OP | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
macros for generic slow impl of binary operation
| #define BINARY_OP2 | ( | STYPE, | |
| STYPE2, | |||
| NAME, | |||
| OP | |||
| ) |
| #define BINARY_OP2_L4 | ( | STYPE, | |
| STYPE2, | |||
| NAME, | |||
| OP | |||
| ) |
macros for generic slow impl of binary operation, style 2
| #define BINARY_OP_FUNC | ( | STYPE, | |
| NAME, | |||
| FUNC | |||
| ) |
| #define BINARY_OP_FUNC_L4 | ( | STYPE, | |
| NAME, | |||
| FUNC | |||
| ) |
| #define BINARY_OP_L4 | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
macros for generic slow impl of binary operation
| #define BINARY_OP_REDUCE_FUNC | ( | STYPE, | |
| NAME, | |||
| FUNC | |||
| ) |
| #define BINARY_OP_REDUCE_FUNC_L4 | ( | STYPE, | |
| NAME, | |||
| FUNC | |||
| ) |
| #define BINARY_OP_SCALAR | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
macros for binary: vector op scalar
| #define BINARY_OP_SCALAR_L4 | ( | STYPE, | |
| STYPE2, | |||
| NAME, | |||
| OP | |||
| ) |
macros for binary: vector op scalar
| #define BINARY_SCALAR_OP | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
macros for binary: scalar op vector
| #define BINARY_SHT_SCALAR | ( | STYPE, | |
| SHTTYPE, | |||
| NAME, | |||
| OP | |||
| ) |
macros for binary shift: vector op intvalue
| #define BROADCAST | ( | STYPE | ) |
macro for broadcast method implementation All broadcast are slow implementation
| #define BROADCAST_L4 | ( | STYPE | ) |
macro for broadcast method implementation for lanes4 All broadcast are slow implementation
| #define CAST | ( | SFROM, | |
| STO | |||
| ) |
| #define CAST_BITS | ( | SFROM, | |
| FROM_F, | |||
| STO, | |||
| TO_F | |||
| ) |
| #define CAST_L4 | ( | SFROM, | |
| STO | |||
| ) |
| #define CMP_ALL_MASKED_OP | ( | STYPE | ) |
| #define CMP_ALL_NOMASK_OP | ( | STYPE | ) |
| #define CMP_ALL_NOMASK_OP_L4 | ( | STYPE | ) |
| #define CMP_ALL_OP | ( | STYPE | ) |
| #define CMP_MASKED_OP | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
Macros for masked operation based on fast operation
| #define CMP_OP | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
macros for binary: vector op scalar
| #define CMP_OP_L4 | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
| #define COUT_FUNC_BOOL_DECL | ( | ) |
| #define COUT_FUNC_CHAR_DECL | ( | STYPE | ) |
| #define COUT_FUNC_DECL | ( | STYPE | ) |
| #define DEFINE_TYPE_NAME | ( | type, | |
| name | |||
| ) | template<> FORCEINLINE const char *iu_get_type_name<type>(){return name;} \ |
| #define DUMP | ( | v | ) | std::cout << #v << ":" << (v) << std::endl |
Copyright 2012 the Generic SIMD Intrinsic Library project authors. All rights reserved.
Copyright IBM Corp. 2013, 2013. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of IBM Corp. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The original source code covered by the above license above has been modified significantly by IBM Corp. Copyright 2013 the Generic SIMD Intrinsic Library project authors. All rights reserved.
Copyright (c) 2010-2012, Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. gsimd_utility.h
Created on: Jun 12, 2013
Contains the utility functions for intrinsics implementation. Should be included in all intrinsics header files
| #define FORCEINLINE inline __attribute__((always_inline)) |
| #define GATHER_BASE_OFFSETS | ( | STYPE, | |
| OSTYPE | |||
| ) |
| #define GATHER_BASE_OFFSETS_L4 | ( | STYPE, | |
| OSTYPE | |||
| ) |
macros for generic impl of gather base offsets
| #define GATHER_GENERAL | ( | STYPE, | |
| PSTYPE | |||
| ) |
slow implementation of gather general Must use template to specify the return type
| mask |
| #define GATHER_GENERAL_L4 | ( | STYPE, | |
| PSTYPE | |||
| ) |
slow implementation of gather general Must use template to specify the return type
| mask |
| #define GATHER_STRIDE | ( | STYPE, | |
| OSTYPE | |||
| ) |
macros for general impl of gather base step
| #define GATHER_STRIDE_L4 | ( | STYPE, | |
| OSTYPE | |||
| ) |
macros for fast impl of gather base step
| #define INC_STATS | ( | stat, | |
| inc | |||
| ) |
| #define INC_STATS_NAME | ( | stat, | |
| inc, | |||
| opname | |||
| ) |
| #define INSERT_EXTRACT | ( | STYPE | ) |
macros for svec's insert extract method implementation The implementation is based on vector type's subscript operator
| #define IS32BIT |
| #define LOAD_CONST | ( | STYPE | ) |
| #define LOAD_STORE | ( | STYPE | ) |
| #define MASKED_LOAD_STORE_L4 | ( | STYPE | ) |
| #define MASKED_LOAD_STORE_L8 | ( | STYPE | ) |
| #define MVEC_CLASS_METHOD_IMPL | ( | STYPE | ) |
mask class's class method impl
| #define NOT_IMPLEMENTED | ( | msg | ) |
| #define POST_ALIGN | ( | x | ) | __attribute__ ((aligned(x))) |
| #define PRE_ALIGN | ( | x | ) |
| #define ROTATE | ( | STYPE | ) |
macro for rotate method implementation
| #define ROTATE_L4 | ( | STYPE | ) |
macro for rotate method implementation
| #define SCATTER_BASE_OFFSETS | ( | STYPE, | |
| OSTYPE | |||
| ) |
macros for generic impl of scatter base offsets
| #define SCATTER_BASE_OFFSETS_L4 | ( | STYPE, | |
| OSTYPE | |||
| ) |
macros for generic impl of scatter base offsets
| #define SCATTER_GENERAL | ( | STYPE, | |
| PSTYPE | |||
| ) |
| #define SCATTER_GENERAL_L4 | ( | STYPE, | |
| PSTYPE | |||
| ) |
| #define SCATTER_STRIDE | ( | STYPE, | |
| OSTYPE | |||
| ) |
| #define SCATTER_STRIDE_L4 | ( | STYPE, | |
| OSTYPE | |||
| ) |
| #define SELECT | ( | STYPE | ) |
macros for svec's select by mask vector method generic implementation
| #define SELECT_BOOLCOND | ( | STYPE | ) |
macros for svec's select by bool scalar method implementation
| #define SHUFFLES | ( | STYPE | ) |
macro for shuffle/shuffle2 methods implementation
| #define SHUFFLES_L4 | ( | STYPE | ) |
macro for shuffle/shuffle2 methods implementation
| #define SUBSCRIPT_FUNC_BOOL_DECL | ( | STYPE | ) |
| #define SUBSCRIPT_FUNC_DECL | ( | STYPE | ) |
macros to define an intrinsic-based subscript operator
| #define SUBSCRIPT_FUNC_IMPL | ( | STYPE | ) |
| #define SVEC_BOOL_CLASS_METHOD_DECL | ( | ) |
macros for svec<N,bool> class's class method
| #define TERNERY | ( | STYPE | ) |
| #define TERNERY_L4 | ( | STYPE | ) |
| #define UNARY_OP | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
| #define UNARY_OP_L4 | ( | STYPE, | |
| NAME, | |||
| OP | |||
| ) |
| #define VEC_BIN_DECL | ( | STYPE | ) |
macros for binary operations.
| #define VEC_BIN_IMPL | ( | STYPE | ) |
| #define VEC_CLASS_METHOD_DECL | ( | STYPE | ) |
macros for non-mask i8 - double types's method
| #define VEC_CLASS_METHOD_IMPL | ( | STYPE | ) |
| #define VEC_CMP_DECL | ( | STYPE | ) |
macros to define compare methods == and != are available for all the types.
| #define VEC_CMP_IMPL | ( | STYPE | ) |
Below I use macros to declare all vector operators
| #define VEC_FLOAT_CLASS_METHOD_DECL | ( | STYPE | ) |
macros for float/double math unary operations
| #define VEC_FLOAT_CLASS_METHOD_IMPL | ( | STYPE | ) |
| #define VEC_INT_CLASS_METHOD_DECL | ( | STYPE, | |
| USTYPE | |||
| ) |
macro for method definitions for integer vector types only. Note: shift operators are only available on unsigned vectors
| #define VEC_INT_CLASS_METHOD_IMPL | ( | STYPE, | |
| STYPE2 | |||
| ) |
| #define VEC_UNARY_DECL | ( | STYPE | ) |
macros for unary operations. Note: "-" means negation or bitwise complement
| #define VEC_UNARY_IMPL | ( | STYPE | ) |
| #define ZERO | ( | STYPE, | |
| NAME | |||
| ) |
macro for setzero method implementation
| typedef double svec1_d |
| typedef float svec1_f |
| typedef int16_t svec1_i16 |
| typedef int32_t svec1_i32 |
| typedef int64_t svec1_i64 |
| typedef int8_t svec1_i8 |
| typedef uint16_t svec1_u16 |
| typedef uint32_t svec1_u32 |
| typedef uint64_t svec1_u64 |
| typedef uint8_t svec1_u8 |
LANES=1 short vector definition for SIMD Generic Interfaces
| const bool check_lanes | ( | int | n | ) |
| const bool check_lanes< 16 > | ( | int | n | ) |
| const bool check_lanes< 2 > | ( | int | n | ) |
| const bool check_lanes< 4 > | ( | int | n | ) |
| const bool check_lanes< 8 > | ( | int | n | ) |
| const char* iu_get_type_name | ( | ) |
| const char* iu_get_type_name< double > | ( | ) |
| const char* iu_get_type_name< float > | ( | ) |
| const char* iu_get_type_name< int16_t > | ( | ) |
| const char* iu_get_type_name< int32_t > | ( | ) |
| const char* iu_get_type_name< int64_t > | ( | ) |
| const char* iu_get_type_name< int8_t > | ( | ) |
| const char* iu_get_type_name< uint16_t > | ( | ) |
| const char* iu_get_type_name< uint32_t > | ( | ) |
| const char* iu_get_type_name< uint64_t > | ( | ) |
| const char* iu_get_type_name< uint8_t > | ( | ) |
| void stdout_scalar | ( | std::ostream & | out, |
| T | v | ||
| ) |
| void stdout_scalar< int8_t > | ( | std::ostream & | out, |
| int8_t | v | ||
| ) |
| void stdout_scalar< uint8_t > | ( | std::ostream & | out, |
| uint8_t | v | ||
| ) |
1.8.5