Generic SIMD Intrinsic Library API  0.6
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
generic8.h
Go to the documentation of this file.
1 
101 #ifndef GENERIC8_H_
102 #define GENERIC8_H_
103 
104 #include "gsimd_utility.h"
105 
106 namespace generic {
107 
108 #define LANES 8
109 //
111 // Constructor Section
112 //
114 
115 template <>
116 struct svec<LANES,bool>;
117 template <>
118  struct svec<LANES,int8_t>;
119 template <>
120  struct svec<LANES,uint8_t>;
121 template <>
122  struct svec<LANES,int16_t>;
123 template <>
124  struct svec<LANES,uint16_t>;
125 template <>
126  struct svec<LANES,int32_t>;
127 template <>
128  struct svec<LANES,uint32_t>;
129 template <>
130  struct svec<LANES,int64_t>;
131 template <>
132  struct svec<LANES,uint64_t>;
133 template <>
134  struct svec<LANES,float>;
135 template <>
136  struct svec<LANES,double>;
137 template <>
138  struct svec<LANES,void*>;
139 
148 template<>
149 struct svec<LANES,bool> {
150 
151  uint32_t v; //only use 8 bits
152 
157  FORCEINLINE svec() { v = 0;}
164  FORCEINLINE svec(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
165  uint32_t e, uint32_t f, uint32_t g, uint32_t h) {
166  v = ((a ? 1 : 0) |(b ? 2 : 0)|(c ? 4 : 0)|(d ? 8 : 0)
167  |(e ? 16 : 0) |(f ? 32 : 0)|(g ? 64 : 0)|(h ? 128 : 0) );
168  }
175  FORCEINLINE svec(uint32_t a){
176  v = a ? 255 : 0;
177  }
178 
182 };
183 
184 
188 template <>
189 struct svec<LANES,int8_t> {
190  int8_t v[LANES];
191 
201  FORCEINLINE svec(int8_t a, int8_t b, int8_t c, int8_t d,
202  int8_t e, int8_t f, int8_t g, int8_t h) {
203  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
204  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
205  }
210  FORCEINLINE svec(int8_t a) {
211  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
212  }
217  SUBSCRIPT_FUNC_DECL(int8_t);
219 
220  VEC_CLASS_METHOD_DECL(int8_t);
221  VEC_INT_CLASS_METHOD_DECL(int8_t, uint8_t);
222 };
223 
227 template<>
228 struct svec<LANES,uint8_t> {
229  uint8_t v[LANES];
239  FORCEINLINE svec(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
240  uint8_t e, uint8_t f, uint8_t g, uint8_t h) {
241  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
242  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
243  }
249  FORCEINLINE svec(uint8_t a){
250  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
251  }
256  SUBSCRIPT_FUNC_DECL(uint8_t);
258 
259  VEC_CLASS_METHOD_DECL(uint8_t);
260  VEC_INT_CLASS_METHOD_DECL(uint8_t, uint8_t);
261 };
262 
266 template <>
267 struct svec<LANES,int16_t> {
268  int16_t v[LANES];
278  FORCEINLINE svec(int16_t a, int16_t b, int16_t c, int16_t d,
279  int16_t e, int16_t f, int16_t g, int16_t h) {
280  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
281  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
282  }
288  FORCEINLINE svec( int16_t a) {
289  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
290  }
295  SUBSCRIPT_FUNC_DECL(int16_t);
296  COUT_FUNC_DECL(int16_t);
297 
298  VEC_CLASS_METHOD_DECL(int16_t);
299  VEC_INT_CLASS_METHOD_DECL(int16_t, uint16_t);
300 
301 };
302 
306 template <>
307 struct svec<LANES,uint16_t> {
308  uint16_t v[LANES];
318  FORCEINLINE svec(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
319  uint16_t e, uint16_t f, uint16_t g, uint16_t h) {
320  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
321  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
322  }
328  FORCEINLINE svec( uint16_t a) {
329  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
330  }
335  SUBSCRIPT_FUNC_DECL(uint16_t);
336  COUT_FUNC_DECL(uint16_t);
337 
338  VEC_CLASS_METHOD_DECL(uint16_t);
339  VEC_INT_CLASS_METHOD_DECL(uint16_t, uint16_t);
340 
341 };
342 
346 template <>
347 struct svec<LANES,int32_t> {
348  int32_t v[LANES];
358  FORCEINLINE svec(int a, int b, int c, int d,
359  int e, int f, int g, int h) {
360  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
361  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
362  }
368  FORCEINLINE svec(int32_t a) {
369  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
370  }
375  SUBSCRIPT_FUNC_DECL(int32_t);
376  COUT_FUNC_DECL(int32_t);
377 
378  VEC_CLASS_METHOD_DECL(int32_t);
379  VEC_INT_CLASS_METHOD_DECL(int32_t, uint32_t);
380 
381 };
382 
386 template <>
387 struct svec<LANES,uint32_t> {
388  uint32_t v[LANES];
398  FORCEINLINE svec(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
399  uint32_t e, uint32_t f, uint32_t g, uint32_t h) {
400  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
401  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
402  }
408  FORCEINLINE svec(uint32_t a) {
409  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
410  }
415  SUBSCRIPT_FUNC_DECL(uint32_t);
416  COUT_FUNC_DECL(uint32_t);
417 
418  VEC_CLASS_METHOD_DECL(uint32_t);
419  VEC_INT_CLASS_METHOD_DECL(uint32_t, uint32_t);
420 };
421 
425 template <>
426 struct svec<LANES,int64_t> {
427  int64_t v[LANES];
437  FORCEINLINE svec(int64_t a, int64_t b, int64_t c, int64_t d,
438  int64_t e, int64_t f, int64_t g, int64_t h) {
439  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
440  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
441  }
447  FORCEINLINE svec( int64_t a) {
448  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
449  }
454  SUBSCRIPT_FUNC_DECL(int64_t);
455  COUT_FUNC_DECL(int64_t);
456 
457  VEC_CLASS_METHOD_DECL(int64_t);
458  VEC_INT_CLASS_METHOD_DECL(int64_t, uint64_t);
459 };
460 
464 template <>
465 struct svec<LANES,uint64_t> {
466  uint64_t v[LANES];
476  FORCEINLINE svec(uint64_t a, uint64_t b, uint64_t c, uint64_t d,
477  uint64_t e, uint64_t f, uint64_t g, uint64_t h) {
478  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
479  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
480  }
486  FORCEINLINE svec( uint64_t a) {
487  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
488  }
493  SUBSCRIPT_FUNC_DECL(uint64_t);
494  COUT_FUNC_DECL(uint64_t);
495 
496  VEC_CLASS_METHOD_DECL(uint64_t);
497  VEC_INT_CLASS_METHOD_DECL(uint64_t, uint64_t);
498 };
499 
503 template<>
504 struct svec<LANES,float> {
505  float v[LANES];
515  FORCEINLINE svec(float a, float b, float c, float d,
516  float e, float f, float g, float h) {
517  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
518  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
519  }
525  FORCEINLINE svec( float a) {
526  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
527  }
532  SUBSCRIPT_FUNC_DECL(float);
534 
535  VEC_CLASS_METHOD_DECL(float);
537 };
538 
542 template<>
543 struct svec<LANES,double> {
544  double v[LANES];
554  FORCEINLINE svec(double a, double b, double c, double d,
555  double e, double f, double g, double h) {
556  v[0] = a; v[1] = b; v[2] = c; v[3] = d;
557  v[4] = e; v[5] = f; v[6] = g; v[7] = h;
558  }
564  FORCEINLINE svec( double a) {
565  v[0] = v[1] = v[2] = v[3] = v[4] = v[5] = v[6] = v[7] = a;
566  }
571  SUBSCRIPT_FUNC_DECL(double);
572  COUT_FUNC_DECL(double);
573 
574  VEC_CLASS_METHOD_DECL(double);
576 };
577 
578 
580 //
581 // Data operation interfaces
582 //
584 
585 //
587 //
588 //i1 (bool mask) uses a different approach: the lanes are packed as bits of a single integer
589 static FORCEINLINE uint32_t svec_extract(svec<LANES,bool> v, int index) { // read lane `index` of a bool mask
590  return ((v.v >> index) & 1u) ? 0xFFFFFFFFu : 0u; // a set lane reads back as all ones, a clear lane as zero
591 }
592 static FORCEINLINE void svec_insert(svec<LANES,bool> *v, int index, uint32_t val) { // write lane `index` of a bool mask
593  const uint32_t lane_bit = 1 << index; // the bit of v->v that stores lane `index`
594  v->v = val ? (v->v | lane_bit) : (v->v & ~lane_bit); // any non-zero val sets the lane, zero clears it
595 }
599 INSERT_EXTRACT(int8_t);
600 INSERT_EXTRACT(uint8_t);
601 INSERT_EXTRACT(int16_t);
602 INSERT_EXTRACT(uint16_t);
603 INSERT_EXTRACT(int32_t);
604 INSERT_EXTRACT(uint32_t);
605 INSERT_EXTRACT(int64_t);
606 INSERT_EXTRACT(uint64_t);
607 INSERT_EXTRACT(float);
608 INSERT_EXTRACT(double);
609 
610 // 1. Load / Store
611 LOAD_STORE(bool);
612 LOAD_STORE(int8_t);
613 LOAD_STORE(uint8_t);
614 LOAD_STORE(int16_t);
615 LOAD_STORE(uint16_t);
616 LOAD_STORE(int32_t);
617 LOAD_STORE(uint32_t);
618 LOAD_STORE(int64_t);
619 LOAD_STORE(uint64_t);
620 LOAD_STORE(float);
621 LOAD_STORE(double);
622 
623 // 3. Select
624 static FORCEINLINE svec<LANES,bool> svec_select(svec<LANES,bool> mask, svec<LANES,bool> a, svec<LANES,bool> b) { // per lane: mask set -> a, mask clear -> b
625  a.v &= mask.v; // keep a's lanes where the mask bit is set (a is a by-value copy)
626  a.v |= (b.v & ~mask.v); // fill the remaining lanes from b
627  return a;
628 }
629 SELECT(int8_t);
630 SELECT(uint8_t);
631 SELECT(int16_t);
632 SELECT(uint16_t);
633 SELECT(int32_t);
634 SELECT(uint32_t);
635 SELECT(int64_t);
636 SELECT(uint64_t);
637 SELECT(float);
638 SELECT(double);
639 
644 SELECT_BOOLCOND(uint16_t);
646 SELECT_BOOLCOND(uint32_t);
648 SELECT_BOOLCOND(uint64_t);
651 
652 // 4. broadcast/rotate/shuffle/smear/setzero
653 BROADCAST(int8_t);
654 BROADCAST(uint8_t);
655 BROADCAST(int16_t);
656 BROADCAST(uint16_t);
657 BROADCAST(int32_t);
658 BROADCAST(uint32_t);
659 BROADCAST(int64_t);
660 BROADCAST(uint64_t);
661 BROADCAST(float);
662 BROADCAST(double);
663 
664 ROTATE(int8_t);
665 ROTATE(uint8_t);
666 ROTATE(int16_t);
667 ROTATE(uint16_t);
668 ROTATE(int32_t);
669 ROTATE(uint32_t);
670 ROTATE(int64_t);
671 ROTATE(uint64_t);
672 ROTATE(float);
673 ROTATE(double);
674 
675 SHUFFLES(int8_t);
676 SHUFFLES(uint8_t);
677 SHUFFLES(int16_t);
678 SHUFFLES(uint16_t);
679 SHUFFLES(int32_t);
680 SHUFFLES(uint32_t);
681 SHUFFLES(int64_t);
682 SHUFFLES(uint64_t);
683 SHUFFLES(float);
684 SHUFFLES(double);
685 
686 //load const
687 LOAD_CONST(int8_t);
688 LOAD_CONST(uint8_t);
689 LOAD_CONST(int16_t);
690 LOAD_CONST(uint16_t);
691 LOAD_CONST(int32_t);
692 LOAD_CONST(uint32_t);
693 LOAD_CONST(int64_t);
694 LOAD_CONST(uint64_t);
695 LOAD_CONST(float);
696 LOAD_CONST(double);
697 
698 
699 // 5. Gather / Scatter
712 #if defined(__x86_64__) || defined(__PPC64__)
713 template<>
714 struct svec<LANES,void*> : public svec<LANES,uint64_t>{
719  FORCEINLINE svec(void* p0, void* p1, void* p2, void* p3, void* p4, void* p5, void* p6, void* p7):
720  svec<LANES,uint64_t>((uint64_t)(p0),(uint64_t)(p1),(uint64_t)(p2),(uint64_t)(p3),(uint64_t)(p4),(uint64_t)(p5),(uint64_t)(p6),(uint64_t)(p7)){}
721 };
722 #else // 32-bit
723 template<>
724 struct svec<LANES,void*>: public svec<LANES,uint32_t>{
729  FORCEINLINE svec(void* p0, void* p1, void* p2, void* p3, void* p4, void* p5, void* p6, void* p7):
730  svec<LANES,uint32_t>((uint32_t)(p0),(uint32_t)(p1),(uint32_t)(p2),(uint32_t)(p3),(uint32_t)(p4),(uint32_t)(p5),(uint32_t)(p6),(uint32_t)(p7)){}
731 };
732 #endif // __x86_64__ || __PPC64__
733 
734 #ifndef DOXYGEN_SHOULD_SKIP_THIS //not want generate svec_gather*/svec_scatter methods
735 
736 template <class RetVecType> static RetVecType svec_gather(svec<LANES, uint32_t> ptrs, svec<LANES,bool> mask);
737 template <class RetVecType> static RetVecType svec_gather(svec<LANES, uint64_t> ptrs, svec<LANES,bool> mask);
738 
739 GATHER_GENERAL(int8_t, uint32_t);
740 GATHER_GENERAL(int8_t, uint64_t);
741 GATHER_GENERAL(uint8_t, uint32_t);
742 GATHER_GENERAL(uint8_t, uint64_t);
743 GATHER_GENERAL(int16_t, uint32_t);
744 GATHER_GENERAL(int16_t, uint64_t);
745 GATHER_GENERAL(uint16_t, uint32_t);
746 GATHER_GENERAL(uint16_t, uint64_t);
747 GATHER_GENERAL(int32_t, uint32_t);
748 GATHER_GENERAL(int32_t, uint64_t);
749 GATHER_GENERAL(uint32_t, uint32_t);
750 GATHER_GENERAL(uint32_t, uint64_t);
751 GATHER_GENERAL(int64_t, uint32_t);
752 GATHER_GENERAL(int64_t, uint64_t);
753 GATHER_GENERAL(uint64_t, uint32_t);
754 GATHER_GENERAL(uint64_t, uint64_t);
755 GATHER_GENERAL(float, uint32_t);
756 GATHER_GENERAL(float, uint64_t);
757 GATHER_GENERAL(double, uint32_t);
758 GATHER_GENERAL(double, uint64_t);
759 
760 GATHER_BASE_OFFSETS(int8_t, int32_t);
761 GATHER_BASE_OFFSETS(int8_t, int64_t);
762 GATHER_BASE_OFFSETS(uint8_t, int32_t);
763 GATHER_BASE_OFFSETS(uint8_t, int64_t);
764 GATHER_BASE_OFFSETS(int16_t, int32_t);
765 GATHER_BASE_OFFSETS(int16_t, int64_t);
766 GATHER_BASE_OFFSETS(uint16_t, int32_t);
767 GATHER_BASE_OFFSETS(uint16_t, int64_t);
768 GATHER_BASE_OFFSETS(int32_t, int32_t);
769 GATHER_BASE_OFFSETS(int32_t, int64_t);
770 GATHER_BASE_OFFSETS(uint32_t, int32_t);
771 GATHER_BASE_OFFSETS(uint32_t, int64_t);
772 GATHER_BASE_OFFSETS(int64_t, int32_t);
773 GATHER_BASE_OFFSETS(int64_t, int64_t);
774 GATHER_BASE_OFFSETS(uint64_t, int32_t);
775 GATHER_BASE_OFFSETS(uint64_t, int64_t);
776 GATHER_BASE_OFFSETS(float, int32_t);
777 GATHER_BASE_OFFSETS(float, int64_t);
778 GATHER_BASE_OFFSETS(double, int32_t);
779 GATHER_BASE_OFFSETS(double, int64_t);
780 
781 GATHER_STRIDE(int8_t, int32_t);
782 GATHER_STRIDE(int8_t, int64_t);
783 GATHER_STRIDE(uint8_t, int32_t);
784 GATHER_STRIDE(uint8_t, int64_t);
785 GATHER_STRIDE(int16_t, int32_t);
786 GATHER_STRIDE(int16_t, int64_t);
787 GATHER_STRIDE(uint16_t, int32_t);
788 GATHER_STRIDE(uint16_t, int64_t);
789 GATHER_STRIDE(int32_t, int32_t);
790 GATHER_STRIDE(int32_t, int64_t);
791 GATHER_STRIDE(uint32_t, int32_t);
792 GATHER_STRIDE(uint32_t, int64_t);
793 GATHER_STRIDE(int64_t, int32_t);
794 GATHER_STRIDE(int64_t, int64_t);
795 GATHER_STRIDE(uint64_t, int32_t);
796 GATHER_STRIDE(uint64_t, int64_t);
797 GATHER_STRIDE(float, int32_t);
798 GATHER_STRIDE(float, int64_t);
799 GATHER_STRIDE(double, int32_t);
800 GATHER_STRIDE(double, int64_t);
801 
802 
803 SCATTER_GENERAL(int8_t, uint32_t);
804 SCATTER_GENERAL(int8_t, uint64_t);
805 SCATTER_GENERAL(uint8_t, uint32_t);
806 SCATTER_GENERAL(uint8_t, uint64_t);
807 SCATTER_GENERAL(int16_t, uint32_t);
808 SCATTER_GENERAL(int16_t, uint64_t);
809 SCATTER_GENERAL(uint16_t, uint32_t);
810 SCATTER_GENERAL(uint16_t, uint64_t);
811 SCATTER_GENERAL(int32_t, uint32_t);
812 SCATTER_GENERAL(int32_t, uint64_t);
813 SCATTER_GENERAL(uint32_t, uint32_t);
814 SCATTER_GENERAL(uint32_t, uint64_t);
815 SCATTER_GENERAL(int64_t, uint32_t);
816 SCATTER_GENERAL(int64_t, uint64_t);
817 SCATTER_GENERAL(uint64_t, uint32_t);
818 SCATTER_GENERAL(uint64_t, uint64_t);
819 SCATTER_GENERAL(float, uint32_t);
820 SCATTER_GENERAL(float, uint64_t);
821 SCATTER_GENERAL(double, uint32_t);
822 SCATTER_GENERAL(double, uint64_t);
823 
824 SCATTER_BASE_OFFSETS(int8_t, int32_t);
825 SCATTER_BASE_OFFSETS(int8_t, int64_t);
826 SCATTER_BASE_OFFSETS(uint8_t, int32_t);
827 SCATTER_BASE_OFFSETS(uint8_t, int64_t);
828 SCATTER_BASE_OFFSETS(int16_t, int32_t);
829 SCATTER_BASE_OFFSETS(int16_t, int64_t);
830 SCATTER_BASE_OFFSETS(uint16_t, int32_t);
831 SCATTER_BASE_OFFSETS(uint16_t, int64_t);
832 SCATTER_BASE_OFFSETS(int32_t, int32_t);
833 SCATTER_BASE_OFFSETS(int32_t, int64_t);
834 SCATTER_BASE_OFFSETS(uint32_t, int32_t);
835 SCATTER_BASE_OFFSETS(uint32_t, int64_t);
836 SCATTER_BASE_OFFSETS(int64_t, int32_t);
837 SCATTER_BASE_OFFSETS(int64_t, int64_t);
838 SCATTER_BASE_OFFSETS(uint64_t, int32_t);
839 SCATTER_BASE_OFFSETS(uint64_t, int64_t);
840 SCATTER_BASE_OFFSETS(float, int32_t);
841 SCATTER_BASE_OFFSETS(float, int64_t);
842 SCATTER_BASE_OFFSETS(double, int32_t);
843 SCATTER_BASE_OFFSETS(double, int64_t);
844 
845 SCATTER_STRIDE(int8_t, int32_t);
846 SCATTER_STRIDE(int8_t, int64_t);
847 SCATTER_STRIDE(uint8_t, int32_t);
848 SCATTER_STRIDE(uint8_t, int64_t);
849 SCATTER_STRIDE(int16_t, int32_t);
850 SCATTER_STRIDE(int16_t, int64_t);
851 SCATTER_STRIDE(uint16_t, int32_t);
852 SCATTER_STRIDE(uint16_t, int64_t);
853 SCATTER_STRIDE(int32_t, int32_t);
854 SCATTER_STRIDE(int32_t, int64_t);
855 SCATTER_STRIDE(uint32_t, int32_t);
856 SCATTER_STRIDE(uint32_t, int64_t);
857 SCATTER_STRIDE(int64_t, int32_t);
858 SCATTER_STRIDE(int64_t, int64_t);
859 SCATTER_STRIDE(uint64_t, int32_t);
860 SCATTER_STRIDE(uint64_t, int64_t);
861 SCATTER_STRIDE(float, int32_t);
862 SCATTER_STRIDE(float, int64_t);
863 SCATTER_STRIDE(double, int32_t);
864 SCATTER_STRIDE(double, int64_t);
865 
866 #endif //DOXYGEN_SHOULD_SKIP_THIS
867 
868 
869 // 5. masked load/masked store
870 
871 //Masked load/store is implemented based on gather_base_offsets/scatter_base_offsets
872 //Here we only use offsets with 32bit
873 
874 MASKED_LOAD_STORE_L8(int8_t);
875 MASKED_LOAD_STORE_L8(uint8_t);
876 MASKED_LOAD_STORE_L8(int16_t);
877 MASKED_LOAD_STORE_L8(uint16_t);
878 MASKED_LOAD_STORE_L8(int32_t);
879 MASKED_LOAD_STORE_L8(uint32_t);
880 MASKED_LOAD_STORE_L8(int64_t);
881 MASKED_LOAD_STORE_L8(uint64_t);
882 MASKED_LOAD_STORE_L8(float);
883 MASKED_LOAD_STORE_L8(double);
884 
886 //
887 // Mask type (i1) interfaces
888 //
890 
891 // 1. mask construction
892 /**
893  * @brief Check whether at least one of the 8 lanes of the mask is set.
894  * @param mask the bool vector; only the low 8 bits of mask.v are lane bits.
895  * @return true if any lane bit is set, false otherwise.
896  */
897 static FORCEINLINE bool svec_any_true(const svec<LANES,bool>& mask) {
898  return (mask.v & 0xFF) != 0; // ignore bits 8..31, which are not lanes (same masking as svec_all_true)
899 }
900 
902 /**
903  * @brief Check whether all 8 lanes of the mask are set.
904  * @return true if every one of the low 8 bits of mask.v is set.
905  */
906 static FORCEINLINE bool svec_all_true(const svec<LANES,bool>& mask) {
907  return (~mask.v & 0xFF) == 0; // no lane bit may be clear; bits 8..31 are ignored
908 }
909 
910 
911 /**
912  * @brief Check whether none of the 8 lanes of the mask is set.
913  * @param mask the bool vector; only the low 8 bits of mask.v are lane bits.
914  * @return true if every lane bit is clear, false otherwise.
915  */
916 static FORCEINLINE bool svec_none_true(const svec<LANES,bool>& mask) {
917  return (mask.v & 0xFF) == 0; // ignore bits 8..31, which are not lanes (same masking as svec_all_true)
918 }
919 
920 // 2. bit operations
921 
922 /**
923  * @brief Lane-wise AND of two bool masks.
924  */
925 static FORCEINLINE svec<LANES,bool> svec_and(svec<LANES,bool> a, svec<LANES,bool> b) {
926  a.v &= b.v; // a is a by-value copy, so it doubles as the result
927  return a;
928 }
930 
931 
932 /**
933  * @brief Lane-wise OR of two bool masks.
934  */
935 static FORCEINLINE svec<LANES,bool> svec_or(svec<LANES,bool> a, svec<LANES,bool> b) {
936  a.v |= b.v; // a is a by-value copy, so it doubles as the result
937  return a;
938 }
940 
941 /**
942  * @brief Lane-wise XOR of two bool masks.
943  */
944 static FORCEINLINE svec<LANES,bool> svec_xor(svec<LANES,bool> a, svec<LANES,bool> b) {
945  a.v ^= b.v; // a is a by-value copy, so it doubles as the result
946  return a;
947 }
949 
950 /**
951  * @brief Lane-wise NOT of a bool mask; the result holds only the 8 lane bits.
952  */
953 static FORCEINLINE svec<LANES,bool> svec_not(svec<LANES,bool> a) {
954  svec<LANES,bool> ret;
955  ret.v = ~a.v & 0xFF; // drop bits 8..31 so whole-word tests (v != 0, v == 0) and raw reads of v see lanes only
956  return ret;
957 }
958 
959 /**
960  * @brief Return the mask as an integer bit field.
961  * @param mask the bool vector; lane i is stored in bit i of mask.v.
962  * @return the 8 lane bits in bits 0..7; all higher bits are zero.
963  */
965 static FORCEINLINE uint64_t svec_movmsk(svec<LANES,bool> mask) {
966  return (uint64_t)(mask.v & 0xFF); // report lane bits only; bits 8..31 of v are not lanes
967 }
968 
969 
971 //
972 // General data operation interfaces
973 //
975 // 1. Unary
976 
977 // neg operation
978 UNARY_OP(int8_t, svec_neg, -);
979 UNARY_OP(uint8_t, svec_neg, -);
980 UNARY_OP(int16_t, svec_neg, -);
981 UNARY_OP(uint16_t, svec_neg, -);
982 UNARY_OP(int32_t, svec_neg, -);
983 UNARY_OP(uint32_t, svec_neg, -);
984 UNARY_OP(int64_t, svec_neg, -);
985 UNARY_OP(uint64_t, svec_neg, -);
986 UNARY_OP(float, svec_neg, -);
987 UNARY_OP(double, svec_neg, -);
988 
989 // 2. Math unary
990 //round
991 UNARY_OP(float, svec_round, roundf);
992 UNARY_OP(double, svec_round, round);
993 //floor
994 UNARY_OP(float, svec_floor, floorf);
995 UNARY_OP(double, svec_floor, floor);
996 //ceil
997 UNARY_OP(float, svec_ceil, ceilf);
998 UNARY_OP(double, svec_ceil, ceil);
999 //reciprocal: 1/x
1000 UNARY_OP(float, svec_rcp, 1.0/);
1001 UNARY_OP(double, svec_rcp, 1.0/);
1002 //reciprocal sqrt: 1/sqrt(x)
1003 UNARY_OP(float, svec_rsqrt, 1.0/sqrtf);
1004 UNARY_OP(double, svec_rsqrt, 1.0/sqrt);
1005 //sqrt
1006 UNARY_OP(float, svec_sqrt, sqrtf);
1007 UNARY_OP(double, svec_sqrt, sqrt);
1008 //exp
1009 UNARY_OP(float, svec_exp, expf);
1010 UNARY_OP(double, svec_exp, exp);
1011 //log
1012 UNARY_OP(float, svec_log, logf);
1013 UNARY_OP(double, svec_log, log);
1014 //abs - for all types
1015 UNARY_OP(int8_t, svec_abs, abs<int8_t>);
1016 static FORCEINLINE svec<LANES,uint8_t> svec_abs(svec<LANES,uint8_t> v) { return v;}
1017 UNARY_OP(int16_t, svec_abs, abs<int16_t>);
1018 static FORCEINLINE svec<LANES,uint16_t> svec_abs(svec<LANES,uint16_t> v) { return v;}
1019 UNARY_OP(int32_t, svec_abs, abs<int32_t>);
1020 static FORCEINLINE svec<LANES,uint32_t> svec_abs(svec<LANES,uint32_t> v) { return v;}
1021 UNARY_OP(int64_t, svec_abs, abs<int64_t>);
1022 static FORCEINLINE svec<LANES,uint64_t> svec_abs(svec<LANES,uint64_t> v) { return v;}
1023 UNARY_OP(float, svec_abs, abs);
1024 UNARY_OP(double, svec_abs, abs);
1025 
1026 // 3. Binary
1027 
1028 //add, sub, div, mul.
1029 #define BINARY_OP_METHODS(STYPE) \
1030 BINARY_OP(STYPE, svec_add, +); \
1031 BINARY_OP(STYPE, svec_sub, -); \
1032 BINARY_OP(STYPE, svec_mul, *); \
1033 BINARY_OP(STYPE, svec_div, /); \
1034 BINARY_OP_SCALAR(STYPE, svec_add_scalar, +); \
1035 BINARY_SCALAR_OP(STYPE, svec_scalar_add, +); \
1036 BINARY_OP_SCALAR(STYPE, svec_sub_scalar, -); \
1037 BINARY_SCALAR_OP(STYPE, svec_scalar_sub, -); \
1038 BINARY_OP_SCALAR(STYPE, svec_mul_scalar, *); \
1039 BINARY_SCALAR_OP(STYPE, svec_scalar_mul, *); \
1040 BINARY_OP_SCALAR(STYPE, svec_div_scalar, /); \
1041 BINARY_SCALAR_OP(STYPE, svec_scalar_div, /); \
1042 
1043 #define INT_BINARY_OP_METHODS(STYPE) \
1044 BINARY_OP(STYPE, svec_or, |); \
1045 BINARY_OP(STYPE, svec_and, &); \
1046 BINARY_OP(STYPE, svec_xor, ^); \
1047 BINARY_SHT_SCALAR(STYPE, int32_t, svec_shl, <<); \
1048 BINARY_SHT_SCALAR(STYPE, int32_t, svec_shr, >>); \
1049 BINARY_OP(STYPE, svec_rem, %); \
1050 BINARY_OP_SCALAR(STYPE, svec_rem, %);
1051 
1052 BINARY_OP_METHODS(int8_t);
1053 BINARY_OP_METHODS(uint8_t);
1054 BINARY_OP_METHODS(int16_t);
1055 BINARY_OP_METHODS(uint16_t);
1056 BINARY_OP_METHODS(int32_t);
1057 BINARY_OP_METHODS(uint32_t);
1058 BINARY_OP_METHODS(int64_t);
1059 BINARY_OP_METHODS(uint64_t);
1060 BINARY_OP_METHODS(float);
1061 BINARY_OP_METHODS(double);
1062 
1063 INT_BINARY_OP_METHODS(int8_t);
1064 INT_BINARY_OP_METHODS(uint8_t);
1065 INT_BINARY_OP_METHODS(int16_t);
1066 INT_BINARY_OP_METHODS(uint16_t);
1067 INT_BINARY_OP_METHODS(int32_t);
1068 INT_BINARY_OP_METHODS(uint32_t);
1069 INT_BINARY_OP_METHODS(int64_t);
1070 INT_BINARY_OP_METHODS(uint64_t);
1071 
1072 
1073 //power: floating-point types only (float and double)
1074 BINARY_OP_FUNC(float, svec_pow, powf);
1075 BINARY_OP_FUNC(double, svec_pow, pow);
1076 
1077 //shift left
1078 BINARY_OP2(int8_t, uint8_t, svec_shl, <<);
1079 BINARY_OP2(uint8_t, uint8_t, svec_shl, <<);
1080 BINARY_OP2(int16_t, uint16_t, svec_shl, <<);
1081 BINARY_OP2(uint16_t, uint16_t, svec_shl, <<);
1082 BINARY_OP2(int32_t, uint32_t, svec_shl, <<);
1083 BINARY_OP2(uint32_t, uint32_t, svec_shl, <<);
1084 BINARY_OP2(int64_t, uint64_t, svec_shl, <<);
1085 BINARY_OP2(uint64_t, uint64_t, svec_shl, <<);
1086 
1087 //shift right
1088 BINARY_OP2(int8_t, uint8_t, svec_shr, >>);
1089 BINARY_OP2(uint8_t, uint8_t, svec_shr, >>);
1090 BINARY_OP2(int16_t, uint16_t, svec_shr, >>);
1091 BINARY_OP2(uint16_t, uint16_t, svec_shr, >>);
1092 BINARY_OP2(int32_t, uint32_t, svec_shr, >>);
1093 BINARY_OP2(uint32_t, uint32_t, svec_shr, >>);
1094 BINARY_OP2(int64_t, uint64_t, svec_shr, >>);
1095 BINARY_OP2(uint64_t, uint64_t, svec_shr, >>);
1096 
1097 // 4. Ternary
1098 
1099 //madd / msub: only for int32/u32/int64/u64/float/double
1100 TERNERY(int32_t);
1101 TERNERY(uint32_t);
1102 TERNERY(int64_t);
1103 TERNERY(uint64_t);
1104 TERNERY(float);
1105 TERNERY(double);
1106 
1107 
1108 // 5. Max/Min & 6. Reduce
1109 #define MAX_MIN_REDUCE_METHODS(STYPE) \
1110 BINARY_OP_FUNC(STYPE, svec_max, max<STYPE>); \
1111 BINARY_OP_FUNC(STYPE, svec_min, min<STYPE>); \
1112 BINARY_OP_REDUCE_FUNC(STYPE, svec_reduce_add, add<STYPE>); \
1113 BINARY_OP_REDUCE_FUNC(STYPE, svec_reduce_max, max<STYPE>); \
1114 BINARY_OP_REDUCE_FUNC(STYPE, svec_reduce_min, min<STYPE>); \
1115 
1116 MAX_MIN_REDUCE_METHODS(int8_t);
1117 MAX_MIN_REDUCE_METHODS(uint8_t);
1118 MAX_MIN_REDUCE_METHODS(int16_t);
1119 MAX_MIN_REDUCE_METHODS(uint16_t);
1120 MAX_MIN_REDUCE_METHODS(int32_t);
1121 MAX_MIN_REDUCE_METHODS(uint32_t);
1122 MAX_MIN_REDUCE_METHODS(int64_t);
1123 MAX_MIN_REDUCE_METHODS(uint64_t);
1124 MAX_MIN_REDUCE_METHODS(float);
1125 MAX_MIN_REDUCE_METHODS(double);
1126 
1129  return svec<LANES,double>(
1130  svec_reduce_add(v0),
1131  svec_reduce_add(v1),
1132  svec_reduce_add(v2),
1133  svec_reduce_add(v3),
1134  svec_reduce_add(v4),
1135  svec_reduce_add(v5),
1136  svec_reduce_add(v6),
1137  svec_reduce_add(v7)
1138  );
1139 }
1140 
1141 
1142 // 7. Compare
1143 CMP_ALL_OP(int8_t);
1144 CMP_ALL_OP(uint8_t);
1145 CMP_ALL_OP(int16_t);
1146 CMP_ALL_OP(uint16_t);
1147 CMP_ALL_OP(int32_t);
1148 CMP_ALL_OP(uint32_t);
1149 CMP_ALL_OP(int64_t);
1150 CMP_ALL_OP(uint64_t);
1151 CMP_ALL_OP(float);
1152 CMP_ALL_OP(double);
1153 
1160 CMP_OP(bool, equal, ==);
1161 CMP_OP(bool, not_equal, !=);
1162 
1163 // 8. Cast
1164 
1170 //i1 -> all
1171 //CAST(bool, uint32_t);
1172 CAST(bool, int8_t); //better way: packing
1173 CAST(bool, uint8_t); //better way: packing
1174 CAST(bool, int16_t); //better way: packing
1175 CAST(bool, uint16_t); //better way: packing
1176 CAST(bool, int32_t);
1177 CAST(bool, uint32_t);
1178 CAST(bool, int64_t); //better way: unpack, signed ext
1179 CAST(bool, uint64_t);//better way: unpack, signed ext
1180 CAST(bool, float); //si to fp call
1181 CAST(bool, double);
1182 
1183 //i8 -> all
1184 CAST(int8_t, bool);
1185 //CAST(int8_t, int8_t);
1186 CAST(int8_t, uint8_t);
1187 CAST(int8_t, int16_t); //better way, use vec_unpackh
1188 CAST(int8_t, uint16_t); //better way, sext + zero mask and
1189 CAST(int8_t, int32_t); //better way, use twice vec_unpack
1190 CAST(int8_t, uint32_t); //better way, use unpack + zero mask
1191 CAST(int8_t, int64_t);
1192 CAST(int8_t, uint64_t);
1193 CAST(int8_t, float);
1194 CAST(int8_t, double);
1195 
1196 //u8 -> all
1197 CAST(uint8_t, bool);
1198 CAST(uint8_t, int8_t);
1199 //CAST(uint8_t, uint8_t);
1200 CAST(uint8_t, int16_t); //better way, use unpack + zero mask
1201 CAST(uint8_t, uint16_t); //better way use unpack + zero mask
1202 CAST(uint8_t, int32_t);
1203 CAST(uint8_t, uint32_t);
1204 CAST(uint8_t, int64_t);
1205 CAST(uint8_t, uint64_t);
1206 CAST(uint8_t, float);
1207 CAST(uint8_t, double);
1208 
1209 //i16 -> all
1210 CAST(int16_t, bool);
1211 CAST(int16_t, int8_t); //could use pack
1212 CAST(int16_t, uint8_t); //could use pack
1213 //CAST(int16_t, int16_t);
1214 CAST(int16_t, uint16_t);
1215 CAST(int16_t, int32_t); //use unpack
1216 CAST(int16_t, uint32_t); //use unpack and zeromaskout
1217 CAST(int16_t, int64_t);
1218 CAST(int16_t, uint64_t);
1219 CAST(int16_t, float);
1220 CAST(int16_t, double);
1221 
1222 //u16 -> all
1223 CAST(uint16_t, bool);
1224 CAST(uint16_t, int8_t);
1225 CAST(uint16_t, uint8_t);
1226 CAST(uint16_t, int16_t);
1227 //CAST(uint16_t, uint16_t);
1228 CAST(uint16_t, int32_t); //use unpack +mask
1229 CAST(uint16_t, uint32_t); //use unpack + mask
1230 CAST(uint16_t, int64_t);
1231 CAST(uint16_t, uint64_t);
1232 CAST(uint16_t, float);
1233 CAST(uint16_t, double);
1234 
1235 //i32 -> all
1236 CAST(int32_t, bool);
1237 CAST(int32_t, int8_t);
1238 CAST(int32_t, uint8_t);
1239 CAST(int32_t, int16_t);
1240 CAST(int32_t, uint16_t);
1241 //CAST(int32_t, int32_t);
1242 CAST(int32_t, uint32_t);
1243 CAST(int32_t, int64_t); //use p8 unpack
1244 CAST(int32_t, uint64_t); //use p8 unpack
1245 CAST(int32_t, float); //use ctf
1246 CAST(int32_t, double);
1247 
1248 //u32 -> all
1249 CAST(uint32_t, bool);
1250 CAST(uint32_t, int8_t);
1251 CAST(uint32_t, uint8_t);
1252 CAST(uint32_t, int16_t);
1253 CAST(uint32_t, uint16_t);
1254 CAST(uint32_t, int32_t);
1255 //CAST(uint32_t, uint32_t);
1256 CAST(uint32_t, int64_t); //use p8 unpack
1257 CAST(uint32_t, uint64_t); //use p8 unpack
1258 CAST(uint32_t, float);
1259 CAST(uint32_t, double);
1260 
1261 //i64-> all
1262 CAST(int64_t, bool);
1263 CAST(int64_t, int8_t);
1264 CAST(int64_t, uint8_t);
1265 CAST(int64_t, int16_t);
1266 CAST(int64_t, uint16_t);
1267 CAST(int64_t, int32_t); //use p8 trunc
1268 CAST(int64_t, uint32_t); //use p8 trunc
1269 //CAST(int64_t, int64_t);
1270 CAST(int64_t, uint64_t);
1271 CAST(int64_t, float);
1272 CAST(int64_t, double);
1273 
1274 //u64 -> all
1275 CAST(uint64_t, bool);
1276 CAST(uint64_t, int8_t);
1277 CAST(uint64_t, uint8_t);
1278 CAST(uint64_t, int16_t);
1279 CAST(uint64_t, uint16_t);
1280 CAST(uint64_t, int32_t); //use p8 pack
1281 CAST(uint64_t, uint32_t); //use p8 pack
1282 CAST(uint64_t, int64_t);
1283 //CAST(uint64_t, uint64_t);
1284 CAST(uint64_t, float);
1285 CAST(uint64_t, double);
1286 
1287 //float -> all
1288 CAST(float, bool);
1289 CAST(float, int8_t); //use cts + pack+pack
1290 CAST(float, uint8_t); //use ctu + pack + pack
1291 CAST(float, int16_t); //use cts + pack
1292 CAST(float, uint16_t); //use ctu + pack
1293 CAST(float, int32_t);//use cts
1294 CAST(float, uint32_t); //use ctu
1295 CAST(float, int64_t);
1296 CAST(float, uint64_t);
1297 //CAST(float, float);
1298 CAST(float, double);
1299 
1300 //double -> all
1301 CAST(double, bool);
1302 CAST(double, int8_t);
1303 CAST(double, uint8_t);
1304 CAST(double, int16_t);
1305 CAST(double, uint16_t);
1306 CAST(double, int32_t);
1307 CAST(double, uint32_t);
1308 CAST(double, int64_t);
1309 CAST(double, uint64_t);
1310 CAST(double, float);
1311 //CAST(double, double);
1312 
1314 
1315 
1319 CAST_BITS(int32_t, i32, float, f);
1320 CAST_BITS(uint32_t, u32, float, f);
1321 CAST_BITS(float, f, int32_t, i32);
1322 CAST_BITS(float, f, uint32_t, u32);
1323 
1324 CAST_BITS(int64_t, i64, double, d);
1325 CAST_BITS(uint64_t, u64, double, d);
1326 CAST_BITS(double, d, int64_t, i64);
1327 CAST_BITS(double, d, uint64_t, u64);
1328 
1329 
1331 //
1332 // Class operations based on the above interfaces
1333 //
1335 
1336 //add the impl of i1's
1337 FORCEINLINE void svec<LANES,bool>::Helper::operator=(uint32_t value) { // proxy write: store `value` into lane m_index of *m_self
1338  svec_insert(m_self, m_index, value); // svec_insert sets the lane bit for non-zero value, clears it for zero
1339 }
1340 FORCEINLINE void svec<LANES,bool>::Helper::operator=(svec<LANES,bool>::Helper helper) { // proxy-to-proxy assignment: copy one lane's value into another lane
1341  svec_insert(m_self, m_index, helper.operator uint32_t()); // read the source lane through its conversion operator, then write it here
1342 }
1343 FORCEINLINE svec<LANES,bool>::Helper::operator uint32_t() const { // proxy read: value of lane m_index of *m_self
1344  return svec_extract(*m_self, m_index); // svec_extract yields all ones for a set lane, 0 for a clear lane
1345 }
1346 const FORCEINLINE uint32_t svec<LANES,bool>::operator[](int index) const { // read-only subscript: returns the lane value directly, no proxy involved
1347  return svec_extract(*this, index);
1348 }
1359 
1365 FORCEINLINE bool svec<LANES,bool>::any_true() { return svec_any_true(*this); }
1366 
1372 FORCEINLINE bool svec<LANES,bool>::all_true() { return svec_all_true(*this); }
1373 
1379 FORCEINLINE bool svec<LANES,bool>::none_true() { return svec_none_true(*this); }
1380 
1385 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator~() { return svec_not(*this); }
1386 
1392 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator|(svec<LANES,bool> a) { return svec_or(*this, a); }
1398 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator&(svec<LANES,bool> a) { return svec_and(*this, a); }
1404 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator^(svec<LANES,bool> a) { return svec_xor(*this, a); }
1409 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator!() { return svec_not(*this); }
1410 
1416 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator&&(svec<LANES,bool> a) { return svec_and(*this, a); }
1422 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator||(svec<LANES,bool> a) { return svec_or(*this, a); }
1428 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator ==(svec<LANES,bool> a) {
1429  return svec_equal(*this, a);
1430 }
1431 
1437 FORCEINLINE svec<LANES,bool> svec<LANES,bool>::operator !=(svec<LANES,bool> a) {
1438  return svec_not_equal(*this, a);
1439 }
1440 
1442 VEC_CMP_IMPL(uint8_t);
1443 VEC_CMP_IMPL(int16_t);
1444 VEC_CMP_IMPL(uint16_t);
1445 VEC_CMP_IMPL(int32_t);
1446 VEC_CMP_IMPL(uint32_t);
1447 VEC_CMP_IMPL(int64_t);
1448 VEC_CMP_IMPL(uint64_t);
1451 
1463 
1464 VEC_INT_CLASS_METHOD_IMPL(int8_t, uint8_t);
1465 VEC_INT_CLASS_METHOD_IMPL(uint8_t, uint8_t);
1466 VEC_INT_CLASS_METHOD_IMPL(int16_t, uint16_t);
1467 VEC_INT_CLASS_METHOD_IMPL(uint16_t, uint16_t);
1468 VEC_INT_CLASS_METHOD_IMPL(int32_t, uint32_t);
1469 VEC_INT_CLASS_METHOD_IMPL(uint32_t, uint32_t);
1470 VEC_INT_CLASS_METHOD_IMPL(int64_t, uint64_t);
1471 VEC_INT_CLASS_METHOD_IMPL(uint64_t, uint64_t);
1472 
1475 
1476 #undef LANES
1477 } //end of namespace generic
1478 #endif /* GENERIC8_H_ */
1479 
#define COUT_FUNC_BOOL_DECL()
Definition: gsimd_utility.h:266
svec(float a, float b, float c, float d, float e, float f, float g, float h)
Constructor.
Definition: generic8.h:515
svec(int32_t a)
Constructor.
Definition: generic8.h:368
svec(void *p0, void *p1, void *p2, void *p3, void *p4, void *p5, void *p6, void *p7)
Constructor.
Definition: generic8.h:729
#define VEC_INT_CLASS_METHOD_DECL(STYPE, USTYPE)
Macro declaring the methods for integer vectors only. Note: the shift operators accept only an unsigned vector ...
Definition: gsimd_utility.h:379
svec(int8_t a)
Constructor.
Definition: generic8.h:210
#define SCATTER_BASE_OFFSETS(STYPE, OSTYPE)
Definition: gsimd_utility.h:765
#define CAST(SFROM, STO)
Definition: gsimd_utility.h:1112
svec()
Default constructor.
Definition: generic8.h:196
#define BINARY_OP_METHODS(STYPE)
Definition: generic8.h:1029
svec(int16_t a)
Constructor.
Definition: generic8.h:288
svec(uint16_t a, uint16_t b, uint16_t c, uint16_t d, uint16_t e, uint16_t f, uint16_t g, uint16_t h)
Constructor.
Definition: generic8.h:318
svec(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e, uint64_t f, uint64_t g, uint64_t h)
Constructor.
Definition: generic8.h:476
#define LOAD_CONST(STYPE)
Definition: gsimd_utility.h:562
svec(uint8_t a)
Constructor.
Definition: generic8.h:249
#define VEC_FLOAT_CLASS_METHOD_DECL(STYPE)
Definition: gsimd_utility.h:393
#define VEC_CLASS_METHOD_DECL(STYPE)
macros for the methods of the non-mask types (i8 through double)
Definition: gsimd_utility.h:350
svec(int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f, int8_t g, int8_t h)
Constructor.
Definition: generic8.h:201
#define BINARY_OP_FUNC(STYPE, NAME, FUNC)
Definition: gsimd_utility.h:869
svec< 4, bool > svec_select(svec< 4, bool > mask, svec< 4, bool > a, svec< 4, bool > b)
construct c by selecting elements from two input vectors according to the mask
Definition: power_vsx4.h:1126
#define ROTATE(STYPE)
macro for rotate method implementation
Definition: gsimd_utility.h:496
#define TERNERY(STYPE)
Definition: gsimd_utility.h:958
#define SVEC_BOOL_CLASS_METHOD_DECL()
macros for the svec<N,bool> class's methods
Definition: gsimd_utility.h:330
svec(int16_t a, int16_t b, int16_t c, int16_t d, int16_t e, int16_t f, int16_t g, int16_t h)
Constructor.
Definition: generic8.h:278
#define BROADCAST(STYPE)
macro for broadcast method implementation. All broadcasts are slow implementations.
Definition: gsimd_utility.h:472
#define COUT_FUNC_DECL(STYPE)
Definition: gsimd_utility.h:283
#define LANES
Definition: generic8.h:108
svec()
Default constructor.
Definition: generic8.h:393
#define SUBSCRIPT_FUNC_IMPL(STYPE)
Definition: gsimd_utility.h:1160
#define VEC_CMP_IMPL(STYPE)
Definition: gsimd_utility.h:1175
svec()
Default constructor.
Definition: generic8.h:510
#define GATHER_GENERAL(STYPE, PSTYPE)
slow implementation of gather general. Must use template to specify the return type ...
Definition: gsimd_utility.h:602
#define INT_BINARY_OP_METHODS(STYPE)
Definition: generic8.h:1043
#define SUBSCRIPT_FUNC_DECL(STYPE)
macros to define an intrinsic-based subscript operator
Definition: gsimd_utility.h:247
svec(int64_t a, int64_t b, int64_t c, int64_t d, int64_t e, int64_t f, int64_t g, int64_t h)
Constructor.
Definition: generic8.h:437
svec(uint64_t a)
Constructor.
Definition: generic8.h:486
Definition: generic.h:57
#define MASKED_LOAD_STORE_L8(STYPE)
Definition: gsimd_utility.h:805
svec< 4,float > svec_preduce_add(svec< 4, float > v0, svec< 4, float > v1, svec< 4, float > v2, svec< 4, float > v3)
Definition: generic4.h:1106
#define MVEC_CLASS_METHOD_IMPL(STYPE)
mask class's class method impl
Definition: gsimd_utility.h:1285
svec(int a, int b, int c, int d, int e, int f, int g, int h)
Constructor.
Definition: generic8.h:358
#define SUBSCRIPT_FUNC_BOOL_DECL(STYPE)
Definition: gsimd_utility.h:251
#define VEC_CLASS_METHOD_IMPL(STYPE)
Definition: gsimd_utility.h:1301
svec()
Default constructor.
Definition: generic8.h:549
#define VEC_FLOAT_CLASS_METHOD_IMPL(STYPE)
Definition: gsimd_utility.h:1433
#define INSERT_EXTRACT(STYPE)
macros for svec's insert/extract method implementation. The implementation is based on vector type's s...
Definition: gsimd_utility.h:409
svec(double a, double b, double c, double d, double e, double f, double g, double h)
Constructor.
Definition: generic8.h:554
#define LOAD_STORE(STYPE)
Definition: gsimd_utility.h:419
#define CAST_BITS(SFROM, FROM_F, STO, TO_F)
Definition: gsimd_utility.h:1143
svec()
Default constructor.
Definition: generic8.h:157
#define GATHER_STRIDE(STYPE, OSTYPE)
macros for general impl of gather base step
Definition: gsimd_utility.h:666
#define SCATTER_STRIDE(STYPE, OSTYPE)
Definition: gsimd_utility.h:705
#define SCATTER_GENERAL(STYPE, PSTYPE)
Definition: gsimd_utility.h:736
svec(uint32_t a)
Constructor.
Definition: generic8.h:408
svec()
Default constructor.
Definition: generic8.h:273
#define GATHER_BASE_OFFSETS(STYPE, OSTYPE)
Definition: gsimd_utility.h:647
uint32_t v
Definition: generic8.h:151
svec()
Default constructor.
Definition: generic8.h:471
svec()
Default constructor.
Definition: generic8.h:353
#define UNARY_OP(STYPE, NAME, OP)
Definition: gsimd_utility.h:833
svec(uint32_t a)
Constructor.
Definition: generic8.h:175
#define SELECT(STYPE)
macros for svec's select-by-mask-vector method, generic implementation
Definition: gsimd_utility.h:448
svec(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h)
Constructor.
Definition: generic8.h:239
#define CMP_ALL_OP(STYPE)
Definition: gsimd_utility.h:1107
#define BINARY_OP2(STYPE, STYPE2, NAME, OP)
Definition: gsimd_utility.h:861
#define COUT_FUNC_CHAR_DECL(STYPE)
Definition: gsimd_utility.h:275
svec()
Default constructor.
Definition: generic8.h:432
#define CMP_OP(STYPE, NAME, OP)
macros for binary: vector op scalar
Definition: gsimd_utility.h:1049
svec(float a)
Constructor.
Definition: generic8.h:525
svec()
Default constructor.
Definition: generic8.h:234
#define MAX_MIN_REDUCE_METHODS(STYPE)
Definition: generic8.h:1109
#define SHUFFLES(STYPE)
macro for shuffle/shuffle2 methods implementation
Definition: gsimd_utility.h:521
svec(uint16_t a)
Constructor.
Definition: generic8.h:328
#define SELECT_BOOLCOND(STYPE)
macros for svec's select-by-bool-scalar method implementation
Definition: gsimd_utility.h:459
svec(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f, uint32_t g, uint32_t h)
Constructor.
Definition: generic8.h:398
svec(int64_t a)
Constructor.
Definition: generic8.h:447
#define VEC_INT_CLASS_METHOD_IMPL(STYPE, STYPE2)
Definition: gsimd_utility.h:1394
#define FORCEINLINE
Definition: gsimd_utility.h:175
uint32_t v
Definition: generic4.h:151
svec(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f, uint32_t g, uint32_t h)
Constructor.
Definition: generic8.h:164
svec(double a)
Constructor.
Definition: generic8.h:564
svec()
Default constructor.
Definition: generic8.h:313