mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-15 14:16:07 -06:00
Core: SSE iTxN cleanup
This commit is contained in:
parent
fb1deb732f
commit
3f37110ce6
1 changed file with 19 additions and 12 deletions
|
|
@ -54,18 +54,23 @@ force_inline f32x4 Broadcast1(f32x4 a) { return _mm_shuffle_ps(a.data
|
|||
// Shuffles `a` with itself using the _MM_BCAST(2) selector and returns the result.
// NOTE(review): _MM_BCAST is defined outside this view; presumably it selects
// lane 2 for every output lane - confirm against its definition.
force_inline f32x4 Broadcast2(f32x4 a)
{
	return _mm_shuffle_ps(a.data, a.data, _MM_BCAST(2));
}
|
||||
// Shuffles `a` with itself using the _MM_BCAST(3) selector and returns the result.
// NOTE(review): _MM_BCAST is defined outside this view; presumably it selects
// lane 3 for every output lane - confirm against its definition.
force_inline f32x4 Broadcast3(f32x4 a)
{
	return _mm_shuffle_ps(a.data, a.data, _MM_BCAST(3));
}
|
||||
|
||||
struct i16x8 { // 8xint16
|
||||
// CRTP base holding the raw 128-bit integer register and the load/store helpers
// shared by the integer SIMD wrappers. T is the derived wrapper type (used as
// `struct X : iTxN<X>`); the base is lane-width agnostic, the derived type
// decides how the 128 bits are interpreted.
template <class T>
struct iTxN {
	__m128i data; // raw SSE2 register; not initialized by default construction

	// Views this object as the derived wrapper so Load* can return T& for chaining.
	T& AsT()                      { return *static_cast<T *>(this); }

	// Loads all 128 bits from ptr (no alignment requirement).
	T& Load(const void *ptr)      { data = _mm_loadu_si128((const __m128i *)ptr); return AsT(); }
	// Loads 64 bits from ptr into the low half; the upper 64 bits are zeroed.
	T& Load64(const void *ptr)    { data = _mm_castpd_si128(_mm_load_sd((const double *)ptr)); return AsT(); }
	// Loads 32 bits from ptr into the lowest lane; the upper 96 bits are zeroed.
	T& Load32(const void *ptr)    { data = _mm_castps_si128(_mm_load_ss((const float *)ptr)); return AsT(); }

	// Stores do not modify the register, so they are callable on const objects.

	// Writes all 128 bits to ptr (no alignment requirement).
	void Store(void *ptr) const   { _mm_storeu_si128((__m128i *)ptr, data); }
	// Writes the low 64 bits to ptr.
	void Store64(void *ptr) const { _mm_store_sd((double *)ptr, _mm_castsi128_pd(data)); }
	// Writes the low 32 bits to ptr.
	void Store32(void *ptr) const { _mm_store_ss((float *)ptr, _mm_castsi128_ps(data)); }
	// Non-temporal 128-bit store; ptr must be 16-byte aligned.
	void Stream(void *ptr) const  { _mm_stream_si128((__m128i *)ptr, data); }
};
|
||||
|
||||
struct i16x8 : iTxN<i16x8> { // 8xint16
|
||||
i16x8() {}
|
||||
i16x8(const void *ptr) { Load(ptr); }
|
||||
i16x8(__m128i d) { data = d; }
|
||||
|
|
@ -106,13 +111,14 @@ force_inline int FirstTrue(i16x8 a) { return CountTrailingZeroBit
|
|||
// Applies CountTrailingZeroBits to the inverted per-byte sign mask of `a` and
// halves the result, turning a byte position into a 16-bit lane index
// (each 16-bit lane contributes two bits to the byte mask).
// NOTE(review): CountTrailingZeroBits is defined outside this view; presumably
// it returns the index of the lowest set bit - confirm.
force_inline int FirstFalse(i16x8 a)
{
	const int byte_signs = _mm_movemask_epi8(a.data);
	return CountTrailingZeroBits(~byte_signs) >> 1;
}
|
||||
// Tests 16-bit lane `i` of `a` by checking bit 2*i of the per-byte sign mask
// (the sign bit of the lane's low byte).
// Returns the masked bit itself: nonzero when set, 0 otherwise - the result is
// not normalized to 0/1.
force_inline int IsTrue(i16x8 a, int i)
{
	const int byte_signs = _mm_movemask_epi8(a.data);
	const int lane_bit = 1 << (2 * i);
	return byte_signs & lane_bit;
}
|
||||
|
||||
struct i32x4 : i16x8 { // 4xint32
|
||||
// Four 32-bit integers packed in one SSE2 register; load/store helpers come
// from the iTxN<i32x4> base.
struct i32x4 : iTxN<i32x4> { // 4xint32
	// Leaves the register uninitialized.
	i32x4()                           {}
	// Loads 128 bits from ptr.
	i32x4(const void *ptr)            { Load(ptr); }
	// Wraps an existing register value.
	i32x4(__m128i d)                  { data = d; }
	// Places v in the lowest lane and zeroes the other three.
	i32x4(int v)                      { data = _mm_set_epi32(0, 0, 0, v); }
	// Follows _mm_set_epi32 argument order: a is the highest lane, d the lowest.
	i32x4(int a, int b, int c, int d) { data = _mm_set_epi32(a, b, c, d); }
	// Reads the lowest lane; const so it also works on const vectors,
	// matching the const conversion to i16x8 below.
	operator int() const              { return _mm_cvtsi128_si32(data); }
	// Reinterprets the same 128 bits as eight 16-bit lanes.
	operator i16x8() const            { return i16x8(data); }
};
|
||||
|
||||
// Builds an i32x4 with every 32-bit lane set to v.
force_inline i32x4 i32all(int v)
{
	const __m128i every_lane = _mm_set1_epi32(v);
	return every_lane; // converted through i32x4(__m128i)
}
|
||||
|
|
@ -145,13 +151,14 @@ force_inline int FirstTrue(i32x4 a) { return CountTrailingZeroBit
|
|||
// Applies CountTrailingZeroBits to the inverted sign mask of `a`, where the mask
// holds one bit per 32-bit lane (bit k = sign bit of lane k).
// NOTE(review): CountTrailingZeroBits is defined outside this view; presumably
// it returns the index of the lowest set bit - confirm.
force_inline int FirstFalse(i32x4 a)
{
	const int lane_signs = _mm_movemask_ps(_mm_castsi128_ps(a.data));
	return CountTrailingZeroBits(~lane_signs);
}
|
||||
// True when the sign bit of 32-bit lane `i` of `a` is set.
force_inline bool IsTrue(i32x4 a, int i)
{
	const int lane_signs = _mm_movemask_ps(_mm_castsi128_ps(a.data));
	const int lane_bit = 1 << i;
	return (lane_signs & lane_bit) != 0;
}
|
||||
|
||||
struct i8x16 : i16x8 { // 16xint8
|
||||
// Sixteen 8-bit integers packed in one SSE2 register; load/store helpers come
// from the iTxN<i8x16> base.
struct i8x16 : iTxN<i8x16> { // 16xint8
	// Leaves the register uninitialized.
	i8x16()
	{
	}

	// Loads 128 bits from ptr.
	i8x16(const void *ptr)
	{
		Load(ptr);
	}

	// Wraps an existing register value.
	i8x16(__m128i d)
	{
		data = d;
	}

	// Places v in the lowest byte lane and zeroes the other fifteen.
	i8x16(int v)
	{
		data = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
		                    0, 0, 0, 0, 0, 0, 0, v);
	}

	// Follows _mm_set_epi8 argument order: a is the highest byte lane, p the lowest.
	i8x16(int a, int b, int c, int d, int e, int f, int g, int h,
	      int i, int j, int k, int l, int m, int n, int o, int p)
	{
		data = _mm_set_epi8(a, b, c, d, e, f, g, h,
		                    i, j, k, l, m, n, o, p);
	}

	// Reinterprets the same 128 bits as eight 16-bit lanes.
	operator i16x8() const
	{
		return i16x8(data);
	}
};
|
||||
|
||||
// Builds an i8x16 with every 8-bit lane set to v (v is passed on to
// _mm_set1_epi8, which takes a char).
force_inline i8x16 i8all(int v)
{
	const __m128i every_lane = _mm_set1_epi8(v);
	return every_lane; // converted through i8x16(__m128i)
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue