diff --git a/autotest/SIMD_CMP/Etalon.log b/autotest/SIMD_CMP/Etalon.log new file mode 100644 index 000000000..fe79cb01a --- /dev/null +++ b/autotest/SIMD_CMP/Etalon.log @@ -0,0 +1,486 @@ +* /Users/cxl/out/CLANG.Debug.Debug_Full.Shared/SIMD_CMP 02.04.2025 12:04:20, user: cxl + +=============== f32x4 +a = 9 9 9 9 +c = 9 9 9 9 +AllTrue(c == a) = true +AnyTrue(c == a) = true +CountTrue(c == a) = 4 +FirstTrue(c == a) = 0 +matches = 0 1 2 3 +b = 0 0 0 0 +AllTrue(c == b) = false +AnyTrue(c == b) = false +CountTrue(c == b) = 0 +matches = +--- +a = 9 9 9 0 +c = 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 3 +FirstTrue(c == a) = 1 +matches = 1 2 3 +b = 0 0 0 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 1 +FirstTrue(c == b) = 0 +matches = 0 +--- +a = 9 9 0 0 +c = 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 2 +FirstTrue(c == a) = 2 +matches = 2 3 +b = 0 0 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 2 +FirstTrue(c == b) = 0 +matches = 0 1 +--- +a = 9 0 0 0 +c = 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 1 +FirstTrue(c == a) = 3 +matches = 3 +b = 0 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 3 +FirstTrue(c == b) = 0 +matches = 0 1 2 +--- +a = 1 2 3 4 +b = 1 2 3 9 +c = 9 9 9 9 +AllTrue(a == b) = false +AllTrue(a == a) = true +AnyTrue(a == b) = true +AnyTrue(a == c) = false +CountTrue(a == b) = 3 +FirstTrue(a == b) = 1 +=============== i16x8 +a = 9 9 9 9 9 9 9 9 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = true +AnyTrue(c == a) = true +CountTrue(c == a) = 8 +FirstTrue(c == a) = 0 +matches = 0 1 2 3 4 5 6 7 +b = 0 0 0 0 0 0 0 0 +AllTrue(c == b) = false +AnyTrue(c == b) = false +CountTrue(c == b) = 0 +matches = +--- +a = 9 9 9 9 9 9 9 0 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 7 +FirstTrue(c == a) = 1 +matches = 1 2 3 4 5 6 7 +b = 0 0 0 0 0 0 0 9 
+AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 1 +FirstTrue(c == b) = 0 +matches = 0 +--- +a = 9 9 9 9 9 9 0 0 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 6 +FirstTrue(c == a) = 2 +matches = 2 3 4 5 6 7 +b = 0 0 0 0 0 0 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 2 +FirstTrue(c == b) = 0 +matches = 0 1 +--- +a = 9 9 9 9 9 0 0 0 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 5 +FirstTrue(c == a) = 3 +matches = 3 4 5 6 7 +b = 0 0 0 0 0 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 3 +FirstTrue(c == b) = 0 +matches = 0 1 2 +--- +a = 9 9 9 9 0 0 0 0 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 4 +FirstTrue(c == a) = 4 +matches = 4 5 6 7 +b = 0 0 0 0 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 4 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 +--- +a = 9 9 9 0 0 0 0 0 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 3 +FirstTrue(c == a) = 5 +matches = 5 6 7 +b = 0 0 0 9 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 5 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 +--- +a = 9 9 0 0 0 0 0 0 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 2 +FirstTrue(c == a) = 6 +matches = 6 7 +b = 0 0 9 9 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 6 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 +--- +a = 9 0 0 0 0 0 0 0 +c = 9 9 9 9 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 1 +FirstTrue(c == a) = 7 +matches = 7 +b = 0 9 9 9 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 7 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 +--- +a = 1 2 3 4 5 6 7 8 +b = 1 4 2 4 5 7 7 9 +c = 9 9 9 9 9 9 9 9 +AllTrue(a == b) = false +AllTrue(a == a) = 
true +AnyTrue(a == b) = true +AnyTrue(a == c) = false +CountTrue(a == b) = 4 +FirstTrue(a == b) = 1 +=============== i32x4 +a = 9 9 9 9 +c = 9 9 9 9 +AllTrue(c == a) = true +AnyTrue(c == a) = true +CountTrue(c == a) = 4 +FirstTrue(c == a) = 0 +matches = 0 1 2 3 +b = 0 0 0 0 +AllTrue(c == b) = false +AnyTrue(c == b) = false +CountTrue(c == b) = 0 +matches = +--- +a = 9 9 9 0 +c = 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 3 +FirstTrue(c == a) = 1 +matches = 1 2 3 +b = 0 0 0 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 1 +FirstTrue(c == b) = 0 +matches = 0 +--- +a = 9 9 0 0 +c = 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 2 +FirstTrue(c == a) = 2 +matches = 2 3 +b = 0 0 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 2 +FirstTrue(c == b) = 0 +matches = 0 1 +--- +a = 9 0 0 0 +c = 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 1 +FirstTrue(c == a) = 3 +matches = 3 +b = 0 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 3 +FirstTrue(c == b) = 0 +matches = 0 1 2 +--- +a = 1 2 3 4 +b = 1 2 3 9 +c = 9 9 9 9 +AllTrue(a == b) = false +AllTrue(a == a) = true +AnyTrue(a == b) = true +AnyTrue(a == c) = false +CountTrue(a == b) = 3 +FirstTrue(a == b) = 1 +=============== i8x16 +a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = true +AnyTrue(c == a) = true +CountTrue(c == a) = 16 +FirstTrue(c == a) = 0 +matches = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0 +AllTrue(c == b) = false +AnyTrue(c == b) = false +CountTrue(c == b) = 0 +matches = +--- +a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 15 +FirstTrue(c == a) = 1 +matches = 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 
0 0 0 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 1 +FirstTrue(c == b) = 0 +matches = 0 +--- +a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 14 +FirstTrue(c == a) = 2 +matches = 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 2 +FirstTrue(c == b) = 0 +matches = 0 1 +--- +a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 13 +FirstTrue(c == a) = 3 +matches = 3 4 5 6 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 3 +FirstTrue(c == b) = 0 +matches = 0 1 2 +--- +a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 12 +FirstTrue(c == a) = 4 +matches = 4 5 6 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 4 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 +--- +a = 9 9 9 9 . 9 9 9 9 . 9 9 9 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 11 +FirstTrue(c == a) = 5 +matches = 5 6 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 0 0 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 5 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 +--- +a = 9 9 9 9 . 9 9 9 9 . 9 9 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 10 +FirstTrue(c == a) = 6 +matches = 6 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 0 9 9 . 
9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 6 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 +--- +a = 9 9 9 9 . 9 9 9 9 . 9 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 9 +FirstTrue(c == a) = 7 +matches = 7 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 0 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 7 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 +--- +a = 9 9 9 9 . 9 9 9 9 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 8 +FirstTrue(c == a) = 8 +matches = 8 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 0 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 8 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 +--- +a = 9 9 9 9 . 9 9 9 0 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 7 +FirstTrue(c == a) = 9 +matches = 9 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 0 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 9 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 8 +--- +a = 9 9 9 9 . 9 9 0 0 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 6 +FirstTrue(c == a) = 10 +matches = 10 11 12 13 14 15 +b = 0 0 0 0 . 0 0 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 10 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 8 9 +--- +a = 9 9 9 9 . 9 0 0 0 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 5 +FirstTrue(c == a) = 11 +matches = 11 12 13 14 15 +b = 0 0 0 0 . 0 9 9 9 . 9 9 9 9 . 
9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 11 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 8 9 10 +--- +a = 9 9 9 9 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 4 +FirstTrue(c == a) = 12 +matches = 12 13 14 15 +b = 0 0 0 0 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 12 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 8 9 10 11 +--- +a = 9 9 9 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 3 +FirstTrue(c == a) = 13 +matches = 13 14 15 +b = 0 0 0 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 13 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 8 9 10 11 12 +--- +a = 9 9 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 2 +FirstTrue(c == a) = 14 +matches = 14 15 +b = 0 0 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 14 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 +--- +a = 9 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == a) = false +AnyTrue(c == a) = true +CountTrue(c == a) = 1 +FirstTrue(c == a) = 15 +matches = 15 +b = 0 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9 +AllTrue(c == b) = false +AnyTrue(c == b) = true +CountTrue(c == b) = 15 +FirstTrue(c == b) = 0 +matches = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 +--- +a = 1 2 3 4 . 5 6 7 8 . 1 2 3 4 . 5 6 7 8 +b = 1 4 2 4 . 5 7 7 0 . 1 4 2 4 . 5 7 7 9 +c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 
9 9 9 9
+AllTrue(a == b) = false
+AllTrue(a == a) = true
+AnyTrue(a == b) = true
+AnyTrue(a == c) = false
+CountTrue(a == b) = 8
+FirstTrue(a == b) = 1
diff --git a/autotest/SIMD_CMP/SIMD_CMP.cpp b/autotest/SIMD_CMP/SIMD_CMP.cpp
new file mode 100644
index 000000000..042135f34
--- /dev/null
+++ b/autotest/SIMD_CMP/SIMD_CMP.cpp
@@ -0,0 +1,128 @@
+#include <Core/Core.h>
+
+using namespace Upp;
+
+// Logs the mask queries (AllTrue, AnyTrue, CountTrue, FirstTrue, IsTrue) for SIMD type T
+// with element type W and n elements. For every i in [0, n): a is loaded from an array whose
+// elements [0, i) are 0 and the rest 9, b from the complement, c from all 9; the results of
+// c == a and c == b are dumped. Returns c (every element 9).
+template <class T, class W, int n>
+T Test()
+{
+	T c;
+	for(int i = 0; i < n; i++) {
+		W h[n], g[n], e[n];
+		for(int j = 0; j < n; j++) {
+			h[j] = j < i ? 0 : 9;
+			g[j] = j < i ? 9 : 0;
+			e[j] = 9;
+		}
+		c.Load(e);
+		T a(h);
+		T b(g);
+		DDUMP(a);
+		DDUMP(c);
+		DDUMP(AllTrue(c == a));
+		DDUMP(AnyTrue(c == a));
+		DDUMP(CountTrue(c == a));
+		if(AnyTrue(c == a))
+			DDUMP(FirstTrue(c == a));
+		String matches;
+		for(int j = 0; j < n; j++)
+			if(IsTrue(c == a, j))
+				matches << j << ' ';
+		DDUMP(matches);
+		DDUMP(b);
+		DDUMP(AllTrue(c == b));
+		DDUMP(AnyTrue(c == b));
+		DDUMP(CountTrue(c == b));
+		if(AnyTrue(c == b))
+			DDUMP(FirstTrue(c == b));
+		matches.Clear();
+		for(int j = 0; j < n; j++)
+			if(IsTrue(c == b, j))
+				matches << j << ' ';
+		DDUMP(matches);
+		DLOG("---");
+	}
+	return c;
+}
+
+CONSOLE_APP_MAIN
+{
+	StdLogSetup(LOG_COUT|LOG_FILE);
+
+	// CheckCode();
+
+	// DoTest2(); return;
+
+	DLOG("=============== f32x4");
+	{
+		f32x4 c = Test<f32x4, float, 4>();
+
+		f32x4 a(1, 2, 3, 4);
+		f32x4 b(1, 2, 3, 9);
+
+		DDUMP(a);
+		DDUMP(b);
+		DDUMP(c);
+		DDUMP(AllTrue(a == b));
+		DDUMP(AllTrue(a == a));
+		DDUMP(AnyTrue(a == b));
+		DDUMP(AnyTrue(a == c));
+		DDUMP(CountTrue(a == b));
+		DDUMP(FirstTrue(a == b));
+	}
+	DLOG("=============== i16x8");
+	{
+		i16x8 c = Test<i16x8, int16, 8>();
+
+		i16x8 a(1, 2, 3, 4, 5, 6, 7, 8);
+		i16x8 b(1, 4, 2, 4, 5, 7, 7, 9);
+
+		DDUMP(a);
+		DDUMP(b);
+		DDUMP(c);
+		DDUMP(AllTrue(a == b));
+		DDUMP(AllTrue(a == a));
+		DDUMP(AnyTrue(a == b));
+		DDUMP(AnyTrue(a == c));
+		DDUMP(CountTrue(a == b));
+		DDUMP(FirstTrue(a == b));
+	}
+	DLOG("=============== i32x4");
+	{
+		i32x4 c = Test<i32x4, int32, 4>();
+
+		i32x4 a(1, 2, 3, 4);
+		i32x4 b(1, 2, 3, 9);
+
+		DDUMP(a);
+		DDUMP(b);
+		DDUMP(c);
+		DDUMP(AllTrue(a == b));
+		DDUMP(AllTrue(a == a));
+		DDUMP(AnyTrue(a == b));
+		DDUMP(AnyTrue(a == c));
+		DDUMP(CountTrue(a == b));
+		DDUMP(FirstTrue(a == b));
+	}
+	DLOG("=============== i8x16");
+	{
+		i8x16 c = Test<i8x16, int8, 16>();
+
+		i8x16 a(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+		i8x16 b(1, 4, 2, 4, 5, 7, 7, 0, 1, 4, 2, 4, 5, 7, 7, 9);
+
+		DDUMP(a);
+		DDUMP(b);
+		DDUMP(c);
+		DDUMP(AllTrue(a == b));
+		DDUMP(AllTrue(a == a));
+		DDUMP(AnyTrue(a == b));
+		DDUMP(AnyTrue(a == c));
+		DDUMP(CountTrue(a == b));
+		DDUMP(FirstTrue(a == b));
+	}
+	CheckLogEtalon();
+}
diff --git a/autotest/SIMD_CMP/SIMD_CMP.upp b/autotest/SIMD_CMP/SIMD_CMP.upp
new file mode 100644
index 000000000..ea30b73fd
--- /dev/null
+++ b/autotest/SIMD_CMP/SIMD_CMP.upp
@@ -0,0 +1,10 @@
+uses
+	Core;
+
+file
+	Etalon.log,
+	SIMD_CMP.cpp;
+
+mainconfig
+	"" = "";
+
diff --git a/uppsrc/Core/SIMD_NEON.h b/uppsrc/Core/SIMD_NEON.h
index 42557223b..bdc661b8c 100644
--- a/uppsrc/Core/SIMD_NEON.h
+++ b/uppsrc/Core/SIMD_NEON.h
@@ -2,6 +2,15 @@
 
 using namespace Upp;
 
+force_inline
+uint64 cmask16__(uint16x8_t mask) { // narrows the 128-bit lane mask to 64 bits: 4 bits per source byte
+	uint8x8_t res = vshrn_n_u16(mask, 4);
+	uint64_t matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
+	return matches;
+}
+
+const uint64 cmask_all__ = 0xffffffffffffffffull;
+
 struct f32x4 {
 	float32x4_t data;
 
@@ -53,14 +62,12 @@ force_inline f32x4 operator>(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32
 force_inline f32x4 operator<=(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32(vcleq_f32(a, b)); }
 force_inline f32x4 operator>=(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32(vcgeq_f32(a, b)); }
 
-force_inline bool AllTrue(uint32x4_t v) {
-	uint32x2_t tmp = vand_u32(vget_low_u32(v), vget_high_u32(v));
-	return vget_lane_u32(vpmin_u32(tmp, tmp), 0) == 0xffffffff;;
-}
-
-force_inline bool AllTrue(f32x4 a) {
-	return AllTrue(vreinterpretq_u32_f32(a));
-}
+force_inline uint64 cmaskf__(float32x4_t mask) { return cmask16__(vreinterpretq_u16_f32(mask)); }
+force_inline bool AllTrue(f32x4 a) { return cmaskf__(a.data) == cmask_all__; }
+force_inline bool AnyTrue(f32x4 a) { return cmaskf__(a.data); }
+force_inline int CountTrue(f32x4 a) { return CountBits64(cmaskf__(a.data)) >> 4; } // 16 mask bits per 32-bit lane
+force_inline int FirstTrue(f32x4 a) { return CountTrailingZeroBits64(cmaskf__(a.data)) >> 4; } // NOTE(review): all-false input relies on CountTrailingZeroBits64(0); check AnyTrue first
+force_inline bool IsTrue(f32x4 a, int i) { return cmaskf__(a.data) & ((uint64)1 << (i << 4)); }
 
 force_inline f32x4 min(f32x4 a, f32x4 b) { return vminq_f32(a, b); }
 force_inline f32x4 max(f32x4 a, f32x4 b) { return vmaxq_f32(a, b); }
@@ -122,9 +129,11 @@ force_inline i16x8 operator==(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16
 force_inline i16x8 operator<(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16(vcltq_s16(a, b)); }
 force_inline i16x8 operator>(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16(vcgtq_s16(a, b)); }
 
-force_inline bool AllTrue(i16x8 a) {
-	return AllTrue(vreinterpretq_u32_s16(a));
-}
+force_inline bool AllTrue(i16x8 a) { return cmask16__(vreinterpretq_u16_s16(a.data)) == cmask_all__; } // explicit s16 -> u16 reinterpret, no reliance on lax vector conversions
+force_inline bool AnyTrue(i16x8 a) { return cmask16__(vreinterpretq_u16_s16(a.data)); }
+force_inline int CountTrue(i16x8 a) { return CountBits64(cmask16__(vreinterpretq_u16_s16(a.data))) >> 3; } // 8 mask bits per 16-bit lane
+force_inline int FirstTrue(i16x8 a) { return CountTrailingZeroBits64(cmask16__(vreinterpretq_u16_s16(a.data))) >> 3; }
+force_inline bool IsTrue(i16x8 a, int i) { return cmask16__(vreinterpretq_u16_s16(a.data)) & ((uint64)1 << (i << 3)); }
 
 struct i32x4 { // 4xint32
 	int32x4_t data;
@@ -176,9 +185,12 @@ force_inline i32x4 operator==(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32
 force_inline i32x4 operator<(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32(vcltq_s32(a, b)); }
 force_inline i32x4 operator>(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32(vcgtq_s32(a, b)); }
 
-force_inline bool AllTrue(i32x4 a) {
-	return AllTrue(vreinterpretq_u32_s32(a));
-}
+force_inline uint64 cmask32__(int32x4_t mask) { return cmask16__(vreinterpretq_u16_s32(mask)); } // takes the signed storage type of i32x4::data
+force_inline bool AllTrue(i32x4 a) { return cmask32__(a.data) == cmask_all__; }
+force_inline bool AnyTrue(i32x4 a) { return cmask32__(a.data); }
+force_inline int CountTrue(i32x4 a) { return CountBits64(cmask32__(a.data)) >> 4; } // 16 mask bits per 32-bit lane
+force_inline int FirstTrue(i32x4 a) { return CountTrailingZeroBits64(cmask32__(a.data)) >> 4; }
+force_inline bool IsTrue(i32x4 a, int i) { return cmask32__(a.data) & ((uint64)1 << (i << 4)); }
 
 struct i8x16 { // 16*int8
 	int8x16_t data;
@@ -226,10 +238,16 @@ force_inline i8x16 operator^(i8x16 a, i8x16 b) { return veorq_s8(a, b); }
 force_inline i8x16& operator^=(i8x16& a, i8x16 b) { return a = a ^ b; }
 force_inline i8x16 operator~(i8x16 a) { return vmvnq_s8(a); }
 
-force_inline i8x16 operator==(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vceqq_s8(a, b)); }
-force_inline i8x16 operator<(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcltq_s8(a, b)); }
-force_inline i8x16 operator>(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcgtq_s8(a, b)); }
-force_inline bool AllTrue(i8x16 a) { return AllTrue(vreinterpretq_u32_s8(a)); }
+force_inline i8x16 operator==(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vceqq_s8(a, b)); }
+force_inline i8x16 operator<(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcltq_s8(a, b)); }
+force_inline i8x16 operator>(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcgtq_s8(a, b)); }
+
+force_inline uint64 cmask8__(int8x16_t mask) { return cmask16__(vreinterpretq_u16_s8(mask)); } // takes the signed storage type of i8x16::data
+force_inline bool AllTrue(i8x16 a) { return cmask8__(a.data) == cmask_all__; }
+force_inline bool AnyTrue(i8x16 a) { return cmask8__(a.data); }
+force_inline int CountTrue(i8x16 a) { return CountBits64(cmask8__(a.data)) >> 2; } // 4 mask bits per 8-bit lane
+force_inline int FirstTrue(i8x16 a) { return CountTrailingZeroBits64(cmask8__(a.data)) >> 2; }
+force_inline bool IsTrue(i8x16 a, int i) { return cmask8__(a.data) & ((uint64)1 << (i << 2)); }
 
 force_inline f32x4 ToFloat(i32x4 a) { return vcvtq_f32_s32(a); }
 force_inline i32x4 Truncate(f32x4 a) { return vcvtq_s32_f32(a); }
diff --git a/uppsrc/Core/SIMD_SSE2.h b/uppsrc/Core/SIMD_SSE2.h
index 8335008db..37387d793 
100644
--- a/uppsrc/Core/SIMD_SSE2.h
+++ b/uppsrc/Core/SIMD_SSE2.h
@@ -38,6 +38,10 @@ force_inline f32x4 operator>(f32x4 a, f32x4 b) { return _mm_cmpgt_ps(a.data,
 force_inline f32x4 operator<=(f32x4 a, f32x4 b) { return _mm_cmple_ps(a.data, b.data); }
 force_inline f32x4 operator>=(f32x4 a, f32x4 b) { return _mm_cmpge_ps(a.data, b.data); }
 force_inline bool AllTrue(f32x4 a) { return _mm_movemask_ps(a.data) == 0xf; }
+force_inline bool AnyTrue(f32x4 a) { return _mm_movemask_ps(a.data); } // movemask_ps: one bit per 32-bit lane
+force_inline int CountTrue(f32x4 a) { return CountBits(_mm_movemask_ps(a.data)); }
+force_inline int FirstTrue(f32x4 a) { return CountTrailingZeroBits(_mm_movemask_ps(a.data)); } // NOTE(review): all-false input relies on CountTrailingZeroBits(0); check AnyTrue first
+force_inline bool IsTrue(f32x4 a, int i) { return _mm_movemask_ps(a.data) & (1 << i); }
 
 force_inline f32x4 min(f32x4 a, f32x4 b) { return _mm_min_ps(a.data, b.data); }
 force_inline f32x4 max(f32x4 a, f32x4 b) { return _mm_max_ps(a.data, b.data); }
@@ -95,6 +99,10 @@ force_inline i16x8 operator==(i16x8 a, i16x8 b) { return _mm_cmpeq_epi16(a.da
 force_inline i16x8 operator<(i16x8 a, i16x8 b) { return _mm_cmplt_epi16(a.data, b.data); }
 force_inline i16x8 operator>(i16x8 a, i16x8 b) { return _mm_cmpgt_epi16(a.data, b.data); }
 force_inline bool AllTrue(i16x8 a) { return _mm_movemask_epi8(a.data) == 0xffff; }
+force_inline bool AnyTrue(i16x8 a) { return _mm_movemask_epi8(a.data); }
+force_inline int CountTrue(i16x8 a) { return CountBits(_mm_movemask_epi8(a.data)) >> 1; } // movemask_epi8: 2 bits per 16-bit lane
+force_inline int FirstTrue(i16x8 a) { return CountTrailingZeroBits(_mm_movemask_epi8(a.data)) >> 1; }
+force_inline bool IsTrue(i16x8 a, int i) { return _mm_movemask_epi8(a.data) & (1 << 2 * i); } // bool like the sibling overloads; bit 2*i is the low byte of lane i
 
 struct i32x4 : i16x8 { // 4xint32
 	i32x4() {}
@@ -129,6 +137,10 @@ force_inline i32x4 operator==(i32x4 a, i32x4 b) { return _mm_cmpeq_epi32(a.da
 force_inline i32x4 operator<(i32x4 a, i32x4 b) { return _mm_cmplt_epi32(a.data, b.data); }
 force_inline i32x4 operator>(i32x4 a, i32x4 b) { return _mm_cmpgt_epi32(a.data, b.data); }
 force_inline bool AllTrue(i32x4 a) { return _mm_movemask_epi8(a.data) == 0xffff; }
+force_inline bool AnyTrue(i32x4 a) { return _mm_movemask_ps(_mm_castsi128_ps(a.data)); } // cast to ps to get one bit per 32-bit lane
+force_inline int CountTrue(i32x4 a) { return CountBits(_mm_movemask_ps(_mm_castsi128_ps(a.data))); }
+force_inline int FirstTrue(i32x4 a) { return CountTrailingZeroBits(_mm_movemask_ps(_mm_castsi128_ps(a.data))); }
+force_inline bool IsTrue(i32x4 a, int i) { return _mm_movemask_ps(_mm_castsi128_ps(a.data)) & (1 << i); }
 
 struct i8x16 : i16x8 { // 16xint8
 	i8x16() {}
@@ -158,6 +170,10 @@ force_inline i8x16 operator==(i8x16 a, i8x16 b) { return _mm_cmpeq_epi8(a.dat
 force_inline i8x16 operator<(i8x16 a, i8x16 b) { return _mm_cmplt_epi8(a.data, b.data); }
 force_inline i8x16 operator>(i8x16 a, i8x16 b) { return _mm_cmpgt_epi8(a.data, b.data); }
 force_inline bool AllTrue(i8x16 a) { return _mm_movemask_epi8(a.data) == 0xffff; }
+force_inline bool AnyTrue(i8x16 a) { return _mm_movemask_epi8(a.data); } // movemask_epi8: one bit per 8-bit lane
+force_inline int CountTrue(i8x16 a) { return CountBits(_mm_movemask_epi8(a.data)); }
+force_inline int FirstTrue(i8x16 a) { return CountTrailingZeroBits(_mm_movemask_epi8(a.data)); }
+force_inline bool IsTrue(i8x16 a, int i) { return _mm_movemask_epi8(a.data) & (1 << i); }
 
 force_inline f32x4 ToFloat(i32x4 a) { return _mm_cvtepi32_ps(a.data); }
 force_inline i32x4 Truncate(f32x4 a) { return _mm_cvttps_epi32(a.data); }