Core: SIMD AnyTrue, AllTrue, CountTrue, FirstTrue, IsTrue

This commit is contained in:
Mirek Fidler 2025-04-03 10:41:08 +02:00
parent 46fcfdc092
commit 2ae4a30077
5 changed files with 672 additions and 18 deletions

View file

@ -0,0 +1,486 @@
* /Users/cxl/out/CLANG.Debug.Debug_Full.Shared/SIMD_CMP 02.04.2025 12:04:20, user: cxl
=============== f32x4
a = 9 9 9 9
c = 9 9 9 9
AllTrue(c == a) = true
AnyTrue(c == a) = true
CountTrue(c == a) = 4
FirstTrue(c == a) = 0
matches = 0 1 2 3
b = 0 0 0 0
AllTrue(c == b) = false
AnyTrue(c == b) = false
CountTrue(c == b) = 0
matches =
---
a = 9 9 9 0
c = 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 3
FirstTrue(c == a) = 1
matches = 1 2 3
b = 0 0 0 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 1
FirstTrue(c == b) = 0
matches = 0
---
a = 9 9 0 0
c = 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 2
FirstTrue(c == a) = 2
matches = 2 3
b = 0 0 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 2
FirstTrue(c == b) = 0
matches = 0 1
---
a = 9 0 0 0
c = 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 1
FirstTrue(c == a) = 3
matches = 3
b = 0 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 3
FirstTrue(c == b) = 0
matches = 0 1 2
---
a = 1 2 3 4
b = 1 2 3 9
c = 9 9 9 9
AllTrue(a == b) = false
AllTrue(a == a) = true
AnyTrue(a == b) = true
AnyTrue(a == c) = false
CountTrue(a == b) = 3
FirstTrue(a == b) = 1
=============== i16x8
a = 9 9 9 9 9 9 9 9
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = true
AnyTrue(c == a) = true
CountTrue(c == a) = 8
FirstTrue(c == a) = 0
matches = 0 1 2 3 4 5 6 7
b = 0 0 0 0 0 0 0 0
AllTrue(c == b) = false
AnyTrue(c == b) = false
CountTrue(c == b) = 0
matches =
---
a = 9 9 9 9 9 9 9 0
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 7
FirstTrue(c == a) = 1
matches = 1 2 3 4 5 6 7
b = 0 0 0 0 0 0 0 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 1
FirstTrue(c == b) = 0
matches = 0
---
a = 9 9 9 9 9 9 0 0
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 6
FirstTrue(c == a) = 2
matches = 2 3 4 5 6 7
b = 0 0 0 0 0 0 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 2
FirstTrue(c == b) = 0
matches = 0 1
---
a = 9 9 9 9 9 0 0 0
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 5
FirstTrue(c == a) = 3
matches = 3 4 5 6 7
b = 0 0 0 0 0 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 3
FirstTrue(c == b) = 0
matches = 0 1 2
---
a = 9 9 9 9 0 0 0 0
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 4
FirstTrue(c == a) = 4
matches = 4 5 6 7
b = 0 0 0 0 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 4
FirstTrue(c == b) = 0
matches = 0 1 2 3
---
a = 9 9 9 0 0 0 0 0
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 3
FirstTrue(c == a) = 5
matches = 5 6 7
b = 0 0 0 9 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 5
FirstTrue(c == b) = 0
matches = 0 1 2 3 4
---
a = 9 9 0 0 0 0 0 0
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 2
FirstTrue(c == a) = 6
matches = 6 7
b = 0 0 9 9 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 6
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5
---
a = 9 0 0 0 0 0 0 0
c = 9 9 9 9 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 1
FirstTrue(c == a) = 7
matches = 7
b = 0 9 9 9 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 7
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6
---
a = 1 2 3 4 5 6 7 8
b = 1 4 2 4 5 7 7 9
c = 9 9 9 9 9 9 9 9
AllTrue(a == b) = false
AllTrue(a == a) = true
AnyTrue(a == b) = true
AnyTrue(a == c) = false
CountTrue(a == b) = 4
FirstTrue(a == b) = 1
=============== i32x4
a = 9 9 9 9
c = 9 9 9 9
AllTrue(c == a) = true
AnyTrue(c == a) = true
CountTrue(c == a) = 4
FirstTrue(c == a) = 0
matches = 0 1 2 3
b = 0 0 0 0
AllTrue(c == b) = false
AnyTrue(c == b) = false
CountTrue(c == b) = 0
matches =
---
a = 9 9 9 0
c = 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 3
FirstTrue(c == a) = 1
matches = 1 2 3
b = 0 0 0 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 1
FirstTrue(c == b) = 0
matches = 0
---
a = 9 9 0 0
c = 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 2
FirstTrue(c == a) = 2
matches = 2 3
b = 0 0 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 2
FirstTrue(c == b) = 0
matches = 0 1
---
a = 9 0 0 0
c = 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 1
FirstTrue(c == a) = 3
matches = 3
b = 0 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 3
FirstTrue(c == b) = 0
matches = 0 1 2
---
a = 1 2 3 4
b = 1 2 3 9
c = 9 9 9 9
AllTrue(a == b) = false
AllTrue(a == a) = true
AnyTrue(a == b) = true
AnyTrue(a == c) = false
CountTrue(a == b) = 3
FirstTrue(a == b) = 1
=============== i8x16
a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = true
AnyTrue(c == a) = true
CountTrue(c == a) = 16
FirstTrue(c == a) = 0
matches = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0
AllTrue(c == b) = false
AnyTrue(c == b) = false
CountTrue(c == b) = 0
matches =
---
a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 15
FirstTrue(c == a) = 1
matches = 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 1
FirstTrue(c == b) = 0
matches = 0
---
a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 14
FirstTrue(c == a) = 2
matches = 2 3 4 5 6 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 2
FirstTrue(c == b) = 0
matches = 0 1
---
a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 13
FirstTrue(c == a) = 3
matches = 3 4 5 6 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 3
FirstTrue(c == b) = 0
matches = 0 1 2
---
a = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 12
FirstTrue(c == a) = 4
matches = 4 5 6 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 0 0 0 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 4
FirstTrue(c == b) = 0
matches = 0 1 2 3
---
a = 9 9 9 9 . 9 9 9 9 . 9 9 9 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 11
FirstTrue(c == a) = 5
matches = 5 6 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 0 0 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 5
FirstTrue(c == b) = 0
matches = 0 1 2 3 4
---
a = 9 9 9 9 . 9 9 9 9 . 9 9 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 10
FirstTrue(c == a) = 6
matches = 6 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 0 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 6
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5
---
a = 9 9 9 9 . 9 9 9 9 . 9 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 9
FirstTrue(c == a) = 7
matches = 7 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 0 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 7
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6
---
a = 9 9 9 9 . 9 9 9 9 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 8
FirstTrue(c == a) = 8
matches = 8 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 0 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 8
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7
---
a = 9 9 9 9 . 9 9 9 0 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 7
FirstTrue(c == a) = 9
matches = 9 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 0 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 9
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7 8
---
a = 9 9 9 9 . 9 9 0 0 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 6
FirstTrue(c == a) = 10
matches = 10 11 12 13 14 15
b = 0 0 0 0 . 0 0 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 10
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7 8 9
---
a = 9 9 9 9 . 9 0 0 0 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 5
FirstTrue(c == a) = 11
matches = 11 12 13 14 15
b = 0 0 0 0 . 0 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 11
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7 8 9 10
---
a = 9 9 9 9 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 4
FirstTrue(c == a) = 12
matches = 12 13 14 15
b = 0 0 0 0 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 12
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7 8 9 10 11
---
a = 9 9 9 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 3
FirstTrue(c == a) = 13
matches = 13 14 15
b = 0 0 0 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 13
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7 8 9 10 11 12
---
a = 9 9 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 2
FirstTrue(c == a) = 14
matches = 14 15
b = 0 0 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 14
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7 8 9 10 11 12 13
---
a = 9 0 0 0 . 0 0 0 0 . 0 0 0 0 . 0 0 0 0
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == a) = false
AnyTrue(c == a) = true
CountTrue(c == a) = 1
FirstTrue(c == a) = 15
matches = 15
b = 0 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(c == b) = false
AnyTrue(c == b) = true
CountTrue(c == b) = 15
FirstTrue(c == b) = 0
matches = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
---
a = 1 2 3 4 . 5 6 7 8 . 1 2 3 4 . 5 6 7 8
b = 1 4 2 4 . 5 7 7 0 . 1 4 2 4 . 5 7 7 9
c = 9 9 9 9 . 9 9 9 9 . 9 9 9 9 . 9 9 9 9
AllTrue(a == b) = false
AllTrue(a == a) = true
AnyTrue(a == b) = true
AnyTrue(a == c) = false
CountTrue(a == b) = 8
FirstTrue(a == b) = 1

View file

@ -0,0 +1,124 @@
#include <Core/Core.h>
using namespace Upp;
// Sweeps a zero/nine boundary through every lane position of SIMD type T
// (n lanes of scalar type W) and logs AllTrue / AnyTrue / CountTrue /
// FirstTrue / IsTrue for the resulting equality masks.
//
// DDUMP echoes the text of its argument into the log (the etalon contains
// lines such as "AllTrue(c == a) = true"), so the names a, b, c and matches
// and every dumped expression must stay exactly as written.
//
// Returns c, the vector last loaded with 9 in every lane.
template <class T, class W, int n>
T Test()
{
	T c;
	for(int i = 0; i < n; i++) {
		// Lanes below index i hold 0 in tail_nines and 9 in head_nines;
		// the remaining lanes hold the opposite value.
		W tail_nines[n], head_nines[n], all_nines[n];
		for(int j = 0; j < n; j++) {
			const bool before_split = j < i;
			tail_nines[j] = before_split ? 0 : 9;
			head_nines[j] = before_split ? 9 : 0;
			all_nines[j] = 9;
		}

		c.Load(all_nines);
		T a(tail_nines);
		T b(head_nines);

		// --- c compared with a: lanes i..n-1 are equal ---
		DDUMP(a);
		DDUMP(c);
		DDUMP(AllTrue(c == a));
		DDUMP(AnyTrue(c == a));
		DDUMP(CountTrue(c == a));
		// FirstTrue is only logged when at least one lane matched.
		if(AnyTrue(c == a)) {
			DDUMP(FirstTrue(c == a));
		}
		String matches;
		for(int lane = 0; lane < n; ++lane) {
			if(IsTrue(c == a, lane)) {
				matches << lane << ' ';
			}
		}
		DDUMP(matches);

		// --- c compared with b: lanes 0..i-1 are equal ---
		DDUMP(b);
		DDUMP(AllTrue(c == b));
		DDUMP(AnyTrue(c == b));
		DDUMP(CountTrue(c == b));
		if(AnyTrue(c == b)) {
			DDUMP(FirstTrue(c == b));
		}
		matches.Clear();
		for(int lane = 0; lane < n; ++lane) {
			if(IsTrue(c == b, lane)) {
				matches << lane << ' ';
			}
		}
		DDUMP(matches);

		DLOG("---");
	}
	return c;
}
// Entry point of the SIMD comparison-predicate test.
// For each SIMD type (f32x4, i16x8, i32x4, i8x16) it runs the generic Test<>
// sweep and then logs AllTrue / AnyTrue / CountTrue / FirstTrue for a pair of
// hand-picked vectors. DDUMP echoes the text of its argument into the log, so
// the variable names and expressions below are part of the expected output.
// NOTE(review): judging by the etalon ("a = 1 2 3 4" together with
// "FirstTrue(a == b) = 1" for b(1, 2, 3, 9)), the last constructor argument
// appears to land in lane 0 - confirm against the SIMD type definitions.
CONSOLE_APP_MAIN
{
// Presumably routes the log to both the console and a log file - verify
// against the StdLogSetup documentation.
StdLogSetup(LOG_COUT|LOG_FILE);
// NOTE(review): the two lines below are disabled leftover debug calls;
// neither function is defined in this file.
// CheckCode();
// DoTest2(); return;
DLOG("=============== f32x4");
{
// c is the all-nines vector returned by the sweep; a and b differ in one lane.
f32x4 c = Test<f32x4, float, 4>();
f32x4 a(1, 2, 3, 4);
f32x4 b(1, 2, 3, 9);
DDUMP(a);
DDUMP(b);
DDUMP(c);
DDUMP(AllTrue(a == b));
DDUMP(AllTrue(a == a));
DDUMP(AnyTrue(a == b));
DDUMP(AnyTrue(a == c));
DDUMP(CountTrue(a == b));
DDUMP(FirstTrue(a == b));
}
DLOG("=============== i16x8");
{
// a and b agree in four of the eight lanes.
i16x8 c = Test<i16x8, word, 8>();
i16x8 a(1, 2, 3, 4, 5, 6, 7, 8);
i16x8 b(1, 4, 2, 4, 5, 7, 7, 9);
DDUMP(a);
DDUMP(b);
DDUMP(c);
DDUMP(AllTrue(a == b));
DDUMP(AllTrue(a == a));
DDUMP(AnyTrue(a == b));
DDUMP(AnyTrue(a == c));
DDUMP(CountTrue(a == b));
DDUMP(FirstTrue(a == b));
}
DLOG("=============== i32x4");
{
// Same value pattern as the f32x4 case, with 32-bit integer lanes.
i32x4 c = Test<i32x4, dword, 4>();
i32x4 a(1, 2, 3, 4);
i32x4 b(1, 2, 3, 9);
DDUMP(a);
DDUMP(b);
DDUMP(c);
DDUMP(AllTrue(a == b));
DDUMP(AllTrue(a == a));
DDUMP(AnyTrue(a == b));
DDUMP(AnyTrue(a == c));
DDUMP(CountTrue(a == b));
DDUMP(FirstTrue(a == b));
}
DLOG("=============== i8x16");
{
// a and b agree in eight of the sixteen lanes.
i8x16 c = Test<i8x16, byte, 16>();
i8x16 a(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
i8x16 b(1, 4, 2, 4, 5, 7, 7, 0, 1, 4, 2, 4, 5, 7, 7, 9);
DDUMP(a);
DDUMP(b);
DDUMP(c);
DDUMP(AllTrue(a == b));
DDUMP(AllTrue(a == a));
DDUMP(AnyTrue(a == b));
DDUMP(AnyTrue(a == c));
DDUMP(CountTrue(a == b));
DDUMP(FirstTrue(a == b));
}
// Presumably compares the produced log with the package's Etalon.log and
// fails the test on a mismatch - verify against the U++ Core documentation.
CheckLogEtalon();
}

View file

@ -0,0 +1,10 @@
uses
Core;
file
Etalon.log,
SIMD_CMP.cpp;
mainconfig
"" = "";

View file

@ -2,6 +2,15 @@
using namespace Upp; using namespace Upp;
// Compresses a 128-bit NEON comparison mask into a 64-bit scalar.
// vshrn_n_u16(mask, 4) shifts every 16-bit lane right by 4 and keeps the low
// 8 bits, so each input byte contributes 4 bits to the result (8 bits per
// 16-bit lane, 16 bits per 32-bit lane). Callers scale bit counts and bit
// positions back to lane units with the matching shift.
force_inline
uint64 cmask16__(uint16x8_t mask) {
	const uint8x8_t narrowed = vshrn_n_u16(mask, 4);
	return vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);
}
const uint64 cmask_all__ = 0xffffffffffffffffull;
struct f32x4 { struct f32x4 {
float32x4_t data; float32x4_t data;
@ -53,14 +62,12 @@ force_inline f32x4 operator>(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32
force_inline f32x4 operator<=(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32(vcleq_f32(a, b)); } force_inline f32x4 operator<=(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32(vcleq_f32(a, b)); }
force_inline f32x4 operator>=(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32(vcgeq_f32(a, b)); } force_inline f32x4 operator>=(f32x4 a, f32x4 b) { return vreinterpretq_f32_u32(vcgeq_f32(a, b)); }
force_inline bool AllTrue(uint32x4_t v) { force_inline uint64 cmaskf__(float32x4_t mask) { return cmask16__(vreinterpretq_u16_f32(mask)); }
uint32x2_t tmp = vand_u32(vget_low_u32(v), vget_high_u32(v)); force_inline bool AllTrue(f32x4 a) { return cmaskf__(a.data) == cmask_all__; }
return vget_lane_u32(vpmin_u32(tmp, tmp), 0) == 0xffffffff;; force_inline bool AnyTrue(f32x4 a) { return cmaskf__(a.data); }
} force_inline int CountTrue(f32x4 a) { return CountBits64(cmaskf__(a.data)) >> 4; }
force_inline int FirstTrue(f32x4 a) { return CountTrailingZeroBits64(cmaskf__(a.data)) >> 4; }
force_inline bool AllTrue(f32x4 a) { force_inline bool IsTrue(f32x4 a, int i) { return cmaskf__(a.data) & ((uint64)1 << (i << 4)); }
return AllTrue(vreinterpretq_u32_f32(a));
}
force_inline f32x4 min(f32x4 a, f32x4 b) { return vminq_f32(a, b); } force_inline f32x4 min(f32x4 a, f32x4 b) { return vminq_f32(a, b); }
force_inline f32x4 max(f32x4 a, f32x4 b) { return vmaxq_f32(a, b); } force_inline f32x4 max(f32x4 a, f32x4 b) { return vmaxq_f32(a, b); }
@ -122,9 +129,11 @@ force_inline i16x8 operator==(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16
force_inline i16x8 operator<(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16(vcltq_s16(a, b)); } force_inline i16x8 operator<(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16(vcltq_s16(a, b)); }
force_inline i16x8 operator>(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16(vcgtq_s16(a, b)); } force_inline i16x8 operator>(i16x8 a, i16x8 b) { return vreinterpretq_s16_u16(vcgtq_s16(a, b)); }
force_inline bool AllTrue(i16x8 a) { force_inline bool AllTrue(i16x8 a) { return cmask16__(a.data) == cmask_all__; }
return AllTrue(vreinterpretq_u32_s16(a)); force_inline bool AnyTrue(i16x8 a) { return cmask16__(a.data); }
} force_inline int CountTrue(i16x8 a) { return CountBits64(cmask16__(a.data)) >> 3; }
force_inline int FirstTrue(i16x8 a) { return CountTrailingZeroBits64(cmask16__(a.data)) >> 3; }
force_inline bool IsTrue(i16x8 a, int i) { return cmask16__(a.data) & ((uint64)1 << (i << 3)); }
struct i32x4 { // 4xint32 struct i32x4 { // 4xint32
int32x4_t data; int32x4_t data;
@ -176,9 +185,12 @@ force_inline i32x4 operator==(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32
force_inline i32x4 operator<(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32(vcltq_s32(a, b)); } force_inline i32x4 operator<(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32(vcltq_s32(a, b)); }
force_inline i32x4 operator>(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32(vcgtq_s32(a, b)); } force_inline i32x4 operator>(i32x4 a, i32x4 b) { return vreinterpretq_s32_u32(vcgtq_s32(a, b)); }
force_inline bool AllTrue(i32x4 a) { force_inline uint64 cmask32__(uint32x4_t mask) { return cmask16__(vreinterpretq_u16_u32(mask)); }
return AllTrue(vreinterpretq_u32_s32(a)); force_inline bool AllTrue(i32x4 a) { return cmask32__(a.data) == cmask_all__; }
} force_inline bool AnyTrue(i32x4 a) { return cmask32__(a.data); }
force_inline int CountTrue(i32x4 a) { return CountBits64(cmask32__(a.data)) >> 4; }
force_inline int FirstTrue(i32x4 a) { return CountTrailingZeroBits64(cmask32__(a.data)) >> 4; }
force_inline bool IsTrue(i32x4 a, int i) { return cmask32__(a.data) & ((uint64)1 << (i << 4)); }
struct i8x16 { // 16*int8 struct i8x16 { // 16*int8
int8x16_t data; int8x16_t data;
@ -226,10 +238,16 @@ force_inline i8x16 operator^(i8x16 a, i8x16 b) { return veorq_s8(a, b); }
force_inline i8x16& operator^=(i8x16& a, i8x16 b) { return a = a ^ b; } force_inline i8x16& operator^=(i8x16& a, i8x16 b) { return a = a ^ b; }
force_inline i8x16 operator~(i8x16 a) { return vmvnq_s8(a); } force_inline i8x16 operator~(i8x16 a) { return vmvnq_s8(a); }
force_inline i8x16 operator==(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vceqq_s8(a, b)); } force_inline i8x16 operator==(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vceqq_s8(a, b)); }
force_inline i8x16 operator<(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcltq_s8(a, b)); } force_inline i8x16 operator<(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcltq_s8(a, b)); }
force_inline i8x16 operator>(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcgtq_s8(a, b)); } force_inline i8x16 operator>(i8x16 a, i8x16 b) { return vreinterpretq_s8_u8(vcgtq_s8(a, b)); }
force_inline bool AllTrue(i8x16 a) { return AllTrue(vreinterpretq_u32_s8(a)); }
force_inline uint64 cmask8__(uint8x16_t mask) { return cmask16__(vreinterpretq_u16_u8(mask)); }
force_inline bool AllTrue(i8x16 a) { return cmask8__(a.data) == cmask_all__; }
force_inline bool AnyTrue(i8x16 a) { return cmask8__(a.data); }
force_inline int CountTrue(i8x16 a) { return CountBits64(cmask8__(a.data)) >> 2; }
force_inline int FirstTrue(i8x16 a) { return CountTrailingZeroBits64(cmask8__(a.data)) >> 2; }
force_inline bool IsTrue(i8x16 a, int i) { return cmask8__(a.data) & ((uint64)1 << (i << 2)); }
force_inline f32x4 ToFloat(i32x4 a) { return vcvtq_f32_s32(a); } force_inline f32x4 ToFloat(i32x4 a) { return vcvtq_f32_s32(a); }
force_inline i32x4 Truncate(f32x4 a) { return vcvtq_s32_f32(a); } force_inline i32x4 Truncate(f32x4 a) { return vcvtq_s32_f32(a); }

View file

@ -38,6 +38,10 @@ force_inline f32x4 operator>(f32x4 a, f32x4 b) { return _mm_cmpgt_ps(a.data,
force_inline f32x4 operator<=(f32x4 a, f32x4 b) { return _mm_cmple_ps(a.data, b.data); } force_inline f32x4 operator<=(f32x4 a, f32x4 b) { return _mm_cmple_ps(a.data, b.data); }
force_inline f32x4 operator>=(f32x4 a, f32x4 b) { return _mm_cmpge_ps(a.data, b.data); } force_inline f32x4 operator>=(f32x4 a, f32x4 b) { return _mm_cmpge_ps(a.data, b.data); }
force_inline bool AllTrue(f32x4 a) { return _mm_movemask_ps(a.data) == 0xf; } force_inline bool AllTrue(f32x4 a) { return _mm_movemask_ps(a.data) == 0xf; }
force_inline bool AnyTrue(f32x4 a) { return _mm_movemask_ps(a.data); }
force_inline int CountTrue(f32x4 a) { return CountBits(_mm_movemask_ps(a.data)); }
force_inline int FirstTrue(f32x4 a) { return CountTrailingZeroBits(_mm_movemask_ps(a.data)); }
force_inline bool IsTrue(f32x4 a, int i) { return _mm_movemask_ps(a.data) & (1 << i); }
force_inline f32x4 min(f32x4 a, f32x4 b) { return _mm_min_ps(a.data, b.data); } force_inline f32x4 min(f32x4 a, f32x4 b) { return _mm_min_ps(a.data, b.data); }
force_inline f32x4 max(f32x4 a, f32x4 b) { return _mm_max_ps(a.data, b.data); } force_inline f32x4 max(f32x4 a, f32x4 b) { return _mm_max_ps(a.data, b.data); }
@ -95,6 +99,10 @@ force_inline i16x8 operator==(i16x8 a, i16x8 b) { return _mm_cmpeq_epi16(a.da
force_inline i16x8 operator<(i16x8 a, i16x8 b) { return _mm_cmplt_epi16(a.data, b.data); } force_inline i16x8 operator<(i16x8 a, i16x8 b) { return _mm_cmplt_epi16(a.data, b.data); }
force_inline i16x8 operator>(i16x8 a, i16x8 b) { return _mm_cmpgt_epi16(a.data, b.data); } force_inline i16x8 operator>(i16x8 a, i16x8 b) { return _mm_cmpgt_epi16(a.data, b.data); }
force_inline bool AllTrue(i16x8 a) { return _mm_movemask_epi8(a.data) == 0xffff; } force_inline bool AllTrue(i16x8 a) { return _mm_movemask_epi8(a.data) == 0xffff; }
force_inline bool AnyTrue(i16x8 a) { return _mm_movemask_epi8(a.data); }
force_inline int CountTrue(i16x8 a) { return CountBits(_mm_movemask_epi8(a.data)) >> 1; }
force_inline int FirstTrue(i16x8 a) { return CountTrailingZeroBits(_mm_movemask_epi8(a.data)) >> 1; }
// Reports whether 16-bit lane i of comparison mask a is set.
// _mm_movemask_epi8 produces one bit per byte, so lane i is sampled at bit 2 * i.
// Returns bool (previously int) so that it matches every other IsTrue overload.
force_inline bool  IsTrue(i16x8 a, int i)   { return (_mm_movemask_epi8(a.data) & (1 << 2 * i)) != 0; }
struct i32x4 : i16x8 { // 4xint32 struct i32x4 : i16x8 { // 4xint32
i32x4() {} i32x4() {}
@ -129,6 +137,10 @@ force_inline i32x4 operator==(i32x4 a, i32x4 b) { return _mm_cmpeq_epi32(a.da
force_inline i32x4 operator<(i32x4 a, i32x4 b) { return _mm_cmplt_epi32(a.data, b.data); } force_inline i32x4 operator<(i32x4 a, i32x4 b) { return _mm_cmplt_epi32(a.data, b.data); }
force_inline i32x4 operator>(i32x4 a, i32x4 b) { return _mm_cmpgt_epi32(a.data, b.data); } force_inline i32x4 operator>(i32x4 a, i32x4 b) { return _mm_cmpgt_epi32(a.data, b.data); }
force_inline bool AllTrue(i32x4 a) { return _mm_movemask_epi8(a.data) == 0xffff; } force_inline bool AllTrue(i32x4 a) { return _mm_movemask_epi8(a.data) == 0xffff; }
force_inline bool AnyTrue(i32x4 a) { return _mm_movemask_ps(_mm_castsi128_ps(a.data)); }
force_inline int CountTrue(i32x4 a) { return CountBits(_mm_movemask_ps(_mm_castsi128_ps(a.data))); }
force_inline int FirstTrue(i32x4 a) { return CountTrailingZeroBits(_mm_movemask_ps(_mm_castsi128_ps(a.data))); }
force_inline bool IsTrue(i32x4 a, int i) { return _mm_movemask_ps(_mm_castsi128_ps(a.data)) & (1 << i); }
struct i8x16 : i16x8 { // 16xint8 struct i8x16 : i16x8 { // 16xint8
i8x16() {} i8x16() {}
@ -158,6 +170,10 @@ force_inline i8x16 operator==(i8x16 a, i8x16 b) { return _mm_cmpeq_epi8(a.dat
force_inline i8x16 operator<(i8x16 a, i8x16 b) { return _mm_cmplt_epi8(a.data, b.data); } force_inline i8x16 operator<(i8x16 a, i8x16 b) { return _mm_cmplt_epi8(a.data, b.data); }
force_inline i8x16 operator>(i8x16 a, i8x16 b) { return _mm_cmpgt_epi8(a.data, b.data); } force_inline i8x16 operator>(i8x16 a, i8x16 b) { return _mm_cmpgt_epi8(a.data, b.data); }
force_inline bool AllTrue(i8x16 a) { return _mm_movemask_epi8(a.data) == 0xffff; } force_inline bool AllTrue(i8x16 a) { return _mm_movemask_epi8(a.data) == 0xffff; }
force_inline bool AnyTrue(i8x16 a) { return _mm_movemask_epi8(a.data); }
force_inline int CountTrue(i8x16 a) { return CountBits(_mm_movemask_epi8(a.data)); }
force_inline int FirstTrue(i8x16 a) { return CountTrailingZeroBits(_mm_movemask_epi8(a.data)); }
force_inline bool IsTrue(i8x16 a, int i) { return _mm_movemask_epi8(a.data) & (1 << i); }
force_inline f32x4 ToFloat(i32x4 a) { return _mm_cvtepi32_ps(a.data); } force_inline f32x4 ToFloat(i32x4 a) { return _mm_cvtepi32_ps(a.data); }
force_inline i32x4 Truncate(f32x4 a) { return _mm_cvttps_epi32(a.data); } force_inline i32x4 Truncate(f32x4 a) { return _mm_cvttps_epi32(a.data); }