.benchmarks

This commit is contained in:
Mirek Fidler 2023-11-11 14:32:46 +01:00
parent 867efb4927
commit e385d0bb48
4 changed files with 86 additions and 4 deletions

View file

@ -2,7 +2,8 @@ uses
CtrlLib; CtrlLib;
file file
main.cpp; main.cpp,
info.txt;
mainconfig mainconfig
"" = "GUI"; "" = "GUI";

View file

@ -0,0 +1,7 @@
TIMING Blend : 435.99 ms - 435.99 us (436.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
TIMING Stroke : 236.99 ms - 236.99 us (237.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
TIMING Fill : 471.99 ms - 471.99 us (472.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
TIMING Rect : 596.99 ms - 596.99 us (597.00 ms / 1000 ), min: 0.00 ns, max: 2.00 ms, nesting: 0 - 1000
TIMING Clear 2 : 704.99 ms - 704.99 us (705.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
TIMING Clear : 604.99 ms - 604.99 us (605.00 ms / 1000 ), min: 0.00 ns, max: 2.00 ms, nesting: 0 - 1000

View file

@ -2,10 +2,60 @@
using namespace Upp; using namespace Upp;
Buffer<byte> h(1024*1024*32);
const byte *h1 = h;
CONSOLE_APP_MAIN CONSOLE_APP_MAIN
{ {
Buffer<byte> h(1024*1024*30); for(int i = 0; i < 1000; i++) {
{
RTIMING("32MB memset");
memset(h, 0, 1024*1024 * 32);
}
{
RTIMING("32MB memset8");
memset8(h, 0, 1024*1024 * 32);
}
{
RTIMING("32MB memcpy");
memcpy(h, h1, 1024*1024 * 32);
}
{
RTIMING("32MB memcpy8");
memcpy8(h, h, 1024*1024 * 32);
}
}
for(int i = 0; i < 100000000; i++) {
{
RTIMING("32B memset");
memset(h, 0, 32);
}
{
RTIMING("32B memset8");
memset8(h, 0, 32);
}
{
RTIMING("32B memset32");
memset32(h, 31525874, 32 / 4);
}
{
RTIMING("32B loop 32");
dword *s = (dword *)~h;
dword *e = s + 32 / 4;
while(s < e)
*s++ = 31515927;
}
{
RTIMING("32B memcpy");
memcpy(h, h1, 32);
}
{
RTIMING("32B memcpy8");
memcpy8(h, h, 32);
}
}
for(int i = 0; i < 1000000; i++) { for(int i = 0; i < 1000000; i++) {
{ {
RTIMING("32KB memset"); RTIMING("32KB memset");
@ -26,5 +76,27 @@ CONSOLE_APP_MAIN
while(s < e) while(s < e)
*s++ = 31515927; *s++ = 31515927;
} }
{
RTIMING("32KB memcpy");
memcpy(h, h1, 1024 * 32);
}
{
RTIMING("32KB memcpy8");
memcpy8(h, h, 1024 * 32);
}
}
for(int i = 0; i < 1000; i++) {
{
RTIMING("32MB memset32");
memset32(h, 31525874, 1024*1024 * 32 / 4);
}
{
RTIMING("32MB loop 32");
dword *s = (dword *)~h;
dword *e = s + 1024*1024 * 32 / 4;
while(s < e)
*s++ = 31515927;
}
} }
} }

View file

@ -18,8 +18,9 @@ void memset8__(void *p, i16x8 data_, size_t len)
t = (byte *)(((uintptr_t)t | 15) + 1); t = (byte *)(((uintptr_t)t | 15) + 1);
len = e - t; len = e - t;
e -= 128; e -= 128;
#if 0 // streaming does not seem to be beneficial anymore
#ifdef CPU_SSE2 #ifdef CPU_SSE2
if(len >= 1024*1024) { // for really huge data, bypass the cache if(len >= 1024*1024 && 0) { // for really huge data, bypass the cache
auto Set4S = [&](int at) { data.Stream(t + at); }; auto Set4S = [&](int at) { data.Stream(t + at); };
while(len >= 64) { while(len >= 64) {
Set4S(0*16); Set4S(1*16); Set4S(2*16); Set4S(3*16); Set4S(0*16); Set4S(1*16); Set4S(2*16); Set4S(3*16);
@ -29,6 +30,7 @@ void memset8__(void *p, i16x8 data_, size_t len)
_mm_sfence(); _mm_sfence();
e = t - 1; e = t - 1;
} }
#endif
#endif #endif
while(t <= e) { while(t <= e) {
Set4(0*16); Set4(1*16); Set4(2*16); Set4(3*16); Set4(0*16); Set4(1*16); Set4(2*16); Set4(3*16);
@ -55,7 +57,7 @@ void memcpy8__(void *p, const void *q, size_t len)
byte *t = (byte *)p; byte *t = (byte *)p;
const byte *s = (const byte *)q; const byte *s = (const byte *)q;
if(len > 4*1024*1024) { // for really huge data, call memcpy to bypass the cache if(len > 4*1024*1024) { // for really huge data, call memcpy to use possible CPU magic
memcpy(t, s, len); memcpy(t, s, len);
return; return;
} }