mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-15 06:05:58 -06:00
.benchmarks
This commit is contained in:
parent
867efb4927
commit
e385d0bb48
4 changed files with 86 additions and 4 deletions
|
|
@ -2,7 +2,8 @@ uses
|
|||
CtrlLib;
|
||||
|
||||
file
|
||||
main.cpp;
|
||||
main.cpp,
|
||||
info.txt;
|
||||
|
||||
mainconfig
|
||||
"" = "GUI";
|
||||
|
|
|
|||
7
benchmarks/PainterClear/info.txt
Normal file
7
benchmarks/PainterClear/info.txt
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
TIMING Blend : 435.99 ms - 435.99 us (436.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
|
||||
TIMING Stroke : 236.99 ms - 236.99 us (237.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
|
||||
TIMING Fill : 471.99 ms - 471.99 us (472.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
|
||||
TIMING Rect : 596.99 ms - 596.99 us (597.00 ms / 1000 ), min: 0.00 ns, max: 2.00 ms, nesting: 0 - 1000
|
||||
TIMING Clear 2 : 704.99 ms - 704.99 us (705.00 ms / 1000 ), min: 0.00 ns, max: 1.00 ms, nesting: 0 - 1000
|
||||
TIMING Clear : 604.99 ms - 604.99 us (605.00 ms / 1000 ), min: 0.00 ns, max: 2.00 ms, nesting: 0 - 1000
|
||||
|
||||
|
|
@ -2,10 +2,60 @@
|
|||
|
||||
using namespace Upp;
|
||||
|
||||
Buffer<byte> h(1024*1024*32);
|
||||
const byte *h1 = h;
|
||||
|
||||
CONSOLE_APP_MAIN
|
||||
{
|
||||
Buffer<byte> h(1024*1024*30);
|
||||
for(int i = 0; i < 1000; i++) {
|
||||
{
|
||||
RTIMING("32MB memset");
|
||||
memset(h, 0, 1024*1024 * 32);
|
||||
}
|
||||
{
|
||||
RTIMING("32MB memset8");
|
||||
memset8(h, 0, 1024*1024 * 32);
|
||||
}
|
||||
{
|
||||
RTIMING("32MB memcpy");
|
||||
memcpy(h, h1, 1024*1024 * 32);
|
||||
}
|
||||
{
|
||||
RTIMING("32MB memcpy8");
|
||||
memcpy8(h, h, 1024*1024 * 32);
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i < 100000000; i++) {
|
||||
{
|
||||
RTIMING("32B memset");
|
||||
memset(h, 0, 32);
|
||||
}
|
||||
{
|
||||
RTIMING("32B memset8");
|
||||
memset8(h, 0, 32);
|
||||
}
|
||||
{
|
||||
RTIMING("32B memset32");
|
||||
memset32(h, 31525874, 32 / 4);
|
||||
}
|
||||
{
|
||||
RTIMING("32B loop 32");
|
||||
dword *s = (dword *)~h;
|
||||
dword *e = s + 32 / 4;
|
||||
while(s < e)
|
||||
*s++ = 31515927;
|
||||
}
|
||||
{
|
||||
RTIMING("32B memcpy");
|
||||
memcpy(h, h1, 32);
|
||||
}
|
||||
{
|
||||
RTIMING("32B memcpy8");
|
||||
memcpy8(h, h, 32);
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i < 1000000; i++) {
|
||||
{
|
||||
RTIMING("32KB memset");
|
||||
|
|
@ -26,5 +76,27 @@ CONSOLE_APP_MAIN
|
|||
while(s < e)
|
||||
*s++ = 31515927;
|
||||
}
|
||||
{
|
||||
RTIMING("32KB memcpy");
|
||||
memcpy(h, h1, 1024 * 32);
|
||||
}
|
||||
{
|
||||
RTIMING("32KB memcpy8");
|
||||
memcpy8(h, h, 1024 * 32);
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i < 1000; i++) {
|
||||
{
|
||||
RTIMING("32MB memset32");
|
||||
memset32(h, 31525874, 1024*1024 * 32 / 4);
|
||||
}
|
||||
{
|
||||
RTIMING("32MB loop 32");
|
||||
dword *s = (dword *)~h;
|
||||
dword *e = s + 1024*1024 * 32 / 4;
|
||||
while(s < e)
|
||||
*s++ = 31515927;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,8 +18,9 @@ void memset8__(void *p, i16x8 data_, size_t len)
|
|||
t = (byte *)(((uintptr_t)t | 15) + 1);
|
||||
len = e - t;
|
||||
e -= 128;
|
||||
#if 0 // streaming does not seem to be beneficial anymore
|
||||
#ifdef CPU_SSE2
|
||||
if(len >= 1024*1024) { // for really huge data, bypass the cache
|
||||
if(len >= 1024*1024 && 0) { // for really huge data, bypass the cache
|
||||
auto Set4S = [&](int at) { data.Stream(t + at); };
|
||||
while(len >= 64) {
|
||||
Set4S(0*16); Set4S(1*16); Set4S(2*16); Set4S(3*16);
|
||||
|
|
@ -29,6 +30,7 @@ void memset8__(void *p, i16x8 data_, size_t len)
|
|||
_mm_sfence();
|
||||
e = t - 1;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
while(t <= e) {
|
||||
Set4(0*16); Set4(1*16); Set4(2*16); Set4(3*16);
|
||||
|
|
@ -55,7 +57,7 @@ void memcpy8__(void *p, const void *q, size_t len)
|
|||
byte *t = (byte *)p;
|
||||
const byte *s = (const byte *)q;
|
||||
|
||||
if(len > 4*1024*1024) { // for really huge data, call memcpy to bypass the cache
|
||||
if(len > 4*1024*1024) { // for really huge data, call memcpy to use possible CPU magic
|
||||
memcpy(t, s, len);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue