ultimatepp/uppsrc/Core/Cpu.cpp
cxl 8ec50747cf Core, Draw, Painter: memsetd and RGBAFill optimized
git-svn-id: svn://ultimatepp.org/upp/trunk@14486 f0d560ea-af0d-0410-9eb7-867de7ffcac7
2020-05-20 14:39:18 +00:00

178 lines
3.3 KiB
C++

#include "Core.h"
namespace Upp {
#ifdef CPU_X86
#ifdef COMPILER_MSC
#include <intrin.h>
#else
#include <cpuid.h>
#endif
// CPU feature flags cached by sCheckCPU() from the CPUID leaf 1 result.
static bool sHasMMX; // EDX bit 23
static bool sHasSSE; // EDX bit 25
static bool sHasSSE2; // EDX bit 26
static bool sHasSSE3; // ECX bit 0
static bool sHypervisor; // ECX bit 31
// Queries CPUID leaf 1 and caches the feature bits in the file-level flags
// (sHasMMX, sHasSSE, sHasSSE2, sHasSSE3, sHypervisor). The query itself is
// performed inside ONCELOCK; later calls return through the `done` fast path.
static void sCheckCPU()
{
	static bool done; // fast path: set only after the flags have been written
	if(done)
		return;
	ONCELOCK {
		// Zero-initialized so that a failed query reports every feature as absent
		// instead of reading indeterminate values.
		unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
#ifdef COMPILER_MSC
		int cpuInfo[4];
		__cpuid(cpuInfo, 1);
		eax = cpuInfo[0];
		ebx = cpuInfo[1];
		ecx = cpuInfo[2];
		edx = cpuInfo[3];
#else
		// __get_cpuid returns 0 when the requested leaf is not supported.
		if(!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			eax = ebx = ecx = edx = 0;
#endif
		// https://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits
		sHasMMX = (edx & (1u << 23)) != 0;
		sHasSSE = (edx & (1u << 25)) != 0;
		sHasSSE2 = (edx & (1u << 26)) != 0;
		sHasSSE3 = (ecx & 1u) != 0;
		sHypervisor = (ecx & (1u << 31)) != 0; // unsigned shift: bit 31 is the sign bit of int
	}
	// Raised after the ONCELOCK block so a caller that sees `done` never
	// returns before the flags are stored.
	// NOTE(review): `done` is a plain bool; this relies on ONCELOCK for the
	// actual synchronization - confirm against its definition.
	done = true;
}
// The body holds only a commented-out call, so this block executes no
// statements; sCheckCPU() is instead invoked by each Cpu*() accessor.
INITBLOCK {
// sCheckCPU();
}
// Feature accessors: each one makes sure sCheckCPU() has run and then
// reports the corresponding cached flag.

bool CpuMMX()
{
	sCheckCPU();
	return sHasMMX;
}

bool CpuSSE()
{
	sCheckCPU();
	return sHasSSE;
}

bool CpuSSE2()
{
	sCheckCPU();
	return sHasSSE2;
}

bool CpuSSE3()
{
	sCheckCPU();
	return sHasSSE3;
}

bool CpuHypervisor()
{
	sCheckCPU();
	return sHypervisor;
}
#ifdef PLATFORM_POSIX
#ifdef PLATFORM_BSD
#include <sys/param.h>
#include <sys/sysctl.h>
#else
#include <sys/sysinfo.h>
#endif
#endif
// Returns the number of CPUs reported by the platform, clamped to [1, 256].
// The value is computed once inside ONCELOCK and cached in a function static.
//  - Win32: counts the set bits of the system affinity mask.
//  - BSD: reads hw.ncpu through sysctl.
//  - Solaris: sysconf(_SC_NPROCESSORS_ONLN).
//  - other POSIX: get_nprocs().
//  - anything else: 1.
int CPU_Cores()
{
	static int n;
	ONCELOCK {
#ifdef PLATFORM_WIN32
		// DWORD_PTR is the type the API expects and is pointer sized, so the same
		// code serves both 32-bit and 64-bit builds.
		DWORD_PTR pa = 0, sa = 0;
		if(GetProcessAffinityMask(GetCurrentProcess(), &pa, &sa))
			for(; sa; sa >>= 1)
				n += (int)(sa & 1);
		// Never report zero cores, even when the query fails or yields an empty mask.
		n = minmax(n, 1, 256);
#elif defined(PLATFORM_POSIX)
#ifdef PLATFORM_BSD
		int mib[2];
		size_t len = sizeof(n);
		mib[0] = CTL_HW;
		mib[1] = HW_NCPU;
		if(sysctl(mib, 2, &n, &len, NULL, 0) != 0)
			n = 0; // query failed: let the clamp below fall back to one core
		n = minmax(n, 1, 256);
#elif defined(PLATFORM_SOLARIS)
		n = minmax((int)sysconf(_SC_NPROCESSORS_ONLN), 1, 256);
#else
		n = minmax(get_nprocs(), 1, 256);
#endif
#else
		n = 1;
#endif
	}
	return n;
}
#else
#ifdef PLATFORM_LINUX
#ifdef PLATFORM_ANDROID
#include <cpu-features.h>
// Android: forwards the count reported by android_getCpuCount().
int CPU_Cores()
{
	const int cores = android_getCpuCount();
	return cores;
}
#else
#include <sys/sysinfo.h>
// Linux (non-x86): get_nprocs() result limited to the range [1, 256].
int CPU_Cores()
{
	const int reported = get_nprocs();
	return minmax(reported, 1, 256);
}
#endif
#else
// Fallback for platforms without a core-count query: always reports one core.
int CPU_Cores()
{
	const int single_core = 1;
	return single_core;
}
#endif
#endif
#ifdef PLATFORM_WIN32
bool IsDecentMachine()
{
if(!IsWin2K())
return false;
MEMORYSTATUS m;
GlobalMemoryStatus(&m);
return m.dwTotalPhys > 500 * 1024 * 1024;
}
#else
// Non-Win32 platforms: no check is performed, every machine is reported as decent.
bool IsDecentMachine()
{
	const bool decent = true;
	return decent;
}
#endif
// Shared body of the array overloads below: applies the single-element
// EndianSwap to each of the `count` items starting at `v`.
#define ENDIAN_SWAP { for(; count != 0; --count, ++v) EndianSwap(*v); }

// Array overloads, one per supported element type.
void EndianSwap(word *v, size_t count)   ENDIAN_SWAP
void EndianSwap(int16 *v, size_t count)  ENDIAN_SWAP
void EndianSwap(dword *v, size_t count)  ENDIAN_SWAP
void EndianSwap(int *v, size_t count)    ENDIAN_SWAP
void EndianSwap(int64 *v, size_t count)  ENDIAN_SWAP
void EndianSwap(uint64 *v, size_t count) ENDIAN_SWAP
#ifdef CPU_X86
// Stores the 32-bit value `c` into `len` consecutive dwords starting at `p`.
// When `p` is 4-byte aligned and more than 64 dwords are requested, the bulk
// of the fill uses non-temporal 16-byte SSE2 stores; otherwise, and for the
// remaining tail, plain dword stores are used.
// A `len` of zero or less writes nothing.
void huge_memsetd(void *p, dword c, int len)
{ // bypasses the cache, good for >4MB
	// Without this guard a negative count would make the tail loop below write
	// far past the buffer.
	if(len <= 0)
		return;
	dword *t = (dword *)p;
	if(((uintptr_t)t & 3) == 0 && len > 64) {
		const __m128i val4 = _mm_set1_epi32((int)c);
		// Align to 16 bytes for SSE; t is 4-byte aligned here, so this takes at
		// most three single stores.
		while((uintptr_t)t & 15) {
			*t++ = c;
			len--;
		}
		// 64 bytes (16 dwords) per iteration as four streaming stores.
		for(; len >= 16; t += 16, len -= 16) {
			_mm_stream_si128((__m128i *)(t + 0), val4);
			_mm_stream_si128((__m128i *)(t + 4), val4);
			_mm_stream_si128((__m128i *)(t + 8), val4);
			_mm_stream_si128((__m128i *)(t + 12), val4);
		}
		_mm_sfence(); // order the streaming stores before any later stores
	}
	// Remaining dwords (or the whole fill when the fast path was not taken).
	for(; len > 0; len--)
		*t++ = c;
}
#endif
}