mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-15 14:16:07 -06:00
262 lines
8.6 KiB
C
262 lines
8.6 KiB
C
#ifndef CPU_LE
|
|
#error Only little endian CPUs supported
|
|
#endif
|
|
|
|
#if defined(CPU_X86) && defined(COMPILER_MSC)
|
|
|
|
#ifdef COMPILER_GCC
|
|
#ifdef CPU_64
|
|
inline word SwapEndian16(word v) { __asm__("xchgb %b0,%h0" : "=Q" (v) : "0" (v)); return v; }
|
|
inline int16 SwapEndian16(int16 v) { __asm__("xchgb %b0,%h0" : "=Q" (v) : "0" (v)); return v; }
|
|
#else
|
|
inline word SwapEndian16(word v) { __asm__("xchgb %b0,%h0" : "=q" (v) : "0" (v)); return v; }
|
|
inline int16 SwapEndian16(int16 v) { __asm__("xchgb %b0,%h0" : "=q" (v) : "0" (v)); return v; }
|
|
#endif
|
|
// 32-bit byte swap using the bswap instruction on the operand register.
inline dword SwapEndian32(dword v)
{
	dword swapped;
	__asm__("bswap %0" : "=r" (swapped) : "0" (v));
	return swapped;
}

// Signed overload, same instruction.
inline int SwapEndian32(int v)
{
	int swapped;
	__asm__("bswap %0" : "=r" (swapped) : "0" (v));
	return swapped;
}
|
|
#endif
|
|
|
|
#ifdef COMPILER_MSC
|
|
#pragma intrinsic (_byteswap_ushort, _byteswap_ulong, _byteswap_uint64, strlen)
|
|
|
|
inline word SwapEndian16(word v) { return _byteswap_ushort(v); }
|
|
inline int16 SwapEndian16(int16 v) { return _byteswap_ushort(v); }
|
|
inline dword SwapEndian32(dword v) { return _byteswap_ulong(v); }
|
|
inline int SwapEndian32(int v) { return _byteswap_ulong(v); }
|
|
#endif
|
|
|
|
inline void EndianSwap(word& v) { v = SwapEndian16(v); }
|
|
inline void EndianSwap(int16& v) { v = SwapEndian16(v); }
|
|
inline void EndianSwap(dword& v) { v = SwapEndian32(v); }
|
|
inline void EndianSwap(int& v) { v = SwapEndian32(v); }
|
|
|
|
#else
|
|
|
|
#ifdef COMPILER_GCC
|
|
|
|
// 32-bit byte swap through the compiler builtin.
inline dword SwapEndian32(dword v)
{
	const dword swapped = __builtin_bswap32(v);
	return swapped;
}

// Signed overload; the value is converted implicitly on the way in and out.
inline int SwapEndian32(int v)
{
	const int swapped = __builtin_bswap32(v);
	return swapped;
}
|
|
|
|
inline word SwapEndian16(word v) { return SwapEndian32(v) >> 16; } // GCC bug workaround
|
|
inline int16 SwapEndian16(int16 v) { return SwapEndian32(v) >> 16; }
|
|
|
|
inline void EndianSwap(word& v) { v = SwapEndian16(v); }
|
|
inline void EndianSwap(int16& v) { v = SwapEndian16(v); }
|
|
inline void EndianSwap(dword& v) { v = SwapEndian32(v); }
|
|
inline void EndianSwap(int& v) { v = SwapEndian32(v); }
|
|
|
|
#else
|
|
|
|
inline void EndianSwap(word& v) { byte *x = (byte *)(&v); Swap(x[0], x[1]); }
|
|
inline void EndianSwap(int16& v) { EndianSwap(*(word *)&v); }
|
|
inline void EndianSwap(dword& v) { byte *x = (byte *)&v; Swap(x[0], x[3]); Swap(x[1], x[2]); }
|
|
inline void EndianSwap(int& v) { EndianSwap(*(dword *)&v); }
|
|
inline word SwapEndian16(word v) { EndianSwap(v); return v; }
|
|
inline int16 SwapEndian16(int16 v) { EndianSwap(v); return v; }
|
|
inline dword SwapEndian32(dword v) { EndianSwap(v); return v; }
|
|
inline int SwapEndian32(int v) { EndianSwap(v); return v; }
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#if defined(CPU_AMD64) && (defined(COMPILER_GCC) || defined(COMPILER_MSC))
|
|
#ifdef COMPILER_GCC
|
|
inline uint64 SwapEndian64(uint64 v) { __asm__("bswap %0" : "=r" (v) : "0" (v)); return v; }
|
|
inline int64 SwapEndian64(int64 v) { __asm__("bswap %0" : "=r" (v) : "0" (v)); return v; }
|
|
#endif
|
|
#ifdef COMPILER_MSC
|
|
inline uint64 SwapEndian64(uint64 v) { return _byteswap_uint64(v); }
|
|
inline int64 SwapEndian64(int64 v) { return _byteswap_uint64(v); }
|
|
#endif
|
|
|
|
inline void EndianSwap(int64& v) { v = SwapEndian64(v); }
|
|
inline void EndianSwap(uint64& v) { v = SwapEndian64(v); }
|
|
|
|
#else
|
|
|
|
#ifdef COMPILER_GCC
|
|
|
|
inline uint64 SwapEndian64(uint64 v) { return __builtin_bswap64(v); }
|
|
inline int64 SwapEndian64(int64 v) { return __builtin_bswap64(v); }
|
|
|
|
inline void EndianSwap(int64& v) { v = SwapEndian64(v); }
|
|
inline void EndianSwap(uint64& v) { v = SwapEndian64(v); }
|
|
|
|
#else
|
|
inline void EndianSwap(int64& v) { byte *x = (byte *)&v; Swap(x[0], x[7]); Swap(x[1], x[6]); Swap(x[2], x[5]); Swap(x[3], x[4]); }
|
|
inline void EndianSwap(uint64& v) { EndianSwap(*(int64 *)&v); }
|
|
inline int64 SwapEndian64(int64 v) { EndianSwap(v); return v; }
|
|
inline uint64 SwapEndian64(uint64 v) { EndianSwap(v); return v; }
|
|
#endif
|
|
#endif
|
|
|
|
// Convenience overloads for wider arguments: the value is truncated to word
// first, then swapped with the word overload.
inline word SwapEndian16(int w)
{
	const word low16 = (word)w;
	return SwapEndian16(low16);
}

inline word SwapEndian16(dword w)
{
	const word low16 = (word)w;
	return SwapEndian16(low16);
}
|
|
|
|
// Array overloads of EndianSwap (word, int16, dword, int, int64, uint64): each
// takes a pointer `v` and an element `count`. Only declared here; presumably
// they byte-swap `count` elements in place - confirm against the definitions.
void EndianSwap(word *v, size_t count);
|
|
void EndianSwap(int16 *v, size_t count);
|
|
void EndianSwap(dword *v, size_t count);
|
|
void EndianSwap(int *v, size_t count);
|
|
void EndianSwap(int64 *v, size_t count);
|
|
void EndianSwap(uint64 *v, size_t count);
|
|
|
|
// unaligned access - memcpy compiles to a simple load/store with mainstream compilers
|
|
|
|
inline int Peek16(const void *ptr) { word x; memcpy(&x, ptr, 2); return x; }
|
|
inline int Peek32(const void *ptr) { dword x; memcpy(&x, ptr, 4); return x; }
|
|
inline int64 Peek64(const void *ptr) { uint64 x; memcpy(&x, ptr, 8); return x; }
|
|
|
|
// Store the first two bytes of val's object representation at ptr
// (the low 16 bits on the little-endian CPUs this header requires).
inline void Poke16(void *ptr, int val)
{
	const void *src = &val;
	memcpy(ptr, src, 2);
}

// Store the first four bytes of val's object representation at ptr.
inline void Poke32(void *ptr, int val)
{
	const void *src = &val;
	memcpy(ptr, src, 4);
}
|
|
// Store the first eight bytes of val's object representation at ptr.
inline void Poke64(void *ptr, int64 val)
{
	const void *src = &val;
	memcpy(ptr, src, 8);
}
|
|
|
|
inline int Peek16le(const void *ptr) { return Peek16(ptr); }
|
|
inline int Peek32le(const void *ptr) { return Peek32(ptr); }
|
|
inline int64 Peek64le(const void *ptr) { return Peek64(ptr); }
|
|
|
|
inline void Poke16le(void *ptr, int val) { Poke16(ptr, val); }
|
|
inline void Poke32le(void *ptr, int val) { Poke32(ptr, val); }
|
|
inline void Poke64le(void *ptr, int64 val) { Poke64(ptr, val); }
|
|
|
|
inline int Peek16be(const void *ptr) { return SwapEndian16(Peek16(ptr)); }
|
|
inline int Peek32be(const void *ptr) { return SwapEndian32(Peek32(ptr)); }
|
|
inline int64 Peek64be(const void *ptr) { return SwapEndian64(Peek64(ptr)); }
|
|
|
|
inline void Poke16be(void *ptr, int val) { Poke16(ptr, SwapEndian16(val)); }
|
|
inline void Poke32be(void *ptr, int val) { Poke32(ptr, SwapEndian32(val)); }
|
|
inline void Poke64be(void *ptr, int64 val) { Poke64(ptr, SwapEndian64(val)); }
|
|
|
|
// MAKE2B / MAKE4B / MAKE8B assemble 2-, 4- and 8-byte values from individual
// bytes by nesting MAKEWORD, MAKELONG and MAKEQWORD; the byte order follows
// those macros (presumably b0 is the least significant byte - confirm).
#define MAKE2B(b0, b1) MAKEWORD(b0, b1)
|
|
#define MAKE4B(b0, b1, b2, b3) MAKELONG(MAKEWORD(b0, b1), MAKEWORD(b2, b3))
|
|
#define MAKE8B(b0, b1, b2, b3, b4, b5, b6, b7) MAKEQWORD(MAKE4B(b0, b1, b2, b3), MAKE4B(b4, b5, b6, b7))
|
|
|
|
#ifdef CPU_64
|
|
|
|
#define HASH64
|
|
|
|
#define HASH_CONST1 I64(0xf7c21089bee7c0a5)
|
|
#define HASH_CONST2 I64(0xc85abc8da7534a4d)
|
|
#define HASH_CONST3 I64(0x8642b0fe3e86671b)
|
|
|
|
typedef qword hash_t;
|
|
|
|
// Folds a 64-bit hash into 32 bits: multiply by HASH_CONST3, byte-reverse the
// 64-bit product and keep the low 32 bits of the reversed value.
inline dword FoldHash(qword h)
{
	return static_cast<dword>(SwapEndian64(HASH_CONST3 * h));
}
|
|
|
|
#else
|
|
|
|
#define HASH_CONST1 0xbee7c0a5
|
|
#define HASH_CONST2 0xa7534a4d
|
|
#define HASH_CONST3 0x8e86671b
|
|
|
|
typedef dword hash_t;
|
|
|
|
// 32-bit build variant: multiply the hash by HASH_CONST3 and return the
// byte-reversed product.
inline dword FoldHash(dword h)
{
	return SwapEndian32(
		HASH_CONST3 * h
	);
}
|
|
|
|
#endif
|
|
|
|
// Number of significant bits of x: basically log2(x) + 1, except that the
// result for 0 is 0.
force_inline
int SignificantBits(dword x)
{
#ifdef COMPILER_MSC
	DWORD index;
	if(!_BitScanReverse(&index, x))
		return 0; // no bit set
	return index + 1;
#else
	if(!x)
		return 0; // handled before __builtin_clz is reached
	return 32 - __builtin_clz(x);
#endif
}
|
|
|
|
// Number of significant bits of a 64-bit x: basically log2(x) + 1, except
// that the result for 0 is 0.
force_inline
int SignificantBits64(uint64 x)
{
#ifdef COMPILER_MSC
#ifdef CPU_64
	DWORD index;
	if(!_BitScanReverse64(&index, x))
		return 0; // no bit set
	return index + 1;
#else
	// 32-bit MSC build: examine the upper half first, fall back to the lower half.
	if(x & 0xffffffff00000000)
		return SignificantBits(HIDWORD(x)) + 32;
	return SignificantBits((DWORD)x);
#endif
#else
	if(!x)
		return 0; // handled before __builtin_clzll is reached
	return 64 - __builtin_clzll(x);
#endif
}
|
|
|
|
// True when x lies in [-2^63, 2^63). Any comparison with NaN is false, so
// NaN yields false.
inline bool FitsInInt64(double x)
{
	const double lower = -9223372036854775808.0; // inclusive bound
	const double upper = 9223372036854775808.0;  // exclusive bound
	if(!(x >= lower))
		return false;
	return x < upper;
}
|
|
|
|
#if defined(__SIZEOF_INT128__) && (__GNUC__ > 5 || __clang_major__ >= 5)
|
|
|
|
#ifdef CPU_X86
|
|
|
|
inline
|
|
byte addc64(uint64& result, const uint64& value, byte carry) {
|
|
return _addcarry_u64(carry, result, value, &result);
|
|
}
|
|
|
|
#else
|
|
|
|
// Add-with-carry without intrinsics: r += a + carry-in, returning the carry
// out of bit 63 (0 or 1). Any nonzero `carry` counts as a single carry-in,
// which keeps this variant in line with the _addcarry_u64 based addc64
// variants in this file.
force_inline
byte addc64(uint64& r, uint64 a, byte carry)
{
	const uint64 sum = r + a;        // may wrap around
	const byte wrapped = sum < a;    // carry out of r + a
	r = sum + (carry != 0);          // may wrap only when sum is all ones
	return wrapped | (r < sum);      // at most one of the two steps can wrap
}
|
|
|
|
#endif
|
|
|
|
inline
|
|
uint64 mul64(uint64 a, uint64 b, uint64& hi)
|
|
{
|
|
unsigned __int128 prod = (unsigned __int128)a * b;
|
|
hi = prod >> 64;
|
|
return prod;
|
|
}
|
|
|
|
#elif defined(COMPILER_MSC) && defined(CPU_64)
|
|
|
|
inline
|
|
uint64 mul64(uint64 a, uint64 b, uint64& hi)
|
|
{
|
|
return _umul128(a, b, &hi);
|
|
}
|
|
|
|
inline
|
|
byte addc64(uint64& result, const uint64& value, byte carry) {
|
|
return _addcarry_u64(carry, result, value, &result);
|
|
}
|
|
|
|
#else
|
|
|
|
// Add-with-carry without intrinsics: r += a + carry-in, returning the carry
// out of bit 63 (0 or 1). Any nonzero `carry` counts as a single carry-in,
// which keeps this variant in line with the _addcarry_u64 based addc64
// variants in this file.
force_inline
byte addc64(uint64& r, uint64 a, byte carry)
{
	const uint64 sum = r + a;        // may wrap around
	const byte wrapped = sum < a;    // carry out of r + a
	r = sum + (carry != 0);          // may wrap only when sum is all ones
	return wrapped | (r < sum);      // at most one of the two steps can wrap
}
|
|
|
|
// Portable 64x64 -> 128 bit multiply built from four 32x32 -> 64 partial
// products. Returns the low 64 bits of a * b and stores the high 64 bits in `hi`.
force_inline
uint64 mul64(uint64 a, uint64 b, uint64& hi)
{
	const uint64 LOW32 = 0xFFFFFFFF;

	const uint64 a_lo = a & LOW32, a_hi = a >> 32;
	const uint64 b_lo = b & LOW32, b_hi = b >> 32;

	const uint64 ll = a_lo * b_lo;
	const uint64 hl = a_hi * b_lo;
	const uint64 lh = a_lo * b_hi;
	const uint64 hh = a_hi * b_hi;

	// Middle column: upper half of ll, lower half of hl and all of lh.
	// The three terms total at most 2^64 - 1, so this sum cannot wrap.
	const uint64 mid = (ll >> 32) + (hl & LOW32) + lh;

	hi = (hl >> 32) + (mid >> 32) + hh;
	return (mid << 32) | (ll & LOW32);
}
|
|
|
|
#endif
|