Initial Linux Aarch64 support

* Update asmjit dependency (aarch64 branch)
* Disable USE_DISCORD_RPC by default
* Dump some JIT objects in rpcs3 cache dir
* Add SIGILL handler for all platforms
* Fix resetting zeroing denormals in thread pool
* Refactor most v128:: utils into global gv_** functions
* Refactor PPU interpreter (incomplete), remove "precise"
* - Instruction specializations with multiple accuracy flags
* - Adjust calling convention for speed
* - Removed precise/fast setting, replaced with static
* - Started refactoring interpreters for building at runtime JIT
*   (I got tired of poor compiler optimizations)
* - Expose some accuracy settings (SAT, NJ, VNAN, FPCC)
* - Add exec_bytes PPU thread variable (akin to cycle count)
* PPU LLVM: fix VCTUXS+VCTSXS instruction NaN results
* SPU interpreter: remove "precise" for now (extremely non-portable)
* - As with PPU, settings changed to static/dynamic for interpreters.
* - Precise options will be implemented later
* Fix termination after fatal error dialog
This commit is contained in:
Nekotekina 2021-12-30 19:39:18 +03:00
parent d6aa834b5f
commit 580bd2b25e
89 changed files with 20360 additions and 5612 deletions

View file

@ -19,15 +19,14 @@
#endif
#include "util/asm.hpp"
#include "util/fence.hpp"
#ifdef _MSC_VER
extern "C"
{
u64 _xgetbv(u32);
}
#ifdef _M_X64
extern "C" u64 _xgetbv(u32);
#endif
inline std::array<u32, 4> utils::get_cpuid(u32 func, u32 subfunc)
#if defined(ARCH_X64)
static inline std::array<u32, 4> get_cpuid(u32 func, u32 subfunc)
{
int regs[4];
#ifdef _MSC_VER
@ -38,7 +37,7 @@ inline std::array<u32, 4> utils::get_cpuid(u32 func, u32 subfunc)
return {0u+regs[0], 0u+regs[1], 0u+regs[2], 0u+regs[3]};
}
inline u64 utils::get_xgetbv(u32 xcr)
static inline u64 get_xgetbv(u32 xcr)
{
#ifdef _MSC_VER
return _xgetbv(xcr);
@ -48,6 +47,7 @@ inline u64 utils::get_xgetbv(u32 xcr)
return eax | (u64(edx) << 32);
#endif
}
#endif
#ifdef __APPLE__
// sysinfo_darwin.mm
@ -61,113 +61,192 @@ namespace Darwin_Version
// Reports whether feature bit 0x200 is set in element [2] of CPUID leaf 1
// (the SSSE3 flag, going by the function name).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_ssse3()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 1 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x1)
		{
			return false;
		}

		return (get_cpuid(1, 0)[2] & 0x200) != 0;
	}();

	return s_supported;
#endif
}
// Reports whether feature bit 0x80000 is set in element [2] of CPUID leaf 1
// (the SSE4.1 flag, going by the function name).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_sse41()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 1 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x1)
		{
			return false;
		}

		return (get_cpuid(1, 0)[2] & 0x80000) != 0;
	}();

	return s_supported;
#endif
}
// Reports AVX availability. All of the following must hold:
//  - CPUID leaf 1 is available,
//  - bit 0x10000000 of leaf 1 element [2] is set,
//  - both bits of mask 0x0C000000 in the same element are set,
//  - both bits of mask 0x6 are set in the value returned by get_xgetbv(0).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_avx()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		if (get_cpuid(0, 0)[0] < 0x1)
		{
			return false;
		}

		if ((get_cpuid(1, 0)[2] & 0x10000000) == 0)
		{
			return false;
		}

		if ((get_cpuid(1, 0)[2] & 0x0C000000) != 0x0C000000)
		{
			return false;
		}

		// get_xgetbv is only reached once the CPUID checks above have passed
		return (get_xgetbv(0) & 0x6) == 0x6;
	}();

	return s_supported;
#endif
}
// Reports AVX2 availability. All of the following must hold:
//  - CPUID leaf 7 is available,
//  - bit 0x20 of leaf 7 element [1] is set,
//  - both bits of mask 0x0C000000 in leaf 1 element [2] are set,
//  - both bits of mask 0x6 are set in the value returned by get_xgetbv(0).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_avx2()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		if ((get_cpuid(7, 0)[1] & 0x20) == 0)
		{
			return false;
		}

		if ((get_cpuid(1, 0)[2] & 0x0C000000) != 0x0C000000)
		{
			return false;
		}

		// get_xgetbv is only reached once the CPUID checks above have passed
		return (get_xgetbv(0) & 0x6) == 0x6;
	}();

	return s_supported;
#endif
}
// Reports whether bit 0x800 is set in element [1] of CPUID leaf 7
// (the RTM flag, going by the function name).
// The answer is computed on first call and cached.
// Every non-x64 architecture reports false; the fallback is a plain #else so
// the function always returns a value, whatever the target architecture is.
bool utils::has_rtm()
{
#if defined(ARCH_X64)
	static const bool g_value = get_cpuid(0, 0)[0] >= 0x7 && (get_cpuid(7, 0)[1] & 0x800) == 0x800;
	return g_value;
#else
	return false;
#endif
}
// Reports whether bit 0x2000 is set in element [3] of CPUID leaf 7.
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_tsx_force_abort()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 7 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		return (get_cpuid(7, 0)[3] & 0x2000) != 0;
	}();

	return s_supported;
#endif
}
// Reports whether bit 0x800 is set in element [3] of CPUID leaf 7.
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_rtm_always_abort()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 7 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		return (get_cpuid(7, 0)[3] & 0x800) != 0;
	}();

	return s_supported;
#endif
}
// Reports whether bit 0x4000 is set in element [1] of CPUID leaf 7
// (the MPX flag, going by the function name).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_mpx()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 7 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		return (get_cpuid(7, 0)[1] & 0x4000) != 0;
	}();

	return s_supported;
#endif
}
// Reports baseline AVX-512 availability: AVX512F, AVX512CD, AVX512DQ, AVX512BW
// and AVX512VL extensions (Skylake-X level support).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_avx512()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		// Every bit of the combined extension mask must be present in leaf 7 element [1]
		if ((get_cpuid(7, 0)[1] & 0xd0030000) != 0xd0030000)
		{
			return false;
		}

		if ((get_cpuid(1, 0)[2] & 0x0C000000) != 0x0C000000)
		{
			return false;
		}

		// get_xgetbv is only reached once the CPUID checks above have passed
		return (get_xgetbv(0) & 0xe6) == 0xe6;
	}();

	return s_supported;
#endif
}
// Reports Icelake-client level AVX-512 availability: AVX512IFMA, AVX512VBMI,
// AVX512VBMI2, AVX512VPOPCNTDQ, AVX512BITALG, AVX512VNNI, AVX512VPCLMULQDQ,
// AVX512GFNI and AVX512VAES, on top of the baseline reported by has_avx512().
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_avx512_icl()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Baseline support also guarantees that leaf 7 was validated
		if (!has_avx512())
		{
			return false;
		}

		if ((get_cpuid(7, 0)[1] & 0x00200000) != 0x00200000)
		{
			return false;
		}

		// Every bit of the combined mask must be present in leaf 7 element [2]
		return (get_cpuid(7, 0)[2] & 0x00005f42) == 0x00005f42;
	}();

	return s_supported;
#endif
}
// Reports AVX512VNNI availability: has_avx512() must hold and bit 0x800 of
// CPUID leaf 7 element [2] must be set.
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_avx512_vnni()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		if (!has_avx512())
		{
			return false;
		}

		return (get_cpuid(7, 0)[2] & 0x00000800) != 0;
	}();

	return s_supported;
#endif
}
// Reports whether has_avx() holds and bit 0x800 is set in element [2] of
// CPUID leaf 0x80000001 (the XOP flag, going by the function name).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_xop()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		if (!has_avx())
		{
			return false;
		}

		return (get_cpuid(0x80000001, 0)[2] & 0x800) != 0;
	}();

	return s_supported;
#endif
}
// Reports whether bit 0x1000000 is set in element [1] of CPUID leaf 7
// (the CLWB flag, going by the function name).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_clwb()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 7 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		return (get_cpuid(7, 0)[1] & 0x1000000) != 0;
	}();

	return s_supported;
#endif
}
// Reports whether bit 0x100 is set in element [3] of CPUID leaf 0x80000007
// (the invariant TSC flag, going by the function name).
// The answer is computed on first call and cached on x64.
// ARM64 builds report true unconditionally; any other architecture reports
// false so that the function always returns a value.
bool utils::has_invariant_tsc()
{
#if defined(ARCH_X64)
	// Leaf 0x80000007 belongs to the extended range, so its availability is
	// validated through the maximum extended leaf reported by 0x80000000
	// (the standard-range maximum from leaf 0 says nothing about it).
	static const bool g_value = get_cpuid(0x80000000, 0)[0] >= 0x80000007 && (get_cpuid(0x80000007, 0)[3] & 0x100) == 0x100;
	return g_value;
#elif defined(ARCH_ARM64)
	return true;
#else
	return false;
#endif
}
// Reports FMA3 availability.
// On x64 this requires has_avx() (which includes the get_xgetbv state check)
// and bit 0x1000 of CPUID leaf 1 element [2]; the answer is cached after the
// first call. FMA3 instructions are VEX-encoded, so the feature bit alone is
// not sufficient when the AVX register state is not enabled.
// ARM64 builds report true unconditionally; any other architecture reports
// false so that the function always returns a value.
bool utils::has_fma3()
{
#if defined(ARCH_X64)
	// has_avx() already verifies that leaf 1 is available
	static const bool g_value = has_avx() && (get_cpuid(1, 0)[2] & 0x1000) == 0x1000;
	return g_value;
#elif defined(ARCH_ARM64)
	return true;
#else
	return false;
#endif
}
// Reports FMA4 availability: has_avx() must hold and bit 0x10000 of CPUID
// leaf 0x80000001 element [2] must be set.
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_fma4()
{
#if defined(ARCH_X64)
	// Leaf 0x80000001 belongs to the extended range, so its availability is
	// validated through the maximum extended leaf reported by 0x80000000
	// rather than through the standard-range maximum from leaf 0.
	// has_avx() is required first, mirroring has_xop(): the feature bit alone
	// does not tell whether the AVX register state is enabled.
	static const bool g_value = has_avx() && get_cpuid(0x80000000, 0)[0] >= 0x80000001 && (get_cpuid(0x80000001, 0)[2] & 0x10000) == 0x10000;
	return g_value;
#else
	return false;
#endif
}
// Reports whether bit 0x200 is set in element [1] of CPUID leaf 7
// (the ERMS flag, going by the function name).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_erms()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 7 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		return (get_cpuid(7, 0)[1] & 0x200) != 0;
	}();

	return s_supported;
#endif
}
// Reports whether bit 0x10 is set in element [3] of CPUID leaf 7
// (the FSRM flag, going by the function name).
// The answer is computed on first call and cached; non-x64 builds report false.
bool utils::has_fsrm()
{
#if !defined(ARCH_X64)
	return false;
#else
	static const bool s_supported = []() -> bool
	{
		// Leaf 7 has to be advertised by leaf 0 before it is queried
		if (get_cpuid(0, 0)[0] < 0x7)
		{
			return false;
		}

		return (get_cpuid(7, 0)[3] & 0x10) != 0;
	}();

	return s_supported;
#endif
}
u32 utils::get_rep_movsb_threshold()
{
static const u32 g_value = []()
{
u32 thresh_value = 0xFFFFFFFF;
u32 thresh_value = umax;
if (has_fsrm())
{
thresh_value = 2047;
@ -187,6 +266,7 @@ std::string utils::get_cpu_brand()
{
std::string brand;
#if defined(ARCH_X64)
if (get_cpuid(0x80000000, 0)[0] >= 0x80000004)
{
for (u32 i = 0; i < 3; i++)
@ -198,6 +278,9 @@ std::string utils::get_cpu_brand()
{
brand = "Unknown CPU";
}
#else
brand = "Unidentified CPU";
#endif
brand.erase(brand.find_last_not_of('\0') + 1);
brand.erase(brand.find_last_not_of(' ') + 1);
@ -396,19 +479,6 @@ static constexpr ullong round_tsc(ullong val)
return utils::rounded_div(val, 1'000'000) * 1'000'000;
}
#if defined(_MSC_VER)
extern "C" void _mm_lfence();
#endif
// File-local helper that issues a load fence through the compiler-specific
// intrinsic: _mm_lfence() when _MSC_VER is defined, __builtin_ia32_lfence()
// otherwise.
static inline void lfence()
{
#if !defined(_MSC_VER)
	__builtin_ia32_lfence();
#else
	_mm_lfence();
#endif
}
ullong utils::get_tsc_freq()
{
static const ullong cal_tsc = []() -> ullong
@ -449,17 +519,17 @@ ullong utils::get_tsc_freq()
{
#ifdef _WIN32
Sleep(1);
error_data[i] = (lfence(), utils::get_tsc());
error_data[i] = (utils::lfence(), utils::get_tsc());
LARGE_INTEGER ctr;
QueryPerformanceCounter(&ctr);
rdtsc_data[i] = (lfence(), utils::get_tsc());
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
timer_data[i] = ctr.QuadPart;
#else
usleep(200);
error_data[i] = (lfence(), utils::get_tsc());
error_data[i] = (utils::lfence(), utils::get_tsc());
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
rdtsc_data[i] = (lfence(), utils::get_tsc());
rdtsc_data[i] = (utils::lfence(), utils::get_tsc());
timer_data[i] = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
#endif
}
@ -511,6 +581,7 @@ u32 utils::get_thread_count()
u32 utils::get_cpu_family()
{
#if defined(ARCH_X64)
static const u32 g_value = []()
{
const u32 reg_value = get_cpuid(0x00000001, 0)[0]; // Processor feature info
@ -528,10 +599,14 @@ u32 utils::get_cpu_family()
}();
return g_value;
#elif defined(ARCH_ARM64)
return 0;
#endif
}
u32 utils::get_cpu_model()
{
#if defined(ARCH_X64)
static const u32 g_value = []()
{
const u32 reg_value = get_cpuid(0x00000001, 0)[0]; // Processor feature info
@ -550,16 +625,19 @@ u32 utils::get_cpu_model()
}();
return g_value;
#elif defined(ARCH_ARM64)
return 0;
#endif
}
namespace utils
{
// Thread identifier captured by an immediately-invoked lambda when this
// variable is initialized: GetCurrentThreadId() on Windows, otherwise the
// pthread_self() handle reinterpreted as u64.
// Presumably this is the main thread's id (going by the name) — this relies on
// the initializer running on that thread; confirm against the startup code.
// NOTE(review): the doubled #ifdef/#else/#endif lines below look like residue
// of a whitespace-only diff (old and new form of the same directive) — confirm
// against the real file before relying on this text.
extern const u64 main_tid = []() -> u64
{
#ifdef _WIN32
#ifdef _WIN32
return GetCurrentThreadId();
#else
#else
return reinterpret_cast<u64>(pthread_self());
#endif
#endif
}();
}