PPU: remove SSSE3 dependency

This commit is contained in:
Nekotekina 2017-12-20 00:01:03 +03:00
parent 1ae8850b3f
commit 83da7f9b63
6 changed files with 246 additions and 60 deletions

View file

@ -105,7 +105,11 @@ if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--allow-multiple-definition") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--allow-multiple-definition")
endif() endif()
add_compile_options(-msse -msse2 -mcx16 -mssse3 -mrtm) add_compile_options(-msse -msse2 -mcx16 -mrtm)
# Add -march=native (tune/enable instruction sets for the build machine's CPU) unless TRAVIS is set.
# NOTE(review): presumably TRAVIS is set on CI so that CI-produced binaries are not tied to the CI host's CPU - confirm.
if(NOT TRAVIS)
add_compile_options(-march=native)
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# This fixes 'some' of the st11range issues. See issue #2516 # This fixes 'some' of the st11range issues. See issue #2516

View file

@ -5,6 +5,10 @@
#include <cmath> #include <cmath>
// Non-MSVC build without SSSE3 enabled: define _mm_shuffle_epi8 as an empty macro so that the
// existing call sites in this file still compile. `_mm_shuffle_epi8(a, b)` then expands to the
// comma expression `(a, b)`, i.e. it no longer performs a shuffle.
// NOTE(review): this only keeps the build going - presumably the functions that use it are never
// selected at runtime in such builds; confirm against the dispatch logic.
#if !defined(_MSC_VER) && !defined(__SSSE3__)
#define _mm_shuffle_epi8
#endif
inline u64 dup32(u32 x) { return x | static_cast<u64>(x) << 32; } inline u64 dup32(u32 x) { return x | static_cast<u64>(x) << 32; }
// Write values to CR field // Write values to CR field
@ -83,6 +87,20 @@ extern __m128 sse_log2_ps(__m128 A)
return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8)); return _mm_add_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(_mm_mul_ps(x5, x6), x7), x4), _c), _mm_add_ps(_mm_mul_ps(x4, _c), x8));
} }
extern __m128i sse_pshufb(__m128i data, __m128i index)
{
v128 m = v128::fromV(_mm_and_si128(index, _mm_set1_epi8(0xf)));
v128 a = v128::fromV(data);
v128 r;
for (int i = 0; i < 16; i++)
{
r._u8[i] = a._u8[m._u8[i]];
}
return _mm_and_si128(r.vi, _mm_cmpgt_epi8(index, _mm_set1_epi8(-1)));
}
extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C) extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C)
{ {
const auto index = _mm_andnot_si128(C, _mm_set1_epi8(0x1f)); const auto index = _mm_andnot_si128(C, _mm_set1_epi8(0x1f));
@ -92,6 +110,20 @@ extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C)
return _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb)); return _mm_or_si128(_mm_and_si128(mask, sa), _mm_andnot_si128(mask, sb));
} }
extern __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C)
{
__m128i ab[2]{B, A};
v128 index = v128::fromV(_mm_andnot_si128(C, _mm_set1_epi8(0x1f)));
v128 res;
for (int i = 0; i < 16; i++)
{
res._u8[i] = ((u8*)+ab)[index._u8[i]];
}
return res.vi;
}
extern __m128i sse_altivec_lvsl(u64 addr) extern __m128i sse_altivec_lvsl(u64 addr)
{ {
alignas(16) static const u8 lvsl_values[0x10][0x10] = alignas(16) static const u8 lvsl_values[0x10][0x10] =
@ -202,6 +234,26 @@ extern void sse_cellbe_stvrx(u64 addr, __m128i a)
_mm_maskmoveu_si128(_mm_shuffle_epi8(a, lvrx_masks[addr & 0xf]), lvlx_masks[addr & 0xf], (char*)vm::base(addr & ~0xf)); _mm_maskmoveu_si128(_mm_shuffle_epi8(a, lvrx_masks[addr & 0xf]), lvlx_masks[addr & 0xf], (char*)vm::base(addr & ~0xf));
} }
// Same as sse_cellbe_lvlx, but shuffles through sse_pshufb instead of _mm_shuffle_epi8.
// Loads the 16-byte-aligned block containing addr and permutes it with lvlx_masks[addr & 0xf].
extern __m128i sse_cellbe_lvlx_v0(u64 addr)
{
	const auto block = reinterpret_cast<__m128i*>(vm::base(addr & ~0xf));
	const __m128i loaded = _mm_load_si128(block);

	return sse_pshufb(loaded, lvlx_masks[addr & 0xf]);
}
// Same as sse_cellbe_stvlx, but shuffles through sse_pshufb instead of _mm_shuffle_epi8.
// Permutes a with lvlx_masks[addr & 0xf] and does a byte-masked store (mask: lvrx_masks[addr & 0xf])
// into the 16-byte-aligned block containing addr.
extern void sse_cellbe_stvlx_v0(u64 addr, __m128i a)
{
	const __m128i shuffled = sse_pshufb(a, lvlx_masks[addr & 0xf]);
	char* const dest = reinterpret_cast<char*>(vm::base(addr & ~0xf));

	_mm_maskmoveu_si128(shuffled, lvrx_masks[addr & 0xf], dest);
}
// Same as sse_cellbe_lvrx, but shuffles through sse_pshufb instead of _mm_shuffle_epi8.
// Loads the 16-byte-aligned block containing addr and permutes it with lvrx_masks[addr & 0xf].
extern __m128i sse_cellbe_lvrx_v0(u64 addr)
{
	const auto block = reinterpret_cast<__m128i*>(vm::base(addr & ~0xf));
	const __m128i loaded = _mm_load_si128(block);

	return sse_pshufb(loaded, lvrx_masks[addr & 0xf]);
}
// Same as sse_cellbe_stvrx, but shuffles through sse_pshufb instead of _mm_shuffle_epi8.
// Permutes a with lvrx_masks[addr & 0xf] and does a byte-masked store (mask: lvlx_masks[addr & 0xf])
// into the 16-byte-aligned block containing addr.
extern void sse_cellbe_stvrx_v0(u64 addr, __m128i a)
{
	const __m128i shuffled = sse_pshufb(a, lvrx_masks[addr & 0xf]);
	char* const dest = reinterpret_cast<char*>(vm::base(addr & ~0xf));

	_mm_maskmoveu_si128(shuffled, lvlx_masks[addr & 0xf], dest);
}
template<typename T> template<typename T>
struct add_flags_result_t struct add_flags_result_t
{ {
@ -1327,7 +1379,13 @@ bool ppu_interpreter::VOR(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::VPERM(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::VPERM(ppu_thread& ppu, ppu_opcode_t op)
{
// VPERM for the precise table: permutes vr[va]/vr[vb] under control of vr[vc] through
// sse_altivec_vperm_v0 (the byte-loop variant that does not use _mm_shuffle_epi8) and
// writes the result to vr[vd]. Always returns true.
ppu.vr[op.vd].vi = sse_altivec_vperm_v0(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
return true;
}
bool ppu_interpreter_fast::VPERM(ppu_thread& ppu, ppu_opcode_t op)
{ {
ppu.vr[op.vd].vi = sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi); ppu.vr[op.vd].vi = sse_altivec_vperm(ppu.vr[op.va].vi, ppu.vr[op.vb].vi, ppu.vr[op.vc].vi);
return true; return true;
@ -3881,7 +3939,14 @@ bool ppu_interpreter::DIVW(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::LVLX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::LVLX(ppu_thread& ppu, ppu_opcode_t op)
{
// Address is gpr[ra] + gpr[rb], or gpr[rb] alone when the ra field is 0
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
// Load through the variant that does not use _mm_shuffle_epi8 and store the result in vr[vd]
ppu.vr[op.vd].vi = sse_cellbe_lvlx_v0(addr);
return true;
}
bool ppu_interpreter_fast::LVLX(ppu_thread& ppu, ppu_opcode_t op)
{ {
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvlx(addr); ppu.vr[op.vd].vi = sse_cellbe_lvlx(addr);
@ -3945,7 +4010,14 @@ bool ppu_interpreter::SRD(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::LVRX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::LVRX(ppu_thread& ppu, ppu_opcode_t op)
{
// Address is gpr[ra] + gpr[rb], or gpr[rb] alone when the ra field is 0
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
// Load through the variant that does not use _mm_shuffle_epi8 and store the result in vr[vd]
ppu.vr[op.vd].vi = sse_cellbe_lvrx_v0(addr);
return true;
}
bool ppu_interpreter_fast::LVRX(ppu_thread& ppu, ppu_opcode_t op)
{ {
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
ppu.vr[op.vd].vi = sse_cellbe_lvrx(addr); ppu.vr[op.vd].vi = sse_cellbe_lvrx(addr);
@ -4013,7 +4085,14 @@ bool ppu_interpreter::LFDUX(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::STVLX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::STVLX(ppu_thread& ppu, ppu_opcode_t op)
{
// Address is gpr[ra] + gpr[rb], or gpr[rb] alone when the ra field is 0
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
// Store vr[vs] through the variant that does not use _mm_shuffle_epi8
sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs].vi);
return true;
}
bool ppu_interpreter_fast::STVLX(ppu_thread& ppu, ppu_opcode_t op)
{ {
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi); sse_cellbe_stvlx(addr, ppu.vr[op.vs].vi);
@ -4061,7 +4140,14 @@ bool ppu_interpreter::STFSX(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::STVRX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::STVRX(ppu_thread& ppu, ppu_opcode_t op)
{
// Address is gpr[ra] + gpr[rb], or gpr[rb] alone when the ra field is 0
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
// Store vr[vs] through the variant that does not use _mm_shuffle_epi8
sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs].vi);
return true;
}
bool ppu_interpreter_fast::STVRX(ppu_thread& ppu, ppu_opcode_t op)
{ {
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]; const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi); sse_cellbe_stvrx(addr, ppu.vr[op.vs].vi);
@ -4121,7 +4207,12 @@ bool ppu_interpreter::STFDUX(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::LVLXL(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
// Handled exactly like LVLX: forwards to the LVLX of this class
return LVLX(ppu, op);
}
bool ppu_interpreter_fast::LVLXL(ppu_thread& ppu, ppu_opcode_t op)
{ {
return LVLX(ppu, op); return LVLX(ppu, op);
} }
@ -4171,7 +4262,12 @@ bool ppu_interpreter::SRAD(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::LVRXL(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
// Handled exactly like LVRX: forwards to the LVRX of this class
return LVRX(ppu, op);
}
bool ppu_interpreter_fast::LVRXL(ppu_thread& ppu, ppu_opcode_t op)
{ {
return LVRX(ppu, op); return LVRX(ppu, op);
} }
@ -4208,7 +4304,12 @@ bool ppu_interpreter::EIEIO(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::STVLXL(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
{
// Handled exactly like STVLX: forwards to the STVLX of this class
return STVLX(ppu, op);
}
bool ppu_interpreter_fast::STVLXL(ppu_thread& ppu, ppu_opcode_t op)
{ {
return STVLX(ppu, op); return STVLX(ppu, op);
} }
@ -4227,7 +4328,12 @@ bool ppu_interpreter::EXTSH(ppu_thread& ppu, ppu_opcode_t op)
return true; return true;
} }
bool ppu_interpreter::STVRXL(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter_precise::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
{
// Handled exactly like STVRX: forwards to the STVRX of this class
return STVRX(ppu, op);
}
bool ppu_interpreter_fast::STVRXL(ppu_thread& ppu, ppu_opcode_t op)
{ {
return STVRX(ppu, op); return STVRX(ppu, op);
} }

View file

@ -4,6 +4,8 @@
class ppu_thread; class ppu_thread;
// Function pointer type with the same signature as the static functions declared in
// ppu_interpreter: takes the PPU thread and an opcode, returns bool
using ppu_inter_func_t = bool(*)(ppu_thread& ppu, ppu_opcode_t op);
struct ppu_interpreter struct ppu_interpreter
{ {
static bool MFVSCR(ppu_thread&, ppu_opcode_t); static bool MFVSCR(ppu_thread&, ppu_opcode_t);
@ -75,7 +77,6 @@ struct ppu_interpreter
static bool VNMSUBFP(ppu_thread&, ppu_opcode_t); static bool VNMSUBFP(ppu_thread&, ppu_opcode_t);
static bool VNOR(ppu_thread&, ppu_opcode_t); static bool VNOR(ppu_thread&, ppu_opcode_t);
static bool VOR(ppu_thread&, ppu_opcode_t); static bool VOR(ppu_thread&, ppu_opcode_t);
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool VPKPX(ppu_thread&, ppu_opcode_t); static bool VPKPX(ppu_thread&, ppu_opcode_t);
static bool VPKUHUM(ppu_thread&, ppu_opcode_t); static bool VPKUHUM(ppu_thread&, ppu_opcode_t);
static bool VPKUWUM(ppu_thread&, ppu_opcode_t); static bool VPKUWUM(ppu_thread&, ppu_opcode_t);
@ -247,42 +248,34 @@ struct ppu_interpreter
static bool STVXL(ppu_thread&, ppu_opcode_t); static bool STVXL(ppu_thread&, ppu_opcode_t);
static bool DIVD(ppu_thread&, ppu_opcode_t); static bool DIVD(ppu_thread&, ppu_opcode_t);
static bool DIVW(ppu_thread&, ppu_opcode_t); static bool DIVW(ppu_thread&, ppu_opcode_t);
static bool LVLX(ppu_thread&, ppu_opcode_t);
static bool LDBRX(ppu_thread&, ppu_opcode_t); static bool LDBRX(ppu_thread&, ppu_opcode_t);
static bool LSWX(ppu_thread&, ppu_opcode_t); static bool LSWX(ppu_thread&, ppu_opcode_t);
static bool LWBRX(ppu_thread&, ppu_opcode_t); static bool LWBRX(ppu_thread&, ppu_opcode_t);
static bool LFSX(ppu_thread&, ppu_opcode_t); static bool LFSX(ppu_thread&, ppu_opcode_t);
static bool SRW(ppu_thread&, ppu_opcode_t); static bool SRW(ppu_thread&, ppu_opcode_t);
static bool SRD(ppu_thread&, ppu_opcode_t); static bool SRD(ppu_thread&, ppu_opcode_t);
static bool LVRX(ppu_thread&, ppu_opcode_t);
static bool LSWI(ppu_thread&, ppu_opcode_t); static bool LSWI(ppu_thread&, ppu_opcode_t);
static bool LFSUX(ppu_thread&, ppu_opcode_t); static bool LFSUX(ppu_thread&, ppu_opcode_t);
static bool SYNC(ppu_thread&, ppu_opcode_t); static bool SYNC(ppu_thread&, ppu_opcode_t);
static bool LFDX(ppu_thread&, ppu_opcode_t); static bool LFDX(ppu_thread&, ppu_opcode_t);
static bool LFDUX(ppu_thread&, ppu_opcode_t); static bool LFDUX(ppu_thread&, ppu_opcode_t);
static bool STVLX(ppu_thread&, ppu_opcode_t);
static bool STDBRX(ppu_thread&, ppu_opcode_t); static bool STDBRX(ppu_thread&, ppu_opcode_t);
static bool STSWX(ppu_thread&, ppu_opcode_t); static bool STSWX(ppu_thread&, ppu_opcode_t);
static bool STWBRX(ppu_thread&, ppu_opcode_t); static bool STWBRX(ppu_thread&, ppu_opcode_t);
static bool STFSX(ppu_thread&, ppu_opcode_t); static bool STFSX(ppu_thread&, ppu_opcode_t);
static bool STVRX(ppu_thread&, ppu_opcode_t);
static bool STFSUX(ppu_thread&, ppu_opcode_t); static bool STFSUX(ppu_thread&, ppu_opcode_t);
static bool STSWI(ppu_thread&, ppu_opcode_t); static bool STSWI(ppu_thread&, ppu_opcode_t);
static bool STFDX(ppu_thread&, ppu_opcode_t); static bool STFDX(ppu_thread&, ppu_opcode_t);
static bool STFDUX(ppu_thread&, ppu_opcode_t); static bool STFDUX(ppu_thread&, ppu_opcode_t);
static bool LVLXL(ppu_thread&, ppu_opcode_t);
static bool LHBRX(ppu_thread&, ppu_opcode_t); static bool LHBRX(ppu_thread&, ppu_opcode_t);
static bool SRAW(ppu_thread&, ppu_opcode_t); static bool SRAW(ppu_thread&, ppu_opcode_t);
static bool SRAD(ppu_thread&, ppu_opcode_t); static bool SRAD(ppu_thread&, ppu_opcode_t);
static bool LVRXL(ppu_thread&, ppu_opcode_t);
static bool DSS(ppu_thread&, ppu_opcode_t); static bool DSS(ppu_thread&, ppu_opcode_t);
static bool SRAWI(ppu_thread&, ppu_opcode_t); static bool SRAWI(ppu_thread&, ppu_opcode_t);
static bool SRADI(ppu_thread&, ppu_opcode_t); static bool SRADI(ppu_thread&, ppu_opcode_t);
static bool EIEIO(ppu_thread&, ppu_opcode_t); static bool EIEIO(ppu_thread&, ppu_opcode_t);
static bool STVLXL(ppu_thread&, ppu_opcode_t);
static bool STHBRX(ppu_thread&, ppu_opcode_t); static bool STHBRX(ppu_thread&, ppu_opcode_t);
static bool EXTSH(ppu_thread&, ppu_opcode_t); static bool EXTSH(ppu_thread&, ppu_opcode_t);
static bool STVRXL(ppu_thread&, ppu_opcode_t);
static bool EXTSB(ppu_thread&, ppu_opcode_t); static bool EXTSB(ppu_thread&, ppu_opcode_t);
static bool STFIWX(ppu_thread&, ppu_opcode_t); static bool STFIWX(ppu_thread&, ppu_opcode_t);
static bool EXTSW(ppu_thread&, ppu_opcode_t); static bool EXTSW(ppu_thread&, ppu_opcode_t);
@ -362,6 +355,16 @@ struct ppu_interpreter
struct ppu_interpreter_precise final : ppu_interpreter struct ppu_interpreter_precise final : ppu_interpreter
{ {
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool LVLX(ppu_thread&, ppu_opcode_t);
static bool LVLXL(ppu_thread&, ppu_opcode_t);
static bool LVRX(ppu_thread&, ppu_opcode_t);
static bool LVRXL(ppu_thread&, ppu_opcode_t);
static bool STVLX(ppu_thread&, ppu_opcode_t);
static bool STVLXL(ppu_thread&, ppu_opcode_t);
static bool STVRX(ppu_thread&, ppu_opcode_t);
static bool STVRXL(ppu_thread&, ppu_opcode_t);
static bool VPKSHSS(ppu_thread&, ppu_opcode_t); static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
static bool VPKSHUS(ppu_thread&, ppu_opcode_t); static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
static bool VPKSWSS(ppu_thread&, ppu_opcode_t); static bool VPKSWSS(ppu_thread&, ppu_opcode_t);
@ -395,6 +398,16 @@ struct ppu_interpreter_precise final : ppu_interpreter
struct ppu_interpreter_fast final : ppu_interpreter struct ppu_interpreter_fast final : ppu_interpreter
{ {
static bool VPERM(ppu_thread&, ppu_opcode_t);
static bool LVLX(ppu_thread&, ppu_opcode_t);
static bool LVLXL(ppu_thread&, ppu_opcode_t);
static bool LVRX(ppu_thread&, ppu_opcode_t);
static bool LVRXL(ppu_thread&, ppu_opcode_t);
static bool STVLX(ppu_thread&, ppu_opcode_t);
static bool STVLXL(ppu_thread&, ppu_opcode_t);
static bool STVRX(ppu_thread&, ppu_opcode_t);
static bool STVRXL(ppu_thread&, ppu_opcode_t);
static bool VPKSHSS(ppu_thread&, ppu_opcode_t); static bool VPKSHSS(ppu_thread&, ppu_opcode_t);
static bool VPKSHUS(ppu_thread&, ppu_opcode_t); static bool VPKSHUS(ppu_thread&, ppu_opcode_t);
static bool VPKSWSS(ppu_thread&, ppu_opcode_t); static bool VPKSWSS(ppu_thread&, ppu_opcode_t);

View file

@ -555,6 +555,12 @@ public:
}); });
} }
// Builds the table with the default constructor first, then hands m_table to `init`
// so the caller can modify the entries.
// NOTE(review): F&& is an unconstrained forwarding reference on a non-explicit
// single-argument constructor, so overload resolution can prefer it over the copy
// constructor for non-const lvalue arguments - confirm no caller relies on copying.
template <typename F>
ppu_decoder(F&& init) : ppu_decoder()
{
init(m_table);
}
const std::array<T, 0x20000>& get_table() const const std::array<T, 0x20000>& get_table() const
{ {
return m_table; return m_table;

View file

@ -56,6 +56,16 @@
const bool s_use_rtm = utils::has_rtm(); const bool s_use_rtm = utils::has_rtm();
// Whether the SSSE3 code paths may be used.
// - MSVC: decided at startup by utils::has_ssse3().
// - Other compilers: decided at compile time by whether __SSSE3__ is defined.
const bool s_use_ssse3 =
#ifdef _MSC_VER
	utils::has_ssse3();
#elif defined(__SSSE3__) // test with defined(): an undefined macro in #elif warns under -Wundef
	true;
#else
	false;
// Without SSSE3 the intrinsic is unavailable: define it as an empty macro so that
// any later use in this translation unit still compiles.
#define _mm_shuffle_epi8
#endif
extern u64 get_system_time(); extern u64 get_system_time();
namespace vm { using namespace ps3; } namespace vm { using namespace ps3; }
@ -101,8 +111,57 @@ void fmt_class_string<ppu_decoder_type>::format(std::string& out, u64 arg)
}); });
} }
const ppu_decoder<ppu_interpreter_precise> s_ppu_interpreter_precise; // Pairs of equivalent interpreter functions: .first is the ppu_interpreter_precise version (SSE2 only), .second is the ppu_interpreter_fast version (uses SSSE3)
const ppu_decoder<ppu_interpreter_fast> s_ppu_interpreter_fast; const std::pair<ppu_inter_func_t, ppu_inter_func_t> s_ppu_dispatch_table[]
{
// FUNC(x) expands to one {precise, fast} pair for the instruction named x
#define FUNC(x) {&ppu_interpreter_precise::x, &ppu_interpreter_fast::x}
FUNC(VPERM),
FUNC(LVLX),
FUNC(LVLXL),
FUNC(LVRX),
FUNC(LVRXL),
FUNC(STVLX),
FUNC(STVLXL),
FUNC(STVRX),
FUNC(STVRXL),
#undef FUNC
};
// Precise decoder table. When SSSE3 is usable, every entry that matches the precise half of a
// pair in s_ppu_dispatch_table is replaced with the fast half of that pair.
extern const ppu_decoder<ppu_interpreter_precise> g_ppu_interpreter_precise([](auto& table)
{
	if (!s_use_ssse3)
	{
		// Keep the table exactly as the default constructor built it
		return;
	}

	for (auto& entry : table)
	{
		for (const auto& alt : s_ppu_dispatch_table)
		{
			if (alt.first != entry)
			{
				continue;
			}

			// At most one substitution per entry
			entry = alt.second;
			break;
		}
	}
});
// Fast decoder table. When SSSE3 is not usable, every entry that matches the fast half of a
// pair in s_ppu_dispatch_table is replaced with the precise half of that pair.
extern const ppu_decoder<ppu_interpreter_fast> g_ppu_interpreter_fast([](auto& table)
{
	if (s_use_ssse3)
	{
		// Keep the table exactly as the default constructor built it
		return;
	}

	for (auto& entry : table)
	{
		for (const auto& alt : s_ppu_dispatch_table)
		{
			if (alt.second != entry)
			{
				continue;
			}

			// At most one substitution per entry
			entry = alt.first;
			break;
		}
	}
});
extern void ppu_initialize(); extern void ppu_initialize();
extern void ppu_initialize(const ppu_module& info); extern void ppu_initialize(const ppu_module& info);
@ -120,8 +179,8 @@ static u32 ppu_cache(u32 addr)
{ {
// Select opcode table // Select opcode table
const auto& table = *( const auto& table = *(
g_cfg.core.ppu_decoder == ppu_decoder_type::precise ? &s_ppu_interpreter_precise.get_table() : g_cfg.core.ppu_decoder == ppu_decoder_type::precise ? &g_ppu_interpreter_precise.get_table() :
g_cfg.core.ppu_decoder == ppu_decoder_type::fast ? &s_ppu_interpreter_fast.get_table() : g_cfg.core.ppu_decoder == ppu_decoder_type::fast ? &g_ppu_interpreter_fast.get_table() :
(fmt::throw_exception<std::logic_error>("Invalid PPU decoder"), nullptr)); (fmt::throw_exception<std::logic_error>("Invalid PPU decoder"), nullptr));
return ::narrow<u32>(reinterpret_cast<std::uintptr_t>(table[ppu_decode(vm::read32(addr))])); return ::narrow<u32>(reinterpret_cast<std::uintptr_t>(table[ppu_decode(vm::read32(addr))]));
@ -455,7 +514,9 @@ void ppu_thread::cpu_task()
{ {
case ppu_cmd::opcode: case ppu_cmd::opcode:
{ {
cmd_pop(), s_ppu_interpreter_fast.decode(arg)(*this, {arg}); cmd_pop(), g_cfg.core.ppu_decoder == ppu_decoder_type::precise
? g_ppu_interpreter_precise.decode(arg)(*this, {arg})
: g_ppu_interpreter_fast.decode(arg)(*this, {arg});
break; break;
} }
case ppu_cmd::set_gpr: case ppu_cmd::set_gpr:
@ -545,7 +606,7 @@ void ppu_thread::exec_task()
continue; continue;
} }
if (cia % 16) if (cia % 16 || !s_use_ssse3)
{ {
// Unaligned // Unaligned
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + cia); const u32 op = *reinterpret_cast<const be_t<u32>*>(base + cia);
@ -808,12 +869,17 @@ extern ppu_function_t ppu_get_syscall(u64 code);
extern __m128 sse_exp2_ps(__m128 A); extern __m128 sse_exp2_ps(__m128 A);
extern __m128 sse_log2_ps(__m128 A); extern __m128 sse_log2_ps(__m128 A);
extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C); extern __m128i sse_altivec_vperm(__m128i A, __m128i B, __m128i C);
extern __m128i sse_altivec_vperm_v0(__m128i A, __m128i B, __m128i C);
extern __m128i sse_altivec_lvsl(u64 addr); extern __m128i sse_altivec_lvsl(u64 addr);
extern __m128i sse_altivec_lvsr(u64 addr); extern __m128i sse_altivec_lvsr(u64 addr);
extern __m128i sse_cellbe_lvlx(u64 addr); extern __m128i sse_cellbe_lvlx(u64 addr);
extern __m128i sse_cellbe_lvrx(u64 addr); extern __m128i sse_cellbe_lvrx(u64 addr);
extern void sse_cellbe_stvlx(u64 addr, __m128i a); extern void sse_cellbe_stvlx(u64 addr, __m128i a);
extern void sse_cellbe_stvrx(u64 addr, __m128i a); extern void sse_cellbe_stvrx(u64 addr, __m128i a);
extern __m128i sse_cellbe_lvlx_v0(u64 addr);
extern __m128i sse_cellbe_lvrx_v0(u64 addr);
extern void sse_cellbe_stvlx_v0(u64 addr, __m128i a);
extern void sse_cellbe_stvrx_v0(u64 addr, __m128i a);
[[noreturn]] static void ppu_trap(ppu_thread& ppu, u64 addr) [[noreturn]] static void ppu_trap(ppu_thread& ppu, u64 addr)
{ {
@ -1024,13 +1090,13 @@ extern void ppu_initialize(const ppu_module& info)
{ "__stdcx", (u64)&ppu_stdcx }, { "__stdcx", (u64)&ppu_stdcx },
{ "__vexptefp", (u64)&sse_exp2_ps }, { "__vexptefp", (u64)&sse_exp2_ps },
{ "__vlogefp", (u64)&sse_log2_ps }, { "__vlogefp", (u64)&sse_log2_ps },
{ "__vperm", (u64)&sse_altivec_vperm }, { "__vperm", s_use_ssse3 ? (u64)&sse_altivec_vperm : (u64)&sse_altivec_vperm_v0 },
{ "__lvsl", (u64)&sse_altivec_lvsl }, { "__lvsl", (u64)&sse_altivec_lvsl },
{ "__lvsr", (u64)&sse_altivec_lvsr }, { "__lvsr", (u64)&sse_altivec_lvsr },
{ "__lvlx", (u64)&sse_cellbe_lvlx }, { "__lvlx", s_use_ssse3 ? (u64)&sse_cellbe_lvlx : (u64)&sse_cellbe_lvlx_v0 },
{ "__lvrx", (u64)&sse_cellbe_lvrx }, { "__lvrx", s_use_ssse3 ? (u64)&sse_cellbe_lvrx : (u64)&sse_cellbe_lvrx_v0 },
{ "__stvlx", (u64)&sse_cellbe_stvlx }, { "__stvlx", s_use_ssse3 ? (u64)&sse_cellbe_stvlx : (u64)&sse_cellbe_stvlx_v0 },
{ "__stvrx", (u64)&sse_cellbe_stvrx }, { "__stvrx", s_use_ssse3 ? (u64)&sse_cellbe_stvrx : (u64)&sse_cellbe_stvrx_v0 },
}; };
for (u64 index = 0; index < 1024; index++) for (u64 index = 0; index < 1024; index++)

View file

@ -110,15 +110,6 @@ void main_window::Init()
Q_EMIT RequestGlobalStylesheetChange(guiSettings->GetCurrentStylesheetPath()); Q_EMIT RequestGlobalStylesheetChange(guiSettings->GetCurrentStylesheetPath());
ConfigureGuiFromSettings(true); ConfigureGuiFromSettings(true);
if (!utils::has_ssse3())
{
QMessageBox::critical(this, "SSSE3 Error (with three S, not two)",
"Your system does not meet the minimum requirements needed to run RPCS3.\n"
"Your CPU does not support SSSE3 (with three S, not two).\n");
std::exit(EXIT_FAILURE);
}
#ifdef BRANCH #ifdef BRANCH
if ("RPCS3/rpcs3/master"s != STRINGIZE(BRANCH) && ""s != STRINGIZE(BRANCH)) if ("RPCS3/rpcs3/master"s != STRINGIZE(BRANCH) && ""s != STRINGIZE(BRANCH))
#else #else