// Source: mirror of https://github.com/RPCS3/rpcs3.git (synced 2025-07-03)
#include "stdafx.h"
|
|
#include "PPUInterpreter.h"
|
|
|
|
#include "Emu/Memory/vm_reservation.h"
|
|
#include "Emu/system_config.h"
|
|
#include "PPUThread.h"
|
|
#include "Emu/Cell/Common.h"
|
|
#include "Emu/Cell/PPUFunction.h"
|
|
#include "Emu/Cell/PPUAnalyser.h"
|
|
#include "Emu/Cell/timers.hpp"
|
|
#include "Emu/IdManager.h"
|
|
|
|
#include <bit>
|
|
#include <cmath>
|
|
#include <climits>
|
|
|
|
#include "util/asm.hpp"
|
|
#include "util/v128.hpp"
|
|
#include "util/simd.hpp"
|
|
#include "util/sysinfo.hpp"
|
|
#include "Utilities/JIT.h"
|
|
|
|
#if !defined(_MSC_VER)
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
|
#endif
|
|
|
|
#if defined(_MSC_VER) || !defined(__SSE2__)
|
|
#define SSSE3_FUNC
|
|
#else
|
|
#define SSSE3_FUNC __attribute__((__target__("ssse3")))
|
|
#endif
|
|
|
|
#if defined(ARCH_ARM64)
|
|
#if !defined(_MSC_VER)
|
|
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
|
#endif
|
|
#undef FORCE_INLINE
|
|
#include "Emu/CPU/sse2neon.h"
|
|
#endif
|
|
|
|
#if (defined(ARCH_X64)) && !defined(__SSSE3__)
// Build does not assume SSSE3: detect it at runtime
const bool s_use_ssse3 = utils::has_ssse3();
#else
// SSSE3 guaranteed by the build target (or emulated via sse2neon)
constexpr bool s_use_ssse3 = true; // Including non-x86
#endif
|
|
|
|
extern const ppu_decoder<ppu_itype> g_ppu_itype;
|
|
extern const ppu_decoder<ppu_iname> g_ppu_iname;
|
|
|
|
// Instruction-behavior flags used to instantiate specialized interpreter variants
enum class ppu_exec_bit : u64
{
	has_oe,       // Variant with OE bit set (presumably XER overflow update — not exercised in this chunk)
	has_rc,       // Variant with Rc bit set (writes comparison result to a CR field)
	set_sat,      // Track the VSCR.SAT saturation accumulator
	use_nj,       // Honor VSCR.NJ: flush denormals to zero
	fix_nj,       // Always flush denormal results (regardless of NJ)
	set_vnan,     // Full NaN propagation from operands to result
	fix_vnan,     // Only quiet NaN results (no operand propagation)
	set_fpcc,     // Update FPSCR.FPCC from the result comparison
	use_dfma,     // Presumably use fused multiply-add — TODO confirm (not exercised in this chunk)
	set_cr_stats, // Collect per-address CR bit usage statistics (ppu debug)

	__bitset_enum_max
};
|
|
|
|
using enum ppu_exec_bit;
|
|
|
|
// Helper for combining only used subset of exec flags at compile time
|
|
// Helper for combining only used subset of exec flags at compile time.
// Flags0 is the set of flags accumulated so far; the recursive select() walks a
// candidate flag list, adding each flag that is enabled at runtime, and finally
// instantiates the interpreter functor with exactly the accumulated flag set.
template <ppu_exec_bit... Flags0>
struct ppu_exec_select
{
	// Recursive case: test the head flag 'Flag', then recurse over the tail 'Flags...'
	template <ppu_exec_bit Flag, ppu_exec_bit... Flags, typename F>
	static ppu_intrp_func_t select(bs_t<ppu_exec_bit> selected, F func)
	{
		// Make sure there is no flag duplication, otherwise skip flag
		if constexpr (((Flags0 != Flag) && ...))
		{
			// Test only relevant flags at runtime initialization (compile both variants)
			if (selected & Flag)
			{
				// In this branch, selected flag is added to Flags0
				return ppu_exec_select<Flags0..., Flag>::template select<Flags...>(selected, func);
			}
		}

		// Flag not selected (or duplicated): continue with the remaining candidates
		return ppu_exec_select<Flags0...>::template select<Flags...>(selected, func);
	}

	// Base case: candidate list exhausted
	template <typename F>
	static ppu_intrp_func_t select(bs_t<ppu_exec_bit>, F func)
	{
		// Instantiate interpreter function with required set of flags
		return func.template operator()<Flags0...>();
	}

	// Entry point: produce a dispatcher over the candidate flag list Flags1
	template <ppu_exec_bit... Flags1>
	static auto select()
	{
#ifndef __INTELLISENSE__
		return [](bs_t<ppu_exec_bit> selected, auto func)
		{
			return ppu_exec_select::select<Flags1...>(selected, func);
		};
#endif
	}
};
|
|
|
|
// Switch between inlined interpreter invocation (exec) and builder function
#if defined(ARCH_X64)
// Build == 0: compile 'exec' into native code once via asmjit. The lambda is replayed
// with abstract 'ppu'/'op' stand-ins so member accesses emit memory operands.
#define RETURN(...) \
	if constexpr (Build == 0) { \
		static_cast<void>(exec); \
		static const ppu_intrp_func_t f = build_function_asm<ppu_intrp_func_t, asmjit::ppu_builder>("ppu_"s + __func__, [&](asmjit::ppu_builder& c) { \
			static ppu_opcode_t op{}; \
			static ppu_abstract_t ppu; \
			exec(__VA_ARGS__); \
			c.ppu_ret(); \
		}); \
		return f; \
	}
#else
#define RETURN RETURN_
#endif

// Build == 0: return a plain C++ thunk that runs 'exec', then tail-calls the next
// instruction handler with the next (byte-swapped) opcode.
#define RETURN_(...) \
	if constexpr (Build == 0) { \
		static_cast<void>(exec); \
		return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn) { \
			const auto fn = atomic_storage<ppu_intrp_func_t>::observe(next_fn->fn); \
			exec(__VA_ARGS__); \
			const auto next_op = this_op + 1; \
			return fn(ppu, {*next_op}, next_op, next_fn + 1); \
		}; \
	}
|
|
|
|
static constexpr ppu_opcode_t s_op{};
|
|
|
|
namespace asmjit
{
#if defined(ARCH_X64)
	// x64 assembler helper used by the RETURN macro to JIT individual interpreter
	// handlers. Register conventions: r10 caches a precomputed ppu offset base,
	// r11 holds the next handler's entry point.
	struct ppu_builder : vec_builder
	{
		using base = vec_builder;

		// ppu_intrp_func_t argument registers per calling convention
#ifdef _WIN32
		static constexpr x86::Gp arg_ppu = x86::rcx;
		static constexpr x86::Gp arg_op = x86::edx;
		static constexpr x86::Gp arg_this_op = x86::r8;
		static constexpr x86::Gp arg_next_fn = x86::r9;
#else
		static constexpr x86::Gp arg_ppu = x86::rdi;
		static constexpr x86::Gp arg_op = x86::esi;
		static constexpr x86::Gp arg_this_op = x86::rdx;
		static constexpr x86::Gp arg_next_fn = x86::rcx;
#endif

		u32 xmm_count = 0;
		u32 ppu_base = 0; // ppu_thread offset currently cached in r10 (0 = none)
		x86::Xmm tmp;

		ppu_builder(CodeHolder* ch)
			: base(ch)
		{
			// Initialize pointer to next function
			base::mov(x86::r11, x86::qword_ptr(arg_next_fn));
		}

		// Indexed offset to ppu.member: build a memory operand for member array MPtr
		// indexed by the opcode bitfield (bits [I, I+N)). 'last' means the opcode
		// register may be clobbered (no further fields will be extracted).
		template <auto MPtr, u32 Size = sizeof((std::declval<ppu_thread&>().*MPtr)[0]), uint I, uint N>
		x86::Mem ppu_mem(const bf_t<u32, I, N>&, bool last = false)
		{
			// Required index shift for array indexing
			constexpr u32 Shift = std::countr_zero(sizeof((std::declval<ppu_thread&>().*MPtr)[0]));

			const u32 offset = ::offset32(MPtr);

			auto tmp_r32 = x86::eax;
			auto reg_ppu = arg_ppu;

			if (last)
			{
				// Extract the field directly in the opcode register
				tmp_r32 = arg_op.r32();
			}
			else
			{
				// Keep the opcode intact: work on a copy in eax
				base::mov(tmp_r32, arg_op);

				if (offset % 16 == 0 && ppu_base != offset)
				{
					// Optimistically precompute offset to avoid [ppu + tmp*x + offset] addressing
					base::lea(x86::r10, x86::qword_ptr(arg_ppu, static_cast<s32>(offset)));
					ppu_base = offset;
				}
			}

			if (ppu_base == offset)
			{
				// Reuse the cached base in r10
				reg_ppu = x86::r10;
			}

			// Use max possible index shift
			constexpr u32 X86Shift = Shift > 3 ? 3 : Shift;
			constexpr u32 AddShift = Shift - X86Shift;
			constexpr u32 AndMask = (1u << N) - 1;

			// Move the bitfield into position and mask it; remaining scaling is done
			// by the x86 addressing-mode shift (X86Shift)
			if constexpr (I >= AddShift)
			{
				if constexpr (I != AddShift)
					base::shr(tmp_r32, I - AddShift);
				base::and_(tmp_r32, AndMask << AddShift);
			}
			else
			{
				base::and_(tmp_r32, AndMask << I);
				base::shl(tmp_r32, I + AddShift);
			}

			return x86::ptr(reg_ppu, tmp_r32.r64(), X86Shift, static_cast<s32>(offset - ppu_base), Size);
		}

		// Generic offset to ppu.member (non-indexed)
		template <auto MPtr, u32 Size = sizeof(std::declval<ppu_thread&>().*MPtr)>
		x86::Mem ppu_mem()
		{
			if (ppu_base == 0)
			{
				return x86::ptr(arg_ppu, static_cast<s32>(::offset32(MPtr)), Size);
			}
			else
			{
				// Address relative to the cached base in r10
				return x86::ptr(x86::r10, static_cast<s32>(::offset32(MPtr) - ppu_base), Size);
			}
		}

		// Memory operand for a vector register selected by an opcode bitfield
		template <u32 Size = 16, uint I, uint N>
		x86::Mem ppu_vr(const bf_t<u32, I, N>& bf, bool last = false)
		{
			return ppu_mem<&ppu_thread::vr, Size>(bf, last);
		}

		// Memory operand for the saturation accumulator
		x86::Mem ppu_sat()
		{
			return ppu_mem<&ppu_thread::sat>();
		}

		// Epilogue: advance to the next opcode (byte-swapped load) and tail-call
		// the next handler kept in r11
		void ppu_ret(bool last = true)
		{
			base::add(arg_this_op, 4);
			base::mov(arg_op, x86::dword_ptr(arg_this_op));
			base::bswap(arg_op);
			base::add(arg_next_fn, 8);
			base::jmp(x86::r11);

			// Embed constants (TODO: after last return)
			if (last)
				base::emit_consts();
		}
	};
#elif defined(ARCH_ARM64)
	// ARM64: JIT path not implemented here; RETURN falls back to RETURN_
	struct ppu_builder : a64::Assembler
	{
	};
#else
	// Generic: no JIT support
	struct ppu_builder
	{
	};
#endif
}
|
|
|
|
// Stand-in for ppu_thread used when the 'exec' lambdas are replayed through the
// asmjit builder (see RETURN): member accesses yield lazily-evaluated operands
// that the vector builder resolves to registers or memory instead of touching a
// real thread.
struct ppu_abstract_t
{
	struct abstract_vr
	{
		// Lazy operand for one vector register, selected by opcode bits [I, I+N)
		template <uint I, uint N>
		struct lazy_vr : asmjit::mem_lazy
		{
			// Materialize the operand; is_lv indicates use as a load (source) value
			const asmjit::Operand& eval(bool is_lv)
			{
				if (is_lv && !this->isReg())
				{
					// First use as a source: allocate a register and load the VR into it
					Operand::operator=(g_vc->vec_alloc());
#if defined(ARCH_X64)
					g_vc->emit(asmjit::x86::Inst::kIdMovaps, *this, static_cast<asmjit::ppu_builder*>(g_vc)->ppu_vr(bf_t<u32, I, N>{}, false));
#endif
				}

				if (!is_lv)
				{
					if (this->isReg())
					{
						// No longer needed in a register: release it
						g_vc->vec_dealloc(asmjit::vec_type{this->id()});
					}
					else
					{
#if defined(ARCH_X64)
						// Use the VR's memory slot directly as the operand
						Operand::operator=(static_cast<asmjit::ppu_builder*>(g_vc)->ppu_vr(bf_t<u32, I, N>{}, false));
#endif
					}
				}

				return *this;
			}

			// Store a value into this vector register's slot
			template <typename T>
			void operator=(T&& _val) const
			{
				FOR_X64(store_op, kIdMovaps, kIdVmovaps, static_cast<asmjit::ppu_builder*>(g_vc)->ppu_vr(bf_t<u32, I, N>{}, true), std::forward<T>(_val));
			}
		};

		// ppu.vr[op.field] — the bitfield type alone selects the register
		template <uint I, uint N>
		lazy_vr<I, N> operator[](const bf_t<u32, I, N>&) const
		{
			return {};
		}
	} vr;

	// Lazy operand for ppu_thread::sat (saturation accumulator)
	struct abstract_sat : asmjit::mem_lazy
	{
		const asmjit::Operand& eval(bool)
		{
#if defined(ARCH_X64)
			Operand::operator=(static_cast<asmjit::ppu_builder*>(g_vc)->ppu_sat());
#endif

			return *this;
		}

		template <typename T>
		void operator=(T&& _val) const
		{
#if defined(ARCH_X64)
			FOR_X64(store_op, kIdMovaps, kIdVmovaps, static_cast<asmjit::ppu_builder*>(g_vc)->ppu_sat(), std::forward<T>(_val));
#endif
		}
	} sat{};
};
|
|
|
|
extern void do_cell_atomic_128_store(u32 addr, const void* to_write);
|
|
|
|
inline u64 dup32(u32 x) { return x | static_cast<u64>(x) << 32; }
|
|
|
|
// Write values to CR field
inline void ppu_cr_set(ppu_thread& ppu, u32 field, bool le, bool gt, bool eq, bool so)
{
	// CR is stored unpacked, one byte per bit: LT, GT, EQ, SO
	ppu.cr[field * 4 + 0] = le;
	ppu.cr[field * 4 + 1] = gt;
	ppu.cr[field * 4 + 2] = eq;
	ppu.cr[field * 4 + 3] = so;

	if (g_cfg.core.ppu_debug) [[unlikely]]
	{
		// Accumulate which CR bits this instruction address has ever produced
		*reinterpret_cast<u32*>(vm::g_stat_addr + ppu.cia) |= *reinterpret_cast<u32*>(ppu.cr.bits + field * 4);
	}
}
|
|
|
|
// Write comparison results to CR field
|
|
template<typename T>
|
|
inline void ppu_cr_set(ppu_thread& ppu, u32 field, const T& a, const T& b)
|
|
{
|
|
ppu_cr_set(ppu, field, a < b, a > b, a == b, ppu.xer.so);
|
|
}
|
|
|
|
// TODO
// Flag-templated variant of ppu_cr_set: statistics collection is selected at
// compile time via set_cr_stats instead of the runtime g_cfg check
template <ppu_exec_bit... Flags>
void ppu_set_cr(ppu_thread& ppu, u32 field, bool le, bool gt, bool eq, bool so)
{
	// CR is stored unpacked, one byte per bit: LT, GT, EQ, SO
	ppu.cr[field * 4 + 0] = le;
	ppu.cr[field * 4 + 1] = gt;
	ppu.cr[field * 4 + 2] = eq;
	ppu.cr[field * 4 + 3] = so;

	if constexpr (((Flags == set_cr_stats) || ...))
	{
		// Accumulate which CR bits this instruction address has ever produced
		*reinterpret_cast<u32*>(vm::g_stat_addr + ppu.cia) |= *reinterpret_cast<u32*>(ppu.cr.bits + field * 4);
	}
}
|
|
|
|
// Set XER.OV bit (overflow)
|
|
inline void ppu_ov_set(ppu_thread& ppu, bool bit)
|
|
{
|
|
ppu.xer.ov = bit;
|
|
ppu.xer.so |= bit;
|
|
}
|
|
|
|
// Write comparison results to FPCC field with optional CR field update
// FPCC layout (one byte per flag): FL (less), FG (greater), FE (equal), FU (unordered)
template <ppu_exec_bit... Flags>
void ppu_set_fpcc(ppu_thread& ppu, f64 a, f64 b, u64 cr_field = 1)
{
	if constexpr (((Flags == set_fpcc || Flags == has_rc) || ...))
	{
		// The bit_cast of bool[4] below assumes little-endian byte order
		static_assert(std::endian::native == std::endian::little, "Not implemented");

		bool fpcc[4];
#if defined(ARCH_X64) && !defined(_M_X64)
		// GCC/Clang x64 path: comisd sets CF/ZF/PF; read all four outcomes from flags.
		// PF (parity) signals an unordered comparison (NaN operand).
		__asm__("comisd %[b], %[a]\n"
			: "=@ccb" (fpcc[0])
			, "=@cca" (fpcc[1])
			, "=@ccz" (fpcc[2])
			, "=@ccp" (fpcc[3])
			: [a] "x" (a)
			, [b] "x" (b)
			: "cc");
		if (fpcc[3]) [[unlikely]]
		{
			// Unordered: comisd reports CF=ZF=1 for NaNs, so clear the ordered flags
			fpcc[0] = fpcc[1] = fpcc[2] = false;
		}
#else
		// Portable path via three-way comparison
		const auto cmp = a <=> b;
		fpcc[0] = cmp == std::partial_ordering::less;
		fpcc[1] = cmp == std::partial_ordering::greater;
		fpcc[2] = cmp == std::partial_ordering::equivalent;
		fpcc[3] = cmp == std::partial_ordering::unordered;
#endif

		// Pack the four bool bytes into one u32
		const u32 data = std::bit_cast<u32>(fpcc);

		// Write FPCC
		ppu.fpscr.fields[4] = data;

		if constexpr (((Flags == has_rc) || ...))
		{
			// Previous behaviour was throwing an exception; TODO
			ppu.cr.fields[cr_field] = data;

			if (g_cfg.core.ppu_debug) [[unlikely]]
			{
				*reinterpret_cast<u32*>(vm::g_stat_addr + ppu.cia) |= data;
			}
		}
	}
}
|
|
|
|
// Validate read data in case does not match reservation
template <typename T>
auto ppu_feed_data(ppu_thread& ppu, u64 addr)
{
	static_assert(sizeof(T) <= 128, "Incompatible type-size, break down into smaller loads");

	auto value = vm::_ref<T>(vm::cast(addr));

	// NOTE: the unconditional return below disables the reservation validation;
	// everything past this block is currently dead code (kept for re-enabling)
	//if (!ppu.use_full_rdata)
	{
		return value;
	}

	const u32 raddr = ppu.raddr;

	if (addr / 128 > raddr / 128 || (addr + sizeof(T) - 1) / 128 < raddr / 128)
	{
		// Out of range or reservation does not exist
		return value;
	}

	if (sizeof(T) == 1 || addr / 128 == (addr + sizeof(T) - 1) / 128)
	{
		// Optimized comparison: the access fits in a single 128-byte line
		if (std::memcmp(&value, &ppu.rdata[addr & 127], sizeof(T)))
		{
			// Reservation was lost
			ppu.raddr = 0;
		}
	}
	else
	{
		// Access straddles a line boundary: compare only the part overlapping the
		// reserved line
		alignas(16) std::byte buffer[sizeof(T)];
		std::memcpy(buffer, &value, sizeof(value)); // Put in memory explicitly (ensure the compiler won't do it beforehand)

		const std::byte* src;
		u32 size;
		u32 offs = 0;

		if (raddr / 128 == addr / 128)
			src = &ppu.rdata[addr & 127], size = std::min<u32>(128 - (addr % 128), sizeof(T));
		else
			src = &ppu.rdata[0], size = (addr + u32{sizeof(T)}) % 127, offs = sizeof(T) - size; // NOTE(review): "% 127" looks like it should be "% 128" — currently unreachable; confirm before re-enabling

		if (std::memcmp(buffer + offs, src, size))
		{
			ppu.raddr = 0;
		}
	}

	return value;
}
|
|
|
|
// Push called address to custom call history for debugging
inline u32 ppu_record_call(ppu_thread& ppu, u32 new_cia, ppu_opcode_t op, bool indirect = false)
{
	// NOTE: this early return disables call-history recording entirely;
	// the code below is currently dead (kept for debugging sessions)
	return new_cia;

	if (auto& history = ppu.call_history; !history.data.empty())
	{
		if (!op.lk)
		{
			if (indirect)
			{
				// Register LLE exported function trampolines
				// Trampolines do not change the stack pointer, and ones to exported functions change RTOC
				if (ppu.gpr[1] == history.last_r1 && ppu.gpr[2] != history.last_r2)
				{
					// Cancel condition
					history.last_r1 = umax;
					history.last_r2 = ppu.gpr[2];

					// Register trampoline with TOC
					history.data[history.index++ % ppu.call_history_max_size] = new_cia;
				}
			}

			return new_cia;
		}

		// Normal call (LK set): record the target and the current SP/TOC
		history.data[history.index++ % ppu.call_history_max_size] = new_cia;
		history.last_r1 = ppu.gpr[1];
		history.last_r2 = ppu.gpr[2];
	}
}
|
|
|
|
// Scalar emulation of the SSSE3 pshufb instruction for CPUs that lack it:
// each result byte picks a source byte by the low nibble of the index,
// and any index byte with its MSB set yields zero.
extern SAFE_BUFFERS(__m128i) sse_pshufb(__m128i data, __m128i index)
{
	const v128 src = data;
	const v128 sel = _mm_and_si128(index, _mm_set1_epi8(0xf));
	v128 result;

	for (int pos = 0; pos < 16; pos++)
	{
		result._u8[pos] = src._u8[sel._u8[pos]];
	}

	// Zero out lanes whose index byte is negative (MSB set)
	return _mm_and_si128(result, _mm_cmpgt_epi8(index, _mm_set1_epi8(-1)));
}
|
|
|
|
// AltiVec lvsl: produce the "load vector for shift left" permute control vector
// for the given address alignment (addr & 0xf)
extern __m128i sse_altivec_lvsl(u64 addr)
{
	// One 16-byte row per alignment offset; values stored in reversed byte order
	// relative to PPC big-endian register notation
	alignas(16) static const u8 lvsl_values[0x10][0x10] =
	{
		{ 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 },
		{ 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 },
		{ 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 },
		{ 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 },
		{ 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 },
		{ 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 },
		{ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 },
		{ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 },
		{ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 },
		{ 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 },
		{ 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a },
		{ 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b },
		{ 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c },
		{ 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d },
		{ 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e },
		{ 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f },
	};

	return _mm_load_si128(reinterpret_cast<const __m128i*>(+lvsl_values[addr & 0xf]));
}
|
|
|
|
// AltiVec lvsr: produce the "load vector for shift right" permute control vector
// for the given address alignment (addr & 0xf)
extern __m128i sse_altivec_lvsr(u64 addr)
{
	// One 16-byte row per alignment offset; values stored in reversed byte order
	// relative to PPC big-endian register notation
	alignas(16) static const u8 lvsr_values[0x10][0x10] =
	{
		{ 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10 },
		{ 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f },
		{ 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e },
		{ 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d },
		{ 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c },
		{ 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b },
		{ 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a },
		{ 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09 },
		{ 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 },
		{ 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07 },
		{ 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06 },
		{ 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05 },
		{ 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04 },
		{ 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03 },
		{ 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02 },
		{ 0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01 },
	};

	return _mm_load_si128(reinterpret_cast<const __m128i*>(+lvsr_values[addr & 0xf]));
}
|
|
|
|
// pshufb masks for the lvlx family: row (addr & 0xf) shifts the loaded 16-byte
// line left by that many bytes; -1 lanes (MSB set) produce zero
static const __m128i lvlx_masks[0x10] =
{
	_mm_set_epi8(0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf),
	_mm_set_epi8(0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1),
	_mm_set_epi8(0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1),
	_mm_set_epi8(0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1),
	_mm_set_epi8(0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1),
	_mm_set_epi8(0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1),
	_mm_set_epi8(0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0xa, 0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0xb, 0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0xc, 0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0xd, 0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0xe, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
};
|
|
|
|
// pshufb masks for the lvrx family: row (addr & 0xf) shifts the loaded 16-byte
// line right, keeping only the first (addr & 0xf) bytes; -1 lanes produce zero.
// Also used as write masks for _mm_maskmoveu_si128 in the stvlx/stvrx stores.
static const __m128i lvrx_masks[0x10] =
{
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8),
	_mm_set_epi8(-1, -1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9),
	_mm_set_epi8(-1, -1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa),
	_mm_set_epi8(-1, -1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb),
	_mm_set_epi8(-1, -1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc),
	_mm_set_epi8(-1, -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd),
	_mm_set_epi8(-1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe),
};
|
|
|
|
// lvlx (SSSE3): load the aligned 16-byte line via the reservation-checked path
// and left-align bytes from (addr & 0xf) onward, zero-filling the tail
static SSSE3_FUNC __m128i sse_cellbe_lvlx(ppu_thread& ppu, u64 addr)
{
	return _mm_shuffle_epi8(ppu_feed_data<__m128i>(ppu, addr & -16), lvlx_masks[addr & 0xf]);
}
|
|
|
|
// lvlx (SSSE3): same as above but loading directly from guest memory
extern SSSE3_FUNC __m128i sse_cellbe_lvlx(u64 addr)
{
	return _mm_shuffle_epi8(_mm_load_si128(vm::_ptr<const __m128i>(addr & ~0xf)), lvlx_masks[addr & 0xf]);
}
|
|
|
|
// stvlx (SSSE3): shuffle the value into line position and masked-store only the
// bytes covered by the alignment (lvrx_masks selects the writable lanes)
extern SSSE3_FUNC void sse_cellbe_stvlx(u64 addr, __m128i a)
{
	_mm_maskmoveu_si128(_mm_shuffle_epi8(a, lvlx_masks[addr & 0xf]), lvrx_masks[addr & 0xf], vm::_ptr<char>(addr & ~0xf));
}
|
|
|
|
// lvrx (SSSE3): reservation-checked load of the bytes preceding the alignment
// boundary, right-aligned into the result
static SSSE3_FUNC __m128i sse_cellbe_lvrx(ppu_thread& ppu, u64 addr)
{
	return _mm_shuffle_epi8(ppu_feed_data<__m128i>(ppu, addr & -16), lvrx_masks[addr & 0xf]);
}
|
|
|
|
// lvrx (SSSE3): same as above but loading directly from guest memory
extern SSSE3_FUNC __m128i sse_cellbe_lvrx(u64 addr)
{
	return _mm_shuffle_epi8(_mm_load_si128(vm::_ptr<const __m128i>(addr & ~0xf)), lvrx_masks[addr & 0xf]);
}
|
|
|
|
// stvrx (SSSE3): masked store of the right-aligned value (lvlx_masks selects
// the writable lanes)
extern SSSE3_FUNC void sse_cellbe_stvrx(u64 addr, __m128i a)
{
	_mm_maskmoveu_si128(_mm_shuffle_epi8(a, lvrx_masks[addr & 0xf]), lvlx_masks[addr & 0xf], vm::_ptr<char>(addr & ~0xf));
}
|
|
|
|
// lvlx fallback for CPUs without SSSE3: uses the scalar sse_pshufb emulation
static __m128i sse_cellbe_lvlx_v0(ppu_thread& ppu, u64 addr)
{
	return sse_pshufb(ppu_feed_data<__m128i>(ppu, addr & -16), lvlx_masks[addr & 0xf]);
}
|
|
|
|
// lvlx fallback (no SSSE3), direct guest-memory load
extern __m128i sse_cellbe_lvlx_v0(u64 addr)
{
	return sse_pshufb(_mm_load_si128(vm::_ptr<const __m128i>(addr & ~0xf)), lvlx_masks[addr & 0xf]);
}
|
|
|
|
// stvlx fallback (no SSSE3)
extern void sse_cellbe_stvlx_v0(u64 addr, __m128i a)
{
	_mm_maskmoveu_si128(sse_pshufb(a, lvlx_masks[addr & 0xf]), lvrx_masks[addr & 0xf], vm::_ptr<char>(addr & ~0xf));
}
|
|
|
|
// lvrx fallback for CPUs without SSSE3: uses the scalar sse_pshufb emulation
static __m128i sse_cellbe_lvrx_v0(ppu_thread& ppu, u64 addr)
{
	return sse_pshufb(ppu_feed_data<__m128i>(ppu, addr & -16), lvrx_masks[addr & 0xf]);
}
|
|
|
|
// lvrx fallback (no SSSE3), direct guest-memory load
extern __m128i sse_cellbe_lvrx_v0(u64 addr)
{
	return sse_pshufb(_mm_load_si128(vm::_ptr<const __m128i>(addr & ~0xf)), lvrx_masks[addr & 0xf]);
}
|
|
|
|
// stvrx fallback (no SSSE3)
extern void sse_cellbe_stvrx_v0(u64 addr, __m128i a)
{
	_mm_maskmoveu_si128(sse_pshufb(a, lvrx_masks[addr & 0xf]), lvlx_masks[addr & 0xf], vm::_ptr<char>(addr & ~0xf));
}
|
|
|
|
// Result of an addition together with its carry-out flag
template<typename T>
struct add_flags_result_t
{
	T result;
	bool carry;

	add_flags_result_t() = default;

	// Straightforward ADD with flags
	add_flags_result_t(T a, T b)
		: result(a + b)
		, carry(result < a) // Unsigned wraparound implies carry-out
	{
	}

	// Straightforward ADC with flags
	add_flags_result_t(T a, T b, bool c)
		: add_flags_result_t(a, b)
	{
		// Add the carry-in as a second step; carry-out if either step carried
		add_flags_result_t r(result, c);
		result = r.result;
		carry |= r.carry;
	}
};
|
|
|
|
// 64-bit ADD returning both result and carry-out
static add_flags_result_t<u64> add64_flags(u64 a, u64 b)
{
	return add_flags_result_t<u64>(a, b);
}
|
|
|
|
// 64-bit ADC (add with carry-in) returning both result and carry-out
static add_flags_result_t<u64> add64_flags(u64 a, u64 b, bool c)
{
	return add_flags_result_t<u64>(a, b, c);
}
|
|
|
|
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
|
|
|
|
extern u32 ppu_lwarx(ppu_thread& ppu, u32 addr);
|
|
extern u64 ppu_ldarx(ppu_thread& ppu, u32 addr);
|
|
extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value);
|
|
extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value);
|
|
extern void ppu_trap(ppu_thread& ppu, u64 addr);
|
|
|
|
// NaNs production precedence: NaN from Va, Vb, Vc
// and lastly the result of the operation in case none of the operands is a NaN
// Signaling NaNs are 'quieted' (MSB of fraction is set) with other bits of data remain the same
// Single-operand base case: nothing to select against
inline v128 ppu_select_vnan(v128 a)
{
	return a;
}
|
|
|
|
// Per lane: if 'a' is NaN, return 'a' quieted (QNaN bit 0x00400000 ORed in);
// otherwise pass 'b' through
inline v128 ppu_select_vnan(v128 a, v128 b)
{
	return gv_selectfs(gv_eqfs(a, a), b, a | gv_bcst32(0x7fc00000u));
}
|
|
|
|
// Variadic case: fold right so earlier operands take precedence (Va, Vb, Vc, result)
inline v128 ppu_select_vnan(v128 a, v128 b, Vector128 auto... args)
{
	return ppu_select_vnan(a, ppu_select_vnan(b, args...));
}
|
|
|
|
// Flush denormals to zero if NJ is 1
// 'mask' is the broadcast jm_mask; lanes where (a & mask) == 0 (denormal/zero
// under NJ) are cleared. The shr32-by-1 keeps the sign bit out of the clearing
// mask so the sign of a flushed value is preserved.
template <bool Result = false, ppu_exec_bit... Flags>
inline v128 ppu_flush_denormal(const v128& mask, const v128& a)
{
	// Inputs are flushed under use_nj; results additionally under fix_nj
	if constexpr (((Flags == use_nj) || ...) || (Result && ((Flags == fix_nj) || ...)))
	{
		return gv_andn(gv_shr32(gv_eq32(mask & a, gv_bcst32(0)), 1), a);
	}
	else
	{
		return a;
	}
}
|
|
|
|
// Replace every NaN lane of the result with the canonical QNaN (0x7fc00000)
inline v128 ppu_fix_vnan(v128 r)
{
	return gv_selectfs(gv_eqfs(r, r), r, gv_bcst32(0x7fc00000u));
}
|
|
|
|
// Apply the NaN policy selected by Flags to result 'r', given operands 'args'
template <ppu_exec_bit... Flags>
inline v128 ppu_set_vnan(v128 r, Vector128 auto... args)
{
	if constexpr (((Flags == set_vnan) || ...) && sizeof...(args) > 0)
	{
		// Full propagation: prefer a (quieted) NaN operand over the result's NaN
		return ppu_select_vnan(args..., ppu_fix_vnan(r));
	}
	else if constexpr (((Flags == fix_vnan) || ...))
	{
		// Only fix the result
		return ppu_fix_vnan(r);
	}
	else
	{
		// Return as is
		return r;
	}
}
|
|
|
|
// Move From VSCR: compose the VSCR value (SAT and NJ bits) into vd
template <u32 Build, ppu_exec_bit... Flags>
auto MFVSCR()
{
	// Flag-selection pass: report which exec flags this instruction can use
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& sat, auto&& nj)
	{
		u32 sat_bit = 0;
		if constexpr (((Flags == set_sat) || ...))
			sat_bit = !gv_testz(sat); //!!sat._u — SAT is set if any accumulator bit is nonzero
		d._u64[0] = 0;
		// VSCR lives in the most significant word: NJ at bit 16, SAT at bit 0
		d._u64[1] = u64(sat_bit | (u32{nj} << 16)) << 32;
	};

	RETURN_(ppu.vr[op.vd], ppu.sat, ppu.nj);
}
|
|
|
|
// Move To VSCR: update SAT, NJ and the denormal-flush mask from vb
template <u32 Build, ppu_exec_bit... Flags>
auto MTVSCR()
{
	// Flag-selection pass: report which exec flags this instruction can use
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat, use_nj, fix_nj>();

	static const auto exec = [](auto&& sat, auto&& nj, auto&& jm_mask, auto&& b)
	{
		const u32 vscr = b._u32[3];
		if constexpr (((Flags == set_sat) || ...))
			sat._u = vscr & 1;
		if constexpr (((Flags == use_nj || Flags == fix_nj) || ...))
			jm_mask = (vscr & 0x10000) ? 0x7f80'0000 : 0x7fff'ffff;
		// NOTE: nj is updated unconditionally — this assignment is outside the
		// if constexpr above (no braces)
		nj = (vscr & 0x10000) != 0;
	};

	RETURN_(ppu.sat, ppu.nj, ppu.jm_mask, ppu.vr[op.vb]);
}
|
|
|
|
// Vector Add and Write Carry-Out Unsigned Word: d = carry-out of (a + b), 0 or 1 per lane
template <u32 Build, ppu_exec_bit... Flags>
auto VADDCUW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		// ~a is how much can be added to a without carry
		// geu yields all-ones (-1) where no carry occurs, 0 where it does;
		// subtracting -1 maps that to 0 / 1 respectively
		d = gv_sub32(gv_geu32(gv_not32(std::move(a)), std::move(b)), gv_bcst32(-1));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
// Vector Add Floating-Point: d = a + b with optional denormal flushing (NJ) and NaN propagation
template <u32 Build, ppu_exec_bit... Flags>
auto VADDFP()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>();

	static const auto exec = [](auto&& d, auto&& a_, auto&& b_, auto&& jm_mask)
	{
		// Flush denormal inputs, add, propagate operand NaNs, then flush the result
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto a = ppu_flush_denormal<false, Flags...>(m, std::move(a_));
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_addfs(a, b), a, b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
// Vector Add Signed Byte Saturate
template <u32 Build, ppu_exec_bit... Flags>
auto VADDSBS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			auto r = gv_adds_s8(a, b);
			// Saturation occurred where saturating and wrapping sums differ; OR into the accumulator
			sat = gv_or32(gv_xor32(gv_add8(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_adds_s8(std::move(a), std::move(b));
		}
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
// Vector Add Signed Halfword Saturate
template <u32 Build, ppu_exec_bit... Flags>
auto VADDSHS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			auto r = gv_adds_s16(a, b);
			// Saturation occurred where saturating and wrapping sums differ; OR into the accumulator
			sat = gv_or32(gv_xor32(gv_add16(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_adds_s16(std::move(a), std::move(b));
		}
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
// Vector Add Signed Word Saturate
template <u32 Build, ppu_exec_bit... Flags>
auto VADDSWS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			auto r = gv_adds_s32(a, b);
			// Saturation occurred where saturating and wrapping sums differ; OR into the accumulator
			sat = gv_or32(gv_xor32(gv_add32(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_adds_s32(std::move(a), std::move(b));
		}
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
// Vector Add Unsigned Byte Modulo (wrapping add)
template <u32 Build, ppu_exec_bit... Flags>
auto VADDUBM()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_add8(std::move(a), std::move(b));
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
// Vector Add Unsigned Byte Saturate
template <u32 Build, ppu_exec_bit... Flags>
auto VADDUBS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			auto r = gv_addus_u8(a, b);
			// Saturation occurred where saturating and wrapping sums differ; OR into the accumulator
			sat = gv_or32(gv_xor32(gv_add8(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_addus_u8(std::move(a), std::move(b));
		}
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
// Vector Add Unsigned Halfword Modulo (wrapping add)
template <u32 Build, ppu_exec_bit... Flags>
auto VADDUHM()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_add16(std::move(a), std::move(b));
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
// Vector Add Unsigned Halfword Saturate
template <u32 Build, ppu_exec_bit... Flags>
auto VADDUHS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			auto r = gv_addus_u16(a, b);
			// Saturation occurred where saturating and wrapping sums differ; OR into the accumulator
			sat = gv_or32(gv_xor32(gv_add16(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_addus_u16(std::move(a), std::move(b));
		}
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
// Vector Add Unsigned Word Modulo (wrapping add)
template <u32 Build, ppu_exec_bit... Flags>
auto VADDUWM()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_add32(std::move(a), std::move(b));
	};

	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
// VADDUWS: vector add unsigned words with saturation.
// With set_sat, saturation is recorded by accumulating into ppu.sat.
template <u32 Build, ppu_exec_bit... Flags>
auto VADDUWS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			// Saturation occurred iff the saturating and wrapping sums differ in any lane.
			auto r = gv_addus_u32(a, b);
			sat = gv_or32(gv_xor32(gv_add32(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_addus_u32(std::move(a), std::move(b));
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VAND()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_andfs(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VANDC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_andnfs(std::move(b), std::move(a));
|
|
};
|
|
|
|
RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VAVGSB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_avgs8(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VAVGSH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_avgs16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VAVGSW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_avgs32(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VAVGUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_avgu8(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VAVGUH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_avgu16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VAVGUW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_avgu32(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCFSX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& b, u32 i)
|
|
{
|
|
d = gv_subus_u16(gv_cvts32_tofs(std::move(b)), gv_bcst32(i));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], op.vuimm << 23);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCFUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& b, u32 i)
|
|
{
|
|
d = gv_subus_u16(gv_cvtu32_tofs(std::move(b)), gv_bcst32(i));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], op.vuimm << 23);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPBFP()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto sign = gv_bcstfs(-0.);
|
|
auto cmp1 = gv_nlefs(a, b);
|
|
auto cmp2 = gv_ngefs(a, b ^ sign);
|
|
auto r = (std::move(cmp1) & sign) | gv_shr32(std::move(cmp2) & sign, 1);
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, false, false, gv_testz(r), false);
|
|
d = std::move(r);
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPEQFP()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_eqfs(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPEQUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_eq8(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPEQUH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_eq16(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPEQUW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_eq32(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGEFP()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gefs(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGTFP()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gtfs(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGTSB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gts8(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGTSH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gts16(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGTSW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gts32(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGTUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gtu8(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGTUH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gtu16(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VCMPGTUW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto r = gv_gtu32(std::move(a), std::move(b));
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_cr_set(ppu, 6, gv_testall1(r), false, gv_testall0(r), false);
|
|
d = r;
|
|
};
|
|
|
|
RETURN_(ppu, ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
// VCTSXS: convert float lanes to signed 32-bit integers with saturation,
// after scaling by 2^UIMM. Optionally fixes NaN results and records saturation.
template <u32 Build, ppu_exec_bit... Flags>
auto VCTSXS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<fix_vnan, set_sat>();

	static const auto exec = [](auto&& d, auto&& b, auto&& sat, u32 i)
	{
		// i is the bit pattern of the float 2^UIMM ((vuimm + 127) << 23): scale first.
		auto r = gv_mulfs(b, gv_bcst32(i));
		// l/h: lanes that fall below/above the representable s32 range.
		auto l = gv_ltfs(r, gv_bcstfs(-2147483648.));
		auto h = gv_gefs(r, gv_bcstfs(2147483648.));
#if !defined(ARCH_X64) && !defined(ARCH_ARM64)
		// Generic path: clamp the low side manually before conversion.
		r = gv_selectfs(l, gv_bcstfs(-2147483648.), std::move(r));
#endif
		r = gv_cvtfs_tos32(std::move(r));
#if !defined(ARCH_ARM64)
		// Patch positive-overflow lanes to INT32_MAX (ARM64 conversion saturates natively).
		r = gv_select32(h, gv_bcst32(0x7fffffff), std::move(r));
#endif
		if constexpr (((Flags == fix_vnan) || ...))
			r = gv_and32(std::move(r), gv_eqfs(b, b)); // NaN lanes become 0
		if constexpr (((Flags == set_sat) || ...))
			sat = gv_or32(gv_or32(std::move(l), std::move(h)), std::move(sat));
		d = std::move(r);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.sat, (op.vuimm + 127) << 23);
}
|
|
|
|
// VCTUXS: convert float lanes to unsigned 32-bit integers with saturation,
// after scaling by 2^UIMM. Optionally fixes NaN results and records saturation.
template <u32 Build, ppu_exec_bit... Flags>
auto VCTUXS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<fix_vnan, set_sat>();

	static const auto exec = [](auto&& d, auto&& b, auto&& sat, u32 i)
	{
		// i is the bit pattern of the float 2^UIMM ((vuimm + 127) << 23): scale first.
		auto r = gv_mulfs(b, gv_bcst32(i));
		// l/h: lanes that fall below 0 / at or above 2^32.
		auto l = gv_ltfs(r, gv_bcstfs(0.));
		auto h = gv_gefs(r, gv_bcstfs(4294967296.));
		r = gv_cvtfs_tou32(std::move(r));
#if !defined(ARCH_ARM64)
		r = gv_andn32(l, std::move(r)); // saturate to zero
#endif
#if !defined(__AVX512VL__) && !defined(ARCH_ARM64)
		r = gv_or32(std::move(r), h); // saturate to 0xffffffff
#endif
		if constexpr (((Flags == fix_vnan) || ...))
			r = gv_and32(std::move(r), gv_eqfs(b, b)); // NaN lanes become 0
		if constexpr (((Flags == set_sat) || ...))
			sat = gv_or32(gv_or32(std::move(l), std::move(h)), std::move(sat));
		d = std::move(r);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.sat, (op.vuimm + 127) << 23);
}
|
|
|
|
// VEXPTEFP: per-lane 2^x estimate (AltiVec "exponent estimate").
// NOTE(review): only fix_vnan is selected here; confirm set_vnan is
// intentionally excluded for the estimate instructions.
template <u32 Build, ppu_exec_bit... Flags>
auto VEXPTEFP()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<fix_vnan>();

	static const auto exec = [](auto&& d, auto&& b)
	{
		// for (u32 i = 0; i < 4; i++) d._f[i] = std::exp2f(b._f[i]);
		d = ppu_set_vnan<Flags...>(gv_exp2_approxfs(std::move(b)));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
// VLOGEFP: per-lane log2(x) estimate (AltiVec "log estimate").
// NOTE(review): only fix_vnan is selected here; confirm set_vnan is
// intentionally excluded for the estimate instructions.
template <u32 Build, ppu_exec_bit... Flags>
auto VLOGEFP()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<fix_vnan>();

	static const auto exec = [](auto&& d, auto&& b)
	{
		// for (u32 i = 0; i < 4; i++) d._f[i] = std::log2f(b._f[i]);
		d = ppu_set_vnan<Flags...>(gv_log2_approxfs(std::move(b)));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
// VMADDFP: fused multiply-add, d = a * c + b, with optional non-Java-mode
// denormal flushing (jm_mask) on inputs and result, and NaN handling.
template <u32 Build, ppu_exec_bit... Flags>
auto VMADDFP()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>();

	static const auto exec = [](auto&& d, auto&& a_, auto&& b_, auto&& c_, auto&& jm_mask)
	{
		// Broadcast the non-Java mode mask once; reused for all operands and the result.
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto a = ppu_flush_denormal<false, Flags...>(m, std::move(a_));
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		auto c = ppu_flush_denormal<false, Flags...>(m, std::move(c_));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_fmafs(a, c, b)));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc], ppu.jm_mask);
}
|
|
|
|
// VMAXFP: per-lane float maximum with NaN propagation (ppu_set_vnan checks
// both inputs) and optional non-Java-mode denormal flushing of the result.
template <u32 Build, ppu_exec_bit... Flags>
auto VMAXFP()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& jm_mask)
	{
		d = ppu_flush_denormal<true, Flags...>(gv_bcst32(jm_mask, &ppu_thread::jm_mask), ppu_set_vnan<Flags...>(gv_maxfs(a, b), a, b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMAXSB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_maxs8(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMAXSH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_maxs16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMAXSW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_maxs32(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMAXUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_maxu8(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMAXUH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_maxu16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMAXUW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_maxu32(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
// VMHADDSHS: d = saturate(high16(a * b * 2) + c), per signed halfword lane.
// Optionally records saturation into ppu.sat.
template <u32 Build, ppu_exec_bit... Flags>
auto VMHADDSHS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c, auto&& sat)
	{
		// m: doubled high product (saturating helper).
		auto m = gv_muls_hds16(a, b);
		// f: lanes where c is negative.
		auto f = gv_gts16(gv_bcst16(0), c);
		// x: lanes where both a and b equal -0x8000 (max(a,b) == -0x8000 implies both do),
		// the only combination where the doubled product itself saturates.
		auto x = gv_eq16(gv_maxs16(std::move(a), std::move(b)), gv_bcst16(0x8000));
		// r: corrected result — when the product saturated and c is negative, undo one LSB.
		auto r = gv_sub16(gv_adds_s16(m, c), gv_and32(x, f));
		// s: wrapping sum, used to detect saturation of the final add (differs from r when clamped).
		auto s = gv_add16(std::move(m), std::move(c));
		if constexpr (((Flags == set_sat) || ...))
			sat = gv_or32(gv_or32(gv_andn32(std::move(f), x), gv_andn32(x, gv_xor32(std::move(s), r))), sat);
		d = std::move(r);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc], ppu.sat);
}
|
|
|
|
// VMHRADDSHS: rounded variant of VMHADDSHS — d = saturate(round(high16(a*b*2)) + c).
// Without set_sat a single fused helper is used; otherwise the expanded form
// tracks saturation the same way VMHADDSHS does.
template <u32 Build, ppu_exec_bit... Flags>
auto VMHRADDSHS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c, auto&& sat)
	{
		if constexpr (((Flags != set_sat) && ...))
		{
			// Fast path: no saturation tracking needed.
			d = gv_rmuladds_hds16(std::move(a), std::move(b), std::move(c));
		}
		else
		{
			// m: rounded, doubled high product.
			auto m = gv_rmuls_hds16(a, b);
			// f: lanes where c is negative.
			auto f = gv_gts16(gv_bcst16(0), c);
			// x: lanes where both a and b equal -0x8000 (product itself saturates).
			auto x = gv_eq16(gv_maxs16(std::move(a), std::move(b)), gv_bcst16(0x8000));
			// r: corrected saturating result; s: wrapping sum for saturation detection.
			auto r = gv_sub16(gv_adds_s16(m, c), gv_and32(x, f));
			auto s = gv_add16(std::move(m), std::move(c));
			if constexpr (((Flags == set_sat) || ...))
				sat = gv_or32(gv_or32(gv_andn32(std::move(f), x), gv_andn32(x, gv_xor32(std::move(s), r))), sat);
			d = std::move(r);
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMINFP()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<fix_nj, set_vnan, fix_vnan>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& jm_mask)
|
|
{
|
|
d = ppu_flush_denormal<true, Flags...>(gv_bcst32(jm_mask, &ppu_thread::jm_mask), ppu_set_vnan<Flags...>(gv_minfs(a, b), a, b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.jm_mask);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMINSB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mins8(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMINSH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mins16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMINSW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mins32(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMINUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_minu8(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMINUH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_minu16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMINUW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_minu32(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMLADDUHM()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c)
|
|
{
|
|
d = gv_muladd16(std::move(a), std::move(b), std::move(c));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMRGHB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_unpackhi8(std::move(b), std::move(a));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMRGHH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_unpackhi16(std::move(b), std::move(a));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMRGHW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_unpackhi32(std::move(b), std::move(a));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMRGLB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_unpacklo8(std::move(b), std::move(a));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMRGLH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_unpacklo16(std::move(b), std::move(a));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMRGLW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_unpacklo32(std::move(b), std::move(a));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
// VMSUMMBM: mixed-sign multiply-sum — for each word lane, the sum of four
// byte products plus the corresponding word of c.
template <u32 Build, ppu_exec_bit... Flags>
auto VMSUMMBM()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c)
	{
		// Operand order is intentional: per the helper's name (u8 x s8),
		// b is presumably the unsigned operand and a the signed one — matches
		// vmsummbm where vA is signed and vB unsigned; verify against gv_dotu8s8x4.
		d = gv_dotu8s8x4(std::move(b), std::move(a), std::move(c));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMSUMSHM()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c)
|
|
{
|
|
d = gv_dots16x2(std::move(a), std::move(b), std::move(c));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc]);
|
|
}
|
|
|
|
// VMSUMSHS: per word lane, saturating sum of two signed 16-bit products plus c.
// Saturation is detected by comparing the saturating and modulo results.
template <u32 Build, ppu_exec_bit... Flags>
auto VMSUMSHS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c, auto&& sat)
	{
		// r: saturating variant; s: wrapping variant (only used for saturation detection).
		auto r = gv_dots_s16x2(a, b, c);
		auto s = gv_dots16x2(std::move(a), std::move(b), std::move(c));
		if constexpr (((Flags == set_sat) || ...))
			sat = gv_or32(gv_xor32(std::move(s), r), std::move(sat));
		d = std::move(r);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMSUMUBM()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c)
|
|
{
|
|
d = gv_dotu8x4(std::move(a), std::move(b), std::move(c));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMSUMUHM()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c)
|
|
{
|
|
d = gv_add32(std::move(c), gv_dotu16x2(std::move(a), std::move(b)));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc]);
|
|
}
|
|
|
|
// VMSUMUHS: per word lane, unsigned-saturating sum of two unsigned 16-bit
// products plus c. Carry-out is detected via "sum < addend" unsigned compares,
// and overflowing lanes are forced to 0xffffffff.
template <u32 Build, ppu_exec_bit... Flags>
auto VMSUMUHS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>();

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c, auto&& sat)
	{
		auto m1 = gv_mul_even_u16(a, b);
		auto m2 = gv_mul_odds_u16(std::move(a), std::move(b));
		auto s1 = gv_add32(m1, m2);
		// x1: carry out of the first add (result wrapped below an addend).
		auto x1 = gv_gtu32(m1, s1);
		// Second add; lanes that already overflowed are forced to all-ones by the OR.
		auto s2 = gv_or32(gv_add32(s1, std::move(c)), x1);
		// x2: carry out of the second add.
		auto x2 = gv_gtu32(std::move(s1), s2);
		if constexpr (((Flags == set_sat) || ...))
			sat = gv_or32(gv_or32(std::move(x1), x2), std::move(sat));
		d = gv_or32(std::move(s2), std::move(x2));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULESB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mul16(gv_sar16(std::move(a), 8), gv_sar16(std::move(b), 8));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULESH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mul_odds_s16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULEUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mul16(gv_shr16(std::move(a), 8), gv_shr16(std::move(b), 8));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULEUH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mul_odds_u16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULOSB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mul16(gv_sar16(gv_shl16(std::move(a), 8), 8), gv_sar16(gv_shl16(std::move(b), 8), 8));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULOSH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mul_even_s16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULOUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
auto mask = gv_bcst16(0x00ff);
|
|
d = gv_mul16(gv_and32(std::move(a), mask), gv_and32(std::move(b), mask));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VMULOUH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_mul_even_u16(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
// VNMSUBFP: d = -(a * c - b), computed as -(fma(a, c, -b)) to match hardware,
// with optional non-Java-mode denormal flushing and NaN handling.
template <u32 Build, ppu_exec_bit... Flags>
auto VNMSUBFP()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>();

	static const auto exec = [](auto&& d, auto&& a_, auto&& b_, auto&& c_, auto&& jm_mask)
	{
		// An odd case with (FLT_MIN, FLT_MIN, FLT_MIN) produces FLT_MIN instead of 0
		auto s = gv_bcstfs(-0.0f);
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto a = ppu_flush_denormal<false, Flags...>(m, std::move(a_));
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		auto c = ppu_flush_denormal<false, Flags...>(m, std::move(c_));
		// Negations are done via sign-bit XOR: r = -(a * c + (-b)).
		auto r = gv_xorfs(std::move(s), gv_fmafs(std::move(a), std::move(c), gv_xorfs(std::move(b), s)));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(std::move(r)));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VNOR()
{
	// Vector NOR: d = ~(a | b), bitwise over the whole 128-bit register.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_notfs(gv_orfs(std::move(a), std::move(b)));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VOR()
{
	// Vector OR: d = a | b, bitwise over the whole 128-bit register.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_orfs(std::move(a), std::move(b));
	};

	// NOTE(review): uses RETURN (without underscore), unlike the sibling handlers — confirm intentional
	RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPERM()
{
	// Vector Permute: each destination byte selects one of the 32 source bytes
	// (VA:VB concatenated, big-endian indexing) via the low 5 bits of VC.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

#if defined (ARCH_X64)
	if constexpr (Build == 0)
	{
		// JIT path: pshufb-based permute, built once and reused for every VPERM.
		static const ppu_intrp_func_t f = build_function_asm<ppu_intrp_func_t, asmjit::ppu_builder>("ppu_VPERM", [&](asmjit::ppu_builder& c)
		{
			const auto [v0, v1, v2, v3] = c.vec_alloc<4>();
			c.movdqa(v0, c.ppu_vr(s_op.vc));
			// v0 = ~vc & 0x1f: complemented selector adapts BE indices to LE lane order
			c.pandn(v0, c.get_const(v128::from8p(0x1f)));
			c.movdqa(v1, v0);
			// v1 = per-byte mask: selector > 15 means "take from VA half"
			c.pcmpgtb(v1, c.get_const(v128::from8p(0xf)));
			c.movdqa(v2, c.ppu_vr(s_op.va));
			c.movdqa(v3, c.ppu_vr(s_op.vb));
			c.pshufb(v2, v0);
			c.pshufb(v3, v0);
			// blend the two shuffled halves using the mask in v1
			c.pand(v2, v1);
			c.pandn(v1, v3);
			c.por(v1, v2);
			c.movdqa(c.ppu_vr(s_op.vd), v1);
			c.ppu_ret();
		});

		if (utils::has_ssse3())
		{
			return f;
		}
	}
#endif

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& c)
	{
#if defined(ARCH_ARM64)
		// NEON: two-register table lookup covers all 32 source bytes at once
		uint8x16x2_t ab;
		ab.val[0] = b;
		ab.val[1] = a;
		d = vqtbl2q_u8(ab, vbicq_u8(vdupq_n_u8(0x1f), c));
#else
		// Generic: build the 32-byte table b:a, index with complemented selectors
		u8 ab[32];
		std::memcpy(ab + 0, &b, 16);
		std::memcpy(ab + 16, &a, 16);

		for (u32 i = 0; i < 16; i++)
		{
			d._u8[i] = ab[~c._u8[i] & 0x1f];
		}
#endif
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.vr[op.vc]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKPX()
{
	// Vector Pack Pixel: pack eight 32-bit pixels into 16-bit 1/5/5/5 form.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		// Sign-extended shifts isolate the fields so packss can narrow without clipping them
		auto a1 = gv_sar32(gv_shl32(a, 7), 7 + 9);
		auto b1 = gv_sar32(gv_shl32(b, 7), 7 + 9);
		auto a2 = gv_sar32(gv_shl32(a, 16), 16 + 3);
		auto b2 = gv_sar32(gv_shl32(b, 16), 16 + 3);
		auto p1 = gv_packss_s32(b1, a1);
		auto p2 = gv_packss_s32(b2, a2);
		// Recombine the alpha/red, green and blue fields into 0bARRRRRGGGGGBBBBB lanes
		d = gv_or32(gv_or32(gv_and32(p1, gv_bcst16(0xfc00)), gv_shl16(gv_and32(p1, gv_bcst16(0x7c)), 3)), gv_and32(p2, gv_bcst16(0x1f)));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKSHSS()
{
	// Vector Pack Signed Halfword Signed Saturate: s16 -> s8 with saturation.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
			// Biasing by 0x80 makes any lane outside [-128,127] leave bits in the high byte;
			// accumulate those bits into the sticky saturation register.
			sat = gv_or32(gv_shr16(gv_add16(a, gv_bcst16(0x80)) | gv_add16(b, gv_bcst16(0x80)), 8), std::move(sat));
		d = gv_packss_s16(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKSHUS()
{
	// Vector Pack Signed Halfword Unsigned Saturate: s16 -> u8 with saturation.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
			// Any value outside [0,255] has bits in the high byte; record them as saturation.
			sat = gv_or32(gv_shr16(a | b, 8), std::move(sat));
		d = gv_packus_s16(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKSWSS()
{
	// Vector Pack Signed Word Signed Saturate: s32 -> s16 with saturation.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
			// Biasing by 0x8000 exposes out-of-s16-range lanes in the high halfword.
			sat = gv_or32(gv_shr32(gv_add32(a, gv_bcst32(0x8000)) | gv_add32(b, gv_bcst32(0x8000)), 16), std::move(sat));
		d = gv_packss_s32(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKSWUS()
{
	// Vector Pack Signed Word Unsigned Saturate: s32 -> u16 with saturation.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
			// Any value outside [0,65535] has bits in the high halfword; record as saturation.
			sat = gv_or32(gv_shr32(a | b, 16), std::move(sat));
		d = gv_packus_s32(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKUHUM()
{
	// Vector Pack Unsigned Halfword Unsigned Modulo: keep the low byte of each u16 (truncating pack).
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_packtu16(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKUHUS()
{
	// Vector Pack Unsigned Halfword Unsigned Saturate: u16 -> u8 with saturation.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
			// Any lane above 255 has bits in the high byte; record as saturation.
			sat = gv_or32(gv_shr16(a | b, 8), std::move(sat));
		d = gv_packus_u16(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKUWUM()
{
	// Vector Pack Unsigned Word Unsigned Modulo: keep the low halfword of each u32 (truncating pack).
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_packtu32(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VPKUWUS()
{
	// Vector Pack Unsigned Word Unsigned Saturate: u32 -> u16 with saturation.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
			// Any lane above 65535 has bits in the high halfword; record as saturation.
			sat = gv_or32(gv_shr32(a | b, 16), std::move(sat));
		d = gv_packus_u32(std::move(b), std::move(a));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VREFP()
{
	// Vector Reciprocal Estimate FP: d = 1.0 / b per lane (computed with a full
	// divide here, rather than a hardware-style estimate).
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& b_, auto&& jm_mask)
	{
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_divfs(gv_bcstfs(1.0f), b), b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VRFIM()
{
	// Vector Round to FP Integer toward Minus infinity (floor) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& b_, auto&& jm_mask)
	{
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_roundfs_floor(b), b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VRFIN()
{
	// Vector Round to FP Integer Nearest (ties to even) per lane.
	// Note: no use_nj input flush here, unlike VRFIM/VRFIP.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<fix_nj, set_vnan, fix_vnan>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& b, auto&& jm_mask)
	{
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_roundfs_even(b), b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VRFIP()
{
	// Vector Round to FP Integer toward Plus infinity (ceil) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& b_, auto&& jm_mask)
	{
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_roundfs_ceil(b), b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VRFIZ()
{
	// Vector Round to FP Integer toward Zero (truncate) per lane.
	// Note: no use_nj input flush here, unlike VRFIM/VRFIP.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<fix_nj, set_vnan, fix_vnan>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& b, auto&& jm_mask)
	{
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_roundfs_trunc(b), b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VRLB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint i = 0; i < 16; i++)
|
|
{
|
|
d._u8[i] = utils::rol8(a._u8[i], b._u8[i]);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VRLH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint i = 0; i < 8; i++)
|
|
{
|
|
d._u16[i] = utils::rol16(a._u16[i], b._u8[i * 2] & 0xf);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VRLW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint w = 0; w < 4; w++)
|
|
{
|
|
d._u32[w] = utils::rol32(a._u32[w], b._u8[w * 4] & 0x1f);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VRSQRTEFP()
{
	// Vector Reciprocal Square Root Estimate FP: d = 1.0 / sqrt(b) per lane
	// (computed exactly here rather than as a hardware-style estimate).
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& b_, auto&& jm_mask)
	{
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_divfs(gv_bcstfs(1.0f), gv_sqrtfs(b)), b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSEL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
const auto& c = ppu.vr[op.vc];
|
|
|
|
d = (b & c) | gv_andn(c, a);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSL()
{
	// Vector Shift Left: 128-bit left shift by 0..7 bits. The ISA requires all
	// shift counts in VB to be equal; this emulation reads each byte's count anyway.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		auto& d = ppu.vr[op.vd];
		v128 VA = ppu.vr[op.va]; // copy: d may alias VA
		u8 sh = ppu.vr[op.vb]._u8[0] & 0x7;

		d._u8[0] = VA._u8[0] << sh;
		for (uint b = 1; b < 16; b++)
		{
			sh = ppu.vr[op.vb]._u8[b] & 0x7;
			// carry in the bits shifted out of the previous (lower) byte
			d._u8[b] = (VA._u8[b] << sh) | (VA._u8[b - 1] >> (8 - sh));
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSLB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint i = 0; i < 16; i++)
|
|
{
|
|
d._u8[i] = a._u8[i] << (b._u8[i] & 0x7);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSLDOI()
{
	// Vector Shift Left Double by Octet Immediate: take 16 bytes starting at
	// byte offset vsh from the 32-byte concatenation VA:VB (big-endian order).
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		auto& d = ppu.vr[op.vd];
		// Build the VA:VB table with VB in the low half (LE storage order)
		u8 tmpSRC[32];
		std::memcpy(tmpSRC, &ppu.vr[op.vb], 16);
		std::memcpy(tmpSRC + 16, &ppu.vr[op.va], 16);

		for (uint b = 0; b<16; b++)
		{
			d._u8[15 - b] = tmpSRC[31 - (b + op.vsh)];
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSLH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint h = 0; h < 8; h++)
|
|
{
|
|
d._u16[h] = a._u16[h] << (b._u16[h] & 0xf);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSLO()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
v128 VA = ppu.vr[op.va];
|
|
u8 nShift = (ppu.vr[op.vb]._u8[0] >> 3) & 0xf;
|
|
|
|
d.clear();
|
|
|
|
for (u8 b = 0; b < 16 - nShift; b++)
|
|
{
|
|
d._u8[15 - b] = VA._u8[15 - (b + nShift)];
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSLW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint w = 0; w < 4; w++)
|
|
{
|
|
d._u32[w] = a._u32[w] << (b._u32[w] & 0x1f);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSPLTB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
u8 byte = ppu.vr[op.vb]._u8[15 - op.vuimm];
|
|
|
|
for (uint b = 0; b < 16; b++)
|
|
{
|
|
d._u8[b] = byte;
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSPLTH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
ensure((op.vuimm < 8));
|
|
|
|
u16 hword = ppu.vr[op.vb]._u16[7 - op.vuimm];
|
|
|
|
for (uint h = 0; h < 8; h++)
|
|
{
|
|
d._u16[h] = hword;
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSPLTISB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const s8 imm = op.vsimm;
|
|
|
|
for (uint b = 0; b < 16; b++)
|
|
{
|
|
d._u8[b] = imm;
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSPLTISH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const s16 imm = op.vsimm;
|
|
|
|
for (uint h = 0; h < 8; h++)
|
|
{
|
|
d._u16[h] = imm;
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSPLTISW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const s32 imm = op.vsimm;
|
|
|
|
for (uint w = 0; w < 4; w++)
|
|
{
|
|
d._u32[w] = imm;
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSPLTW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
ensure((op.vuimm < 4));
|
|
|
|
u32 word = ppu.vr[op.vb]._u32[3 - op.vuimm];
|
|
|
|
for (uint w = 0; w < 4; w++)
|
|
{
|
|
d._u32[w] = word;
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSR()
{
	// Vector Shift Right: 128-bit right shift by 0..7 bits. The ISA requires all
	// shift counts in VB to be equal; this emulation reads each byte's count anyway.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		auto& d = ppu.vr[op.vd];
		v128 VA = ppu.vr[op.va]; // copy: d may alias VA
		u8 sh = ppu.vr[op.vb]._u8[15] & 0x7;

		d._u8[15] = VA._u8[15] >> sh;
		for (uint b = 14; ~b; b--) // counts 14 down to 0 (~b == 0 only when b wraps)
		{
			sh = ppu.vr[op.vb]._u8[b] & 0x7;
			// carry in the bits shifted out of the next (higher) byte
			d._u8[b] = (VA._u8[b] >> sh) | (VA._u8[b + 1] << (8 - sh));
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSRAB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint i = 0; i < 16; i++)
|
|
{
|
|
d._s8[i] = a._s8[i] >> (b._u8[i] & 0x7);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSRAH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint h = 0; h < 8; h++)
|
|
{
|
|
d._s16[h] = a._s16[h] >> (b._u16[h] & 0xf);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSRAW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint w = 0; w < 4; w++)
|
|
{
|
|
d._s32[w] = a._s32[w] >> (b._u32[w] & 0x1f);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSRB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint i = 0; i < 16; i++)
|
|
{
|
|
d._u8[i] = a._u8[i] >> (b._u8[i] & 0x7);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSRH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint h = 0; h < 8; h++)
|
|
{
|
|
d._u16[h] = a._u16[h] >> (b._u16[h] & 0xf);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSRO()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
v128 VA = ppu.vr[op.va];
|
|
u8 nShift = (ppu.vr[op.vb]._u8[0] >> 3) & 0xf;
|
|
|
|
d.clear();
|
|
|
|
for (u8 b = 0; b < 16 - nShift; b++)
|
|
{
|
|
d._u8[b] = VA._u8[b + nShift];
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VSRW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
auto& d = ppu.vr[op.vd];
|
|
const auto& a = ppu.vr[op.va];
|
|
const auto& b = ppu.vr[op.vb];
|
|
|
|
for (uint w = 0; w < 4; w++)
|
|
{
|
|
d._u32[w] = a._u32[w] >> (b._u32[w] & 0x1f);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBCUW()
{
	// Vector Subtract Carryout Unsigned Word: per-lane borrow indicator,
	// 1 when a >= b (no borrow), 0 otherwise.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		// geu32 yields all-ones/all-zeros per lane; shift keeps only bit 0
		d = gv_shr32(gv_geu32(std::move(a), std::move(b)), 31);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBFP()
{
	// Vector Subtract FP: d = a - b per lane, with optional denormal flushing
	// (non-Java mode) and NaN fixups selected by Flags.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<use_nj, fix_nj, set_vnan, fix_vnan>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a_, auto&& b_, auto&& jm_mask)
	{
		auto m = gv_bcst32(jm_mask, &ppu_thread::jm_mask);
		auto a = ppu_flush_denormal<false, Flags...>(m, std::move(a_));
		auto b = ppu_flush_denormal<false, Flags...>(m, std::move(b_));
		d = ppu_flush_denormal<true, Flags...>(std::move(m), ppu_set_vnan<Flags...>(gv_subfs(a, b), a, b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.jm_mask);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBSBS()
{
	// Vector Subtract Signed Byte Saturate: d = sat_s8(a - b) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			// Saturation occurred iff the saturating and wrapping results differ;
			// XOR exposes the difference, OR accumulates it into the sticky flag.
			auto r = gv_subs_s8(a, b);
			sat = gv_or32(gv_xor32(gv_sub8(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_subs_s8(std::move(a), std::move(b));
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBSHS()
{
	// Vector Subtract Signed Halfword Saturate: d = sat_s16(a - b) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			// Saturation occurred iff the saturating and wrapping results differ.
			auto r = gv_subs_s16(a, b);
			sat = gv_or32(gv_xor32(gv_sub16(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_subs_s16(std::move(a), std::move(b));
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBSWS()
{
	// Vector Subtract Signed Word Saturate: d = sat_s32(a - b) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			// Saturation occurred iff the saturating and wrapping results differ.
			auto r = gv_subs_s32(a, b);
			sat = gv_or32(gv_xor32(gv_sub32(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_subs_s32(std::move(a), std::move(b));
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBUBM()
{
	// Vector Subtract Unsigned Byte Modulo: d = (a - b) mod 256 per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_sub8(std::move(a), std::move(b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBUBS()
{
	// Vector Subtract Unsigned Byte Saturate: d = max(a - b, 0) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			// Saturation occurred iff the saturating and wrapping results differ.
			auto r = gv_subus_u8(a, b);
			sat = gv_or32(gv_xor32(gv_sub8(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_subus_u8(std::move(a), std::move(b));
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBUHM()
{
	// Vector Subtract Unsigned Halfword Modulo: d = (a - b) mod 2^16 per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_sub16(std::move(a), std::move(b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBUHS()
{
	// Vector Subtract Unsigned Halfword Saturate: d = max(a - b, 0) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			// Saturation occurred iff the saturating and wrapping results differ.
			auto r = gv_subus_u16(a, b);
			sat = gv_or32(gv_xor32(gv_sub16(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_subus_u16(std::move(a), std::move(b));
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBUWM()
{
	// Vector Subtract Unsigned Word Modulo: d = (a - b) mod 2^32 per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

	static const auto exec = [](auto&& d, auto&& a, auto&& b)
	{
		d = gv_sub32(std::move(a), std::move(b));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUBUWS()
{
	// Vector Subtract Unsigned Word Saturate: d = max(a - b, 0) per lane.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		if constexpr (((Flags == set_sat) || ...))
		{
			// Saturation occurred iff the saturating and wrapping results differ.
			auto r = gv_subus_u32(a, b);
			sat = gv_or32(gv_xor32(gv_sub32(std::move(a), std::move(b)), r), std::move(sat));
			d = std::move(r);
		}
		else
		{
			d = gv_subus_u32(std::move(a), std::move(b));
		}
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUMSWS()
{
	// Vector Sum Across Signed Word Saturate: sum all four s32 lanes of VA plus
	// the low (big-endian last) word of VB, saturate to s32, clear upper lanes.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		// 64-bit accumulation cannot overflow for five s32 inputs
		s64 sum = s64{b._s32[0]} + a._s32[0] + a._s32[1] + a._s32[2] + a._s32[3];
		if (sum > INT32_MAX)
		{
			sum = u32(INT32_MAX);
			if constexpr (((Flags == set_sat) || ...))
				sat._bytes[0] = 1; // sticky SAT flag
		}
		else if (sum < INT32_MIN)
		{
			sum = u32(INT32_MIN);
			if constexpr (((Flags == set_sat) || ...))
				sat._bytes[0] = 1; // sticky SAT flag
		}
		else
		{
			sum = static_cast<u32>(sum); // zero-extend the 32-bit result
		}

		// Writing the full-width member zeroes the upper 96 bits of the destination
		d._u = sum;
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUM2SWS()
{
	// Vector Sum Across Partial (1/2) Signed Word Saturate: two halves of VA
	// are summed with the corresponding word of VB and saturated to s32;
	// the other word of each doubleword is zeroed.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
#if defined(__AVX512VL__)
		// Widen word pairs to s64, add, then saturating-narrow back to s32
		const auto x = gv_add64(gv_sar64(gv_shl64(a, 32), 32), gv_sar64(a, 32));
		const auto y = gv_add64(x, gv_sar64(gv_shl64(b, 32), 32));
		const auto r = _mm_unpacklo_epi32(_mm_cvtsepi64_epi32(y), _mm_setzero_si128());
#elif defined(ARCH_ARM64)
		const auto x = vaddl_s32(vget_low_s32(vuzp1q_s32(a, a)), vget_low_s32(vuzp2q_s32(a, a)));
		const auto y = vaddw_s32(x, vget_low_s32(vuzp1q_s32(b, b)));
		const auto r = vmovl_u32(uint32x2_t(vqmovn_s64(y)));
#else
		// Scalar fallback: 64-bit sums, then manual clamp to s32 range
		v128 y{};
		y._s64[0] = s64{a._s32[0]} + a._s32[1] + b._s32[0];
		y._s64[1] = s64{a._s32[2]} + a._s32[3] + b._s32[2];
		v128 r{};
		r._u64[0] = y._s64[0] > INT32_MAX ? INT32_MAX : y._s64[0] < INT32_MIN ? u32(INT32_MIN) : static_cast<u32>(y._s64[0]);
		r._u64[1] = y._s64[1] > INT32_MAX ? INT32_MAX : y._s64[1] < INT32_MIN ? u32(INT32_MIN) : static_cast<u32>(y._s64[1]);
#endif
		if constexpr (((Flags == set_sat) || ...))
			// Biasing by 2^31 leaves nonzero high bits only for sums outside s32 range
			sat = gv_or32(gv_shr64(gv_add64(y, gv_bcst64(0x80000000u)), 32), std::move(sat));
		d = r;
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUM4SBS()
{
	// Vector Sum Across Partial (1/4) Signed Byte Saturate: each word of VD is
	// the saturated sum of four signed bytes of VA plus the matching word of VB.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		// Possible AVX512-VNNI forms, kept for reference:
		//const auto r = _mm_dpbusds_epi32(b, _mm_set1_epi8(1), a);
		//const auto s = _mm_dpbusd_epi32(b, _mm_set1_epi8(1), a);
		auto x = gv_hadds8x4(a);
		auto r = gv_adds_s32(x, b);
		if constexpr (((Flags == set_sat) || ...))
			// Saturation occurred iff the saturating and wrapping adds differ.
			sat = gv_or32(gv_xor32(gv_add32(std::move(x), std::move(b)), r), std::move(sat));
		d = std::move(r);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUM4SHS()
{
	// Vector Sum Across Partial (1/4) Signed Halfword Saturate: each word of VD
	// is the saturated sum of two signed halfwords of VA plus the matching word of VB.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		// Possible AVX512-VNNI forms, kept for reference:
		//const auto r = _mm_dpwssds_epi32(b, a, _mm_set1_epi16(1));
		//const auto s = _mm_dpwssd_epi32(b, a, _mm_set1_epi16(1));
		auto x = gv_hadds16x2(a);
		auto r = gv_adds_s32(x, b);
		if constexpr (((Flags == set_sat) || ...))
			// Saturation occurred iff the saturating and wrapping adds differ.
			sat = gv_or32(gv_xor32(gv_add32(std::move(x), std::move(b)), r), std::move(sat));
		d = std::move(r);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VSUM4UBS()
{
	// Vector Sum Across Partial (1/4) Unsigned Byte Saturate: each word of VD is
	// the saturated sum of four unsigned bytes of VA plus the matching word of VB.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_sat>(); // flag-audit pass

	static const auto exec = [](auto&& d, auto&& a, auto&& b, auto&& sat)
	{
		auto x = gv_haddu8x4(a);
		auto r = gv_addus_u32(x, b);
		if constexpr (((Flags == set_sat) || ...))
			// Saturation occurred iff the saturating and wrapping adds differ.
			sat = gv_or32(gv_xor32(gv_add32(std::move(x), std::move(b)), r), std::move(sat));
		d = std::move(r);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb], ppu.sat);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VUPKHPX()
{
	// Vector Unpack High Pixel: expand four 1/5/5/5 16-bit pixels from the high
	// (big-endian) half of VB into 32-bit pixels with sign-extended alpha.
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>(); // flag-audit pass: no optional exec flags

#if defined(ARCH_X64_0)
	// Disabled JIT sketch, kept for reference
	static const auto make = [](asmjit::ppu_builder& c)
	{
		const auto [v0, v1, v2] = c.vec_alloc<3>();
		EMIT(punpckhwd, v0, v0, c.ppu_vr(s_op.vb));
		EMIT(psrad, v0, v0, c.imm(16));
		EMIT(pslld, v1, v0, c.imm(6));
		EMIT(pslld, v2, v0, c.imm(3));
		BCST(pand, d, v0, v0, c.get_bcst<u32>(0xff00001f));
		BCST(pand, d, v1, v1, c.get_bcst<u32>(0x1f0000));
		BCST(pand, d, v2, v2, c.get_bcst<u32>(0x1f00));
		EMIT(por, v0, v0, v1);
		EMIT(por, v0, v0, v2);
		LDST(movaps, c.ppu_vr(s_op.vd, true), v0);
		c.ppu_ret();
	};
#endif
	static const auto exec = [](auto&& d, auto&& b)
	{
		// Sign-extend each pixel to 32 bits, then shift/mask the A, R, G, B fields into place
		const auto x = gv_extend_hi_s16(b);
		d = gv_and32(x, gv_bcst32(0xff00001f)) | gv_and32(gv_shl32(x, 6), gv_bcst32(0x1f0000)) | gv_and32(gv_shl32(x, 3), gv_bcst32(0x1f00));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VUPKHSB()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

#if defined(ARCH_X64_0)
	// Disabled asmjit codegen variant (unpack-high + arithmetic shift).
	static const auto make = [](asmjit::ppu_builder& c)
	{
		const auto v0 = c.vec_alloc();
		EMIT(punpckhbw, v0, v0, c.ppu_vr(s_op.vb));
		EMIT(psraw, v0, v0, c.imm(8));
		LDST(movaps, c.ppu_vr(s_op.vd, true), v0);
		c.ppu_ret();
	};
#endif
	// Vector Unpack High Signed Byte: sign-extend the high 8 bytes of vb
	// into 8 halfwords.
	static const auto exec = [](auto&& d, auto&& b)
	{
		d = gv_extend_hi_s8(b);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VUPKHSH()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

#if defined(ARCH_X64_0)
	// Disabled asmjit codegen variant (unpack-high + arithmetic shift).
	static const auto make = [](asmjit::ppu_builder& c)
	{
		const auto v0 = c.vec_alloc();
		EMIT(punpckhwd, v0, v0, c.ppu_vr(s_op.vb));
		EMIT(psrad, v0, v0, c.imm(16));
		LDST(movaps, c.ppu_vr(s_op.vd, true), v0);
		c.ppu_ret();
	};
#endif
	// Vector Unpack High Signed Halfword: sign-extend the high 4 halfwords
	// of vb into 4 words.
	static const auto exec = [](auto&& d, auto&& b)
	{
		d = gv_extend_hi_s16(b);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VUPKLPX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

#if defined(ARCH_X64_0)
	// Disabled asmjit codegen variant of the same unpack (kept for reference).
	static const auto make = [](asmjit::ppu_builder& c)
	{
		const auto [v0, v1, v2] = c.vec_alloc<3>();
		if (utils::has_sse41())
		{
			LDST(pmovsxwd, v0, c.ppu_vr<8>(s_op.vb));
		}
		else
		{
			EMIT(punpcklwd, v0, v0, c.ppu_vr(s_op.vb));
			EMIT(psrad, v0, v0, c.imm(16));
		}
		EMIT(pslld, v1, v0, c.imm(6));
		EMIT(pslld, v2, v0, c.imm(3));
		BCST(pand, d, v0, v0, c.get_bcst<u32>(0xff00001f));
		BCST(pand, d, v1, v1, c.get_bcst<u32>(0x1f0000));
		BCST(pand, d, v2, v2, c.get_bcst<u32>(0x1f00));
		EMIT(por, v0, v0, v1);
		EMIT(por, v0, v0, v2);
		LDST(movaps, c.ppu_vr(s_op.vd, true), v0);
		c.ppu_ret();
	};
#endif
	// Vector Unpack Low Pixel: same field spread as VUPKHPX but operating on
	// the low halfwords of vb.
	static const auto exec = [](auto&& d, auto&& b)
	{
		const auto x = gv_extend_lo_s16(b);
		d = gv_and32(x, gv_bcst32(0xff00001f)) | gv_and32(gv_shl32(x, 6), gv_bcst32(0x1f0000)) | gv_and32(gv_shl32(x, 3), gv_bcst32(0x1f00));
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VUPKLSB()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

#if defined(ARCH_X64_0)
	// Disabled asmjit codegen variant (pmovsxbw when SSE4.1 is available).
	static const auto make = [](asmjit::ppu_builder& c)
	{
		const auto v0 = c.vec_alloc();
		if (utils::has_sse41())
		{
			LDST(pmovsxbw, v0, c.ppu_vr<8>(s_op.vb));
		}
		else
		{
			EMIT(punpcklbw, v0, v0, c.ppu_vr(s_op.vb));
			EMIT(psraw, v0, v0, c.imm(8));
		}
		LDST(movaps, c.ppu_vr(s_op.vd, true), v0);
		c.ppu_ret();
	};
#endif
	// Vector Unpack Low Signed Byte: sign-extend the low 8 bytes of vb
	// into 8 halfwords.
	static const auto exec = [](auto&& d, auto&& b)
	{
		d = gv_extend_lo_s8(b);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto VUPKLSH()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

#if defined(ARCH_X64_0)
	// Disabled asmjit codegen variant (pmovsxwd when SSE4.1 is available).
	static const auto make = [](asmjit::ppu_builder& c)
	{
		const auto v0 = c.vec_alloc();
		if (utils::has_sse41())
		{
			LDST(pmovsxwd, v0, c.ppu_vr<8>(s_op.vb));
		}
		else
		{
			EMIT(punpcklwd, v0, v0, c.ppu_vr(s_op.vb));
			EMIT(psrad, v0, v0, c.imm(16));
		}
		LDST(movaps, c.ppu_vr(s_op.vd, true), v0);
		c.ppu_ret();
	};
#endif
	// Vector Unpack Low Signed Halfword: sign-extend the low 4 halfwords
	// of vb into 4 words.
	static const auto exec = [](auto&& d, auto&& b)
	{
		d = gv_extend_lo_s16(b);
	};

	RETURN_(ppu.vr[op.vd], ppu.vr[op.vb]);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto VXOR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& d, auto&& a, auto&& b)
|
|
{
|
|
d = gv_xorfs(std::move(a), std::move(b));
|
|
};
|
|
|
|
RETURN(ppu.vr[op.vd], ppu.vr[op.va], ppu.vr[op.vb]);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto TDI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Trap Doubleword Immediate: compares (ra) against the sign-extended
	// immediate and traps when any condition selected by the TO bits holds:
	// 0x10 = signed less, 0x08 = signed greater, 0x04 = equal,
	// 0x02 = unsigned less, 0x01 = unsigned greater.
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
	{
		const s64 a = ppu.gpr[op.ra], b = op.simm16;
		const u64 a_ = a, b_ = b; // Same values reinterpreted for the unsigned comparisons

		if (((op.bo & 0x10) && a < b) ||
			((op.bo & 0x8) && a > b) ||
			((op.bo & 0x4) && a == b) ||
			((op.bo & 0x2) && a_ < b_) ||
			((op.bo & 0x1) && a_ > b_))
		{
			[[unlikely]]
			ppu_trap(ppu, vm::get_addr(this_op));
			return;
		}
		// No trap: tail-dispatch the next decoded instruction
		return next_fn->fn(ppu, {this_op[1]}, this_op + 1, next_fn + 1);
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto TWI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Trap Word Immediate: 32-bit variant of TDI; compares the low word of
	// (ra) against the sign-extended immediate using the same TO bits
	// (0x10 lt, 0x08 gt, 0x04 eq, 0x02 ltu, 0x01 gtu).
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
	{
		const s32 a = static_cast<u32>(ppu.gpr[op.ra]), b = op.simm16;
		const u32 a_ = a, b_ = b; // Same values reinterpreted for the unsigned comparisons

		if (((op.bo & 0x10) && a < b) ||
			((op.bo & 0x8) && a > b) ||
			((op.bo & 0x4) && a == b) ||
			((op.bo & 0x2) && a_ < b_) ||
			((op.bo & 0x1) && a_ > b_))
		{
			[[unlikely]]
			ppu_trap(ppu, vm::get_addr(this_op));
			return;
		}
		// No trap: tail-dispatch the next decoded instruction
		return next_fn->fn(ppu, {this_op[1]}, this_op + 1, next_fn + 1);
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MULLI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.rd] = static_cast<s64>(ppu.gpr[op.ra]) * op.simm16;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto SUBFIC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 a = ppu.gpr[op.ra];
|
|
const s64 i = op.simm16;
|
|
const auto r = add64_flags(~a, i, 1);
|
|
ppu.gpr[op.rd] = r.result;
|
|
ppu.xer.ca = r.carry;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CMPLI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if (op.l10)
|
|
{
|
|
ppu_cr_set<u64>(ppu, op.crfd, ppu.gpr[op.ra], op.uimm16);
|
|
}
|
|
else
|
|
{
|
|
ppu_cr_set<u32>(ppu, op.crfd, static_cast<u32>(ppu.gpr[op.ra]), op.uimm16);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CMPI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if (op.l10)
|
|
{
|
|
ppu_cr_set<s64>(ppu, op.crfd, ppu.gpr[op.ra], op.simm16);
|
|
}
|
|
else
|
|
{
|
|
ppu_cr_set<s32>(ppu, op.crfd, static_cast<u32>(ppu.gpr[op.ra]), op.simm16);
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ADDIC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const s64 a = ppu.gpr[op.ra];
|
|
const s64 i = op.simm16;
|
|
const auto r = add64_flags(a, i);
|
|
ppu.gpr[op.rd] = r.result;
|
|
ppu.xer.ca = r.carry;
|
|
if (op.main & 1) [[unlikely]] ppu_cr_set<s64>(ppu, 0, r.result, 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ADDI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.rd] = op.ra ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ADDIS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.rd] = op.ra ? ppu.gpr[op.ra] + (op.simm16 * 65536) : (op.simm16 * 65536);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto BC()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Branch Conditional (relative/absolute, fixed 14-bit target).
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
	{
		const bool bo0 = (op.bo & 0x10) != 0; // Ignore CR bit
		const bool bo1 = (op.bo & 0x08) != 0; // Required CR bit value
		const bool bo2 = (op.bo & 0x04) != 0; // Don't decrement/test CTR
		const bool bo3 = (op.bo & 0x02) != 0; // Branch when CTR == 0 (vs != 0)

		ppu.ctr -= (bo2 ^ true); // Decrement CTR unless BO2 is set
		const u32 link = vm::get_addr(this_op) + 4;
		if (op.lk) ppu.lr = link;

		const bool ctr_ok = bo2 | ((ppu.ctr != 0) ^ bo3);
		const bool cond_ok = bo0 | (!!(ppu.cr[op.bi]) ^ (bo1 ^ true));

		const u32 old_cia = ppu.cia;

		if (ctr_ok && cond_ok)
		{
			ppu.cia = vm::get_addr(this_op);
			// Provide additional information by using the origin of the call
			// Because this is a fixed target branch there's no ambiguity about it
			ppu_record_call(ppu, ppu.cia, op);

			// AA=1: absolute target; AA=0: relative to this instruction
			ppu.cia = (op.aa ? 0 : ppu.cia) + op.bt14;
		}
		else if (!ppu.state) [[likely]]
		{
			// Not taken and no pending state: fall through via tail dispatch
			return next_fn->fn(ppu, {this_op[1]}, this_op + 1, next_fn + 1);
		}
		else
		{
			// Not taken but state flags pending: exit to the dispatcher
			ppu.cia = link;
		}

		// Account executed bytes up to (and including) this instruction
		ppu.exec_bytes += link - old_cia;
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto SC()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// System Call: only the canonical SC(0) encoding is accepted; the
	// syscall number is taken from r11.
	if constexpr (Build == 0)
	{
		return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func*)
		{
			const u32 old_cia = ppu.cia;
			ppu.cia = vm::get_addr(this_op);
			ppu.exec_bytes += ppu.cia - old_cia;
			if (op.opcode != ppu_instructions::SC(0))
			{
				fmt::throw_exception("Unknown/Illegal SC: 0x%08x", op.opcode);
			}

			ppu_execute_syscall(ppu, ppu.gpr[11]);
		};
	}
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto B()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Branch (unconditional, fixed 24-bit target; AA selects absolute,
	// LK writes the return address to LR).
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func*)
	{
		const u32 old_cia = ppu.cia;
		const u32 link = (ppu.cia = vm::get_addr(this_op)) + 4;
		// Provide additional information by using the origin of the call
		// Because this is a fixed target branch there's no ambiguity about it
		ppu_record_call(ppu, ppu.cia, op);

		ppu.cia = (op.aa ? 0 : ppu.cia) + op.bt24;
		if (op.lk) ppu.lr = link;
		ppu.exec_bytes += link - old_cia;
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MCRF()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Move Condition Register Field: copies one 4-bit CR field.
	// The CR is stored expanded (32 bytes total, 4 bytes per field), so a
	// field copy is a single 4-byte assignment.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		CHECK_SIZE(ppu_thread::cr, 32);
		ppu.cr.fields[op.crfd] = ppu.cr.fields[op.crfs];
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto BCLR()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Branch Conditional to Link Register (function return / indirect jump).
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
	{
		const bool bo0 = (op.bo & 0x10) != 0; // Ignore CR bit
		const bool bo1 = (op.bo & 0x08) != 0; // Required CR bit value
		const bool bo2 = (op.bo & 0x04) != 0; // Don't decrement/test CTR
		const bool bo3 = (op.bo & 0x02) != 0; // Branch when CTR == 0 (vs != 0)

		ppu.ctr -= (bo2 ^ true); // Decrement CTR unless BO2 is set

		const bool ctr_ok = bo2 | ((ppu.ctr != 0) ^ bo3);
		const bool cond_ok = bo0 | (!!(ppu.cr[op.bi]) ^ (bo1 ^ true));

		// Target is read from LR before LK possibly overwrites it
		const u32 target = static_cast<u32>(ppu.lr) & ~3;
		const u32 link = vm::get_addr(this_op) + 4;
		if (op.lk) ppu.lr = link;

		const u32 old_cia = ppu.cia;

		if (ctr_ok && cond_ok)
		{
			ppu_record_call(ppu, target, op, true);
			ppu.cia = target;
		}
		else if (!ppu.state) [[likely]]
		{
			// Not taken and no pending state: fall through via tail dispatch
			return next_fn->fn(ppu, {this_op[1]}, this_op + 1, next_fn + 1);
		}
		else
		{
			// Not taken but state flags pending: exit to the dispatcher
			ppu.cia = link;
		}

		ppu.exec_bytes += link - old_cia;
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CRNOR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = (ppu.cr[op.crba] | ppu.cr[op.crbb]) ^ true;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CRANDC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = ppu.cr[op.crba] & (ppu.cr[op.crbb] ^ true);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto ISYNC()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Instruction Synchronize: modeled as an acquire fence here (the
	// interpreter has no instruction prefetch to discard).
	static const auto exec = [](auto&&, auto) {
		atomic_fence_acquire();
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CRXOR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = ppu.cr[op.crba] ^ ppu.cr[op.crbb];
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CRNAND()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = (ppu.cr[op.crba] & ppu.cr[op.crbb]) ^ true;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CRAND()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = ppu.cr[op.crba] & ppu.cr[op.crbb];
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CREQV()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = (ppu.cr[op.crba] ^ ppu.cr[op.crbb]) ^ true;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CRORC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = ppu.cr[op.crba] | (ppu.cr[op.crbb] ^ true);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CROR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.cr[op.crbd] = ppu.cr[op.crba] | ppu.cr[op.crbb];
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto BCCTR()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Branch Conditional to Count Register (indirect call/jump through CTR).
	// CTR-decrement forms are invalid for BCCTR, so only the CR condition
	// (BO bits 0x10/0x08) is evaluated.
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
	{
		const u32 link = vm::get_addr(this_op) + 4;
		if (op.lk) ppu.lr = link;
		const u32 old_cia = ppu.cia;

		// Taken when BO says "always" (0x10) or the CR bit matches BO bit 0x8
		if (op.bo & 0x10 || ppu.cr[op.bi] == ((op.bo & 0x8) != 0))
		{
			const u32 target = static_cast<u32>(ppu.ctr) & ~3;
			ppu_record_call(ppu, target, op, true);
			ppu.cia = target;
		}
		else if (!ppu.state) [[likely]]
		{
			// Not taken and no pending state: fall through via tail dispatch
			return next_fn->fn(ppu, {this_op[1]}, this_op + 1, next_fn + 1);
		}
		else
		{
			// Not taken but state flags pending: exit to the dispatcher
			ppu.cia = link;
		}

		ppu.exec_bytes += link - old_cia;
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLWIMI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 mask = ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
|
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & mask);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLWINM()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), op.sh32)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLWNM()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = dup32(utils::rol32(static_cast<u32>(ppu.gpr[op.rs]), ppu.gpr[op.rb] & 0x1f)) & ppu_rotate_mask(32 + op.mb32, 32 + op.me32);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ORI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] | op.uimm16;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ORIS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] | (u64{op.uimm16} << 16);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto XORI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] ^ op.uimm16;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto XORIS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] ^ (u64{op.uimm16} << 16);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ANDI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] & op.uimm16;
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ANDIS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] & (u64{op.uimm16} << 16);
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLDICL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull >> op.mbe64);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLDICR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & (~0ull << (op.mbe64 ^ 63));
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLDIC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], op.sh64) & ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLDIMI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 mask = ppu_rotate_mask(op.mbe64, op.sh64 ^ 63);
|
|
ppu.gpr[op.ra] = (ppu.gpr[op.ra] & ~mask) | (utils::rol64(ppu.gpr[op.rs], op.sh64) & mask);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLDCL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull >> op.mbe64);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto RLDCR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = utils::rol64(ppu.gpr[op.rs], ppu.gpr[op.rb] & 0x3f) & (~0ull << (op.mbe64 ^ 63));
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CMP()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if (op.l10)
|
|
{
|
|
ppu_cr_set<s64>(ppu, op.crfd, ppu.gpr[op.ra], ppu.gpr[op.rb]);
|
|
}
|
|
else
|
|
{
|
|
ppu_cr_set<s32>(ppu, op.crfd, static_cast<u32>(ppu.gpr[op.ra]), static_cast<u32>(ppu.gpr[op.rb]));
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto TW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Trap Word: register-register variant of TWI; traps when any condition
	// selected by the TO bits holds (0x10 lt, 0x08 gt, 0x04 eq, 0x02 ltu,
	// 0x01 gtu on the low words of ra/rb).
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
	{
		s32 a = static_cast<s32>(ppu.gpr[op.ra]);
		s32 b = static_cast<s32>(ppu.gpr[op.rb]);

		if ((a < b && (op.bo & 0x10)) ||
			(a > b && (op.bo & 0x8)) ||
			(a == b && (op.bo & 0x4)) ||
			(static_cast<u32>(a) < static_cast<u32>(b) && (op.bo & 0x2)) ||
			(static_cast<u32>(a) > static_cast<u32>(b) && (op.bo & 0x1)))
		{
			[[unlikely]]
			ppu_trap(ppu, vm::get_addr(this_op));
			return;
		}
		// No trap: tail-dispatch the next decoded instruction
		return next_fn->fn(ppu, {this_op[1]}, this_op + 1, next_fn + 1);
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LVSL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.vr[op.vd] = sse_altivec_lvsl(addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LVEBX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~0xfull;
|
|
ppu.vr[op.vd] = ppu_feed_data<v128>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto SUBFC()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Subtract From Carrying: rd = rb - ra, computed as ~ra + rb + 1 so the
	// carry flag falls out of the addition.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const u64 RB = ppu.gpr[op.rb];
		const auto r = add64_flags(~RA, RB, 1);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		if constexpr (((Flags == has_oe) || ...))
			// Signed overflow of (~RA + RB + 1): operand signs equal but result sign differs
			ppu_ov_set(ppu, (~RA >> 63 == RB >> 63) && (~RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MULHDU()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.rd] = utils::umulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto ADDC()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Add Carrying: rd = ra + rb, CA = carry out of bit 63.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const u64 RB = ppu.gpr[op.rb];
		const auto r = add64_flags(RA, RB);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		if constexpr (((Flags == has_oe) || ...))
			// Signed overflow: operand signs equal but result sign differs
			ppu_ov_set(ppu, (RA >> 63 == RB >> 63) && (RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MULHWU()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
u32 a = static_cast<u32>(ppu.gpr[op.ra]);
|
|
u32 b = static_cast<u32>(ppu.gpr[op.rb]);
|
|
ppu.gpr[op.rd] = (u64{a} * b) >> 32;
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MFOCRF()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Move From One Condition Register Field (l11 set) or Move From CR
	// (l11 clear). The CR is stored as 32 individual 0/1 bytes.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		if (op.l11)
		{
			// MFOCRF
			// Index of the (first) field selected by the CRM mask
			const u32 n = std::countl_zero<u32>(op.crm) & 7;
			const u32 p = n * 4;
			// Pack the field's 4 stored bytes back into a 4-bit value
			const u32 v = ppu.cr[p + 0] << 3 | ppu.cr[p + 1] << 2 | ppu.cr[p + 2] << 1 | ppu.cr[p + 3] << 0;

			// Place the field at its architectural position within the word
			ppu.gpr[op.rd] = v << (p ^ 0x1c);
		}
		else
		{
			// MFCR
			// Gather all 32 CR bytes with movemask: shift each byte's 0/1
			// into its sign bit, then extract the 16 sign bits per lane.
			be_t<v128> lane0, lane1;
			std::memcpy(&lane0, ppu.cr.bits, sizeof(v128));
			std::memcpy(&lane1, ppu.cr.bits + 16, sizeof(v128));
			const u32 mh = _mm_movemask_epi8(_mm_slli_epi64(lane0.value(), 7));
			const u32 ml = _mm_movemask_epi8(_mm_slli_epi64(lane1.value(), 7));

			ppu.gpr[op.rd] = (mh << 16) | ml;
		}

	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LWARX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_lwarx(ppu, vm::cast(addr));
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LDX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u64>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LWZX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u32>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto SLW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = static_cast<u32>(ppu.gpr[op.rs] << (ppu.gpr[op.rb] & 0x3f));
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CNTLZW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = std::countl_zero(static_cast<u32>(ppu.gpr[op.rs]));
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto SLD()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u32 n = ppu.gpr[op.rb] & 0x7f;
|
|
ppu.gpr[op.ra] = n & 0x40 ? 0 : ppu.gpr[op.rs] << n;
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto AND()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] & ppu.gpr[op.rb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CMPL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if (op.l10)
|
|
{
|
|
ppu_cr_set<u64>(ppu, op.crfd, ppu.gpr[op.ra], ppu.gpr[op.rb]);
|
|
}
|
|
else
|
|
{
|
|
ppu_cr_set<u32>(ppu, op.crfd, static_cast<u32>(ppu.gpr[op.ra]), static_cast<u32>(ppu.gpr[op.rb]));
|
|
}
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LVSR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.vr[op.vd] = sse_altivec_lvsr(addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto LVEHX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Load Vector Element Halfword Indexed: implemented like LVX — the whole
	// 16-byte-aligned quadword at the EA is loaded (other elements are
	// architecturally undefined, so loading the full vector is permitted).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~0xfull;
		ppu.vr[op.vd] = ppu_feed_data<v128>(ppu, addr);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto SUBF()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 RA = ppu.gpr[op.ra];
|
|
const u64 RB = ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = RB - RA;
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_ov_set(ppu, (~RA >> 63 == RB >> 63) && (~RA >> 63 != ppu.gpr[op.rd] >> 63));
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LDUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u64>(ppu, addr);
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DCBST()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Data Cache Block Store: cache-maintenance hint, a no-op under emulation.
	static const auto exec = [](auto&&, auto) {
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LWZUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u32>(ppu, addr);
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto CNTLZD()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = std::countl_zero(ppu.gpr[op.rs]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ANDC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] & ~ppu.gpr[op.rb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto TD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Trap Doubleword: traps when any condition selected by the TO field
	// (encoded in the same bits as op.bo) holds between RA and RB.
	if constexpr (Build == 0) return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func* next_fn)
	{
		const s64 a = ppu.gpr[op.ra], b = ppu.gpr[op.rb];
		const u64 a_ = a, b_ = b; // unsigned views for the unsigned comparisons

		// TO bits: 0x10 = signed <, 0x8 = signed >, 0x4 = ==, 0x2 = unsigned <, 0x1 = unsigned >
		if (((op.bo & 0x10) && a < b) ||
			((op.bo & 0x8) && a > b) ||
			((op.bo & 0x4) && a == b) ||
			((op.bo & 0x2) && a_ < b_) ||
			((op.bo & 0x1) && a_ > b_))
		{
			[[unlikely]]
			ppu_trap(ppu, vm::get_addr(this_op));
			return;
		}

		// No trap: continue with the next decoded instruction.
		return next_fn->fn(ppu, {this_op[1]}, this_op + 1, next_fn + 1);
	};
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto LVEWX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Load Vector Element Word Indexed: implemented like LVX — the whole
	// 16-byte-aligned quadword at the EA is loaded (other elements are
	// architecturally undefined, so loading the full vector is permitted).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~0xfull;
		ppu.vr[op.vd] = ppu_feed_data<v128>(ppu, addr);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MULHD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Multiply High Doubleword: RD = high 64 bits of the signed 128-bit product RA * RB.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		ppu.gpr[op.rd] = utils::mulh64(ppu.gpr[op.ra], ppu.gpr[op.rb]);
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MULHW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
s32 a = static_cast<s32>(ppu.gpr[op.ra]);
|
|
s32 b = static_cast<s32>(ppu.gpr[op.rb]);
|
|
ppu.gpr[op.rd] = (s64{a} * b) >> 32;
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto LDARX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Load Doubleword and Reserve Indexed: atomic load that establishes a
	// reservation for a later STDCX. on the same address.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.gpr[op.rd] = ppu_ldarx(ppu, vm::cast(addr));
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DCBF()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Data Cache Block Flush: cache-maintenance hint, a no-op under emulation.
	static const auto exec = [](auto&&, auto) {
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LBZX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u8>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LVX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~0xfull;
|
|
ppu.vr[op.vd] = ppu_feed_data<v128>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto NEG()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Negate: RD = -RA (two's complement).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		ppu.gpr[op.rd] = 0 - RA;
		// Overflow test reduces to RA == INT64_MIN (the only value whose negation overflows).
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, (~RA >> 63 == 0) && (~RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LBZUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u8>(ppu, addr);
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto NOR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ~(ppu.gpr[op.rs] | ppu.gpr[op.rb]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STVEBX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
const u8 eb = addr & 0xf;
|
|
vm::write8(vm::cast(addr), ppu.vr[op.vs]._u8[15 - eb]);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto SUBFE()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Subtract From Extended: RD = ~RA + RB + CA, updating the carry flag.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const u64 RB = ppu.gpr[op.rb];
		const auto r = add64_flags(~RA, RB, ppu.xer.ca);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		// Signed overflow: addends share a sign that differs from the result's.
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, (~RA >> 63 == RB >> 63) && (~RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto ADDE()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Add Extended: RD = RA + RB + CA, updating the carry flag.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const u64 RB = ppu.gpr[op.rb];
		const auto r = add64_flags(RA, RB, ppu.xer.ca);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		// Signed overflow: addends share a sign that differs from the result's.
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, (RA >> 63 == RB >> 63) && (RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MTOCRF()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Move To (One) Condition Register Field(s). The CR is stored unpacked as
	// one byte per bit, so each 4-bit CR nibble is expanded via a lookup table
	// and written as a single 32-bit store into cr.fields.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		// Expansion table: 4-bit value -> 4 bytes (one per CR bit).
		alignas(4) static const u8 s_table[16][4]
		{
			{0, 0, 0, 0},
			{0, 0, 0, 1},
			{0, 0, 1, 0},
			{0, 0, 1, 1},
			{0, 1, 0, 0},
			{0, 1, 0, 1},
			{0, 1, 1, 0},
			{0, 1, 1, 1},
			{1, 0, 0, 0},
			{1, 0, 0, 1},
			{1, 0, 1, 0},
			{1, 0, 1, 1},
			{1, 1, 0, 0},
			{1, 1, 0, 1},
			{1, 1, 1, 0},
			{1, 1, 1, 1},
		};

		const u64 s = ppu.gpr[op.rs];

		if (op.l11)
		{
			// MTOCRF: op.crm must have exactly one bit set; write that single field.

			// Field index from the single set bit (low 3 bits of the leading-zero count).
			const u32 n = std::countl_zero<u32>(op.crm) & 7;
			// Extract the corresponding nibble of RS; (n*4)^0x1c maps field 0 to bits 31..28.
			const u64 v = (s >> ((n * 4) ^ 0x1c)) & 0xf;
			ppu.cr.fields[n] = *reinterpret_cast<const u32*>(s_table + v);
		}
		else
		{
			// MTCRF: copy every field whose mask bit is set in op.crm.

			for (u32 i = 0; i < 8; i++)
			{
				if (op.crm & (128 >> i))
				{
					const u64 v = (s >> ((i * 4) ^ 0x1c)) & 0xf;
					ppu.cr.fields[i] = *reinterpret_cast<const u32*>(s_table + v);
				}
			}
		}
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STDX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
vm::write64(vm::cast(addr), ppu.gpr[op.rs]);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto STWCX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Store Word Conditional Indexed: store succeeds only if the reservation
	// from LWARX still holds; CR0 = 0b00<success><XER.SO>.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu_cr_set(ppu, 0, false, false, ppu_stwcx(ppu, vm::cast(addr), static_cast<u32>(ppu.gpr[op.rs])), ppu.xer.so);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STWX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
vm::write32(vm::cast(addr), static_cast<u32>(ppu.gpr[op.rs]));
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STVEHX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~1ULL;
|
|
const u8 eb = (addr & 0xf) >> 1;
|
|
vm::write16(vm::cast(addr), ppu.vr[op.vs]._u16[7 - eb]);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STDUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
|
|
vm::write64(vm::cast(addr), ppu.gpr[op.rs]);
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STWUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
|
|
vm::write32(vm::cast(addr), static_cast<u32>(ppu.gpr[op.rs]));
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STVEWX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~3ULL;
|
|
const u8 eb = (addr & 0xf) >> 2;
|
|
vm::write32(vm::cast(addr), ppu.vr[op.vs]._u32[3 - eb]);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto SUBFZE()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Subtract From Zero Extended: RD = ~RA + 0 + CA, updating the carry flag.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const auto r = add64_flags(~RA, 0, ppu.xer.ca);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		// Signed overflow: ~RA non-negative (second addend 0) and result sign flips.
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, (~RA >> 63 == 0) && (~RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto ADDZE()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Add to Zero Extended: RD = RA + 0 + CA, updating the carry flag.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const auto r = add64_flags(RA, 0, ppu.xer.ca);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		// Signed overflow: RA non-negative (second addend 0) and result sign flips.
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, (RA >> 63 == 0) && (RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto STDCX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Store Doubleword Conditional Indexed: store succeeds only if the reservation
	// from LDARX still holds; CR0 = 0b00<success><XER.SO>.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu_cr_set(ppu, 0, false, false, ppu_stdcx(ppu, vm::cast(addr), ppu.gpr[op.rs]), ppu.xer.so);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STBX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
vm::write8(vm::cast(addr), static_cast<u8>(ppu.gpr[op.rs]));
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STVX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~0xfull;
|
|
vm::_ref<v128>(vm::cast(addr)) = ppu.vr[op.vs];
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MULLD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Multiply Low Doubleword: RD = low 64 bits of RA * RB.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const s64 RA = ppu.gpr[op.ra];
		const s64 RB = ppu.gpr[op.rb];
		ppu.gpr[op.rd] = RA * RB;
		// OV is checked at runtime from the OE bit (not the has_oe template flag here):
		// overflow iff the high 64 bits of the product are not the sign extension of the low half.
		if (op.oe) [[unlikely]]
		{
			const s64 high = utils::mulh64(RA, RB);
			ppu_ov_set(ppu, high != s64(ppu.gpr[op.rd]) >> 63);
		}
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto SUBFME()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Subtract From Minus One Extended: RD = ~RA + ~0 + CA, updating the carry flag.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const auto r = add64_flags(~RA, ~0ull, ppu.xer.ca);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		// Signed overflow: both addends negative (~0 is -1) and result sign flips.
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, (~RA >> 63 == 1) && (~RA >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto ADDME()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Add to Minus One Extended: RD = RA + ~0 + CA, updating the carry flag.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const s64 RA = ppu.gpr[op.ra];
		const auto r = add64_flags(RA, ~0ull, ppu.xer.ca);
		ppu.gpr[op.rd] = r.result;
		ppu.xer.ca = r.carry;
		// Signed overflow: both addends negative (~0 is -1) and result sign flips.
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, (u64(RA) >> 63 == 1) && (u64(RA) >> 63 != ppu.gpr[op.rd] >> 63));
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, r.result, 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MULLW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Multiply Low Word: RD = full 64-bit signed product of the low words of RA and RB.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		ppu.gpr[op.rd] = s64{static_cast<s32>(ppu.gpr[op.ra])} * static_cast<s32>(ppu.gpr[op.rb]);
		// OV: product does not fit in a signed 32-bit result.
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, s64(ppu.gpr[op.rd]) < INT32_MIN || s64(ppu.gpr[op.rd]) > INT32_MAX);
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DCBTST()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Data Cache Block Touch for Store: prefetch hint, a no-op under emulation.
	static const auto exec = [](auto&&, auto) {
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STBUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
|
|
vm::write8(vm::cast(addr), static_cast<u8>(ppu.gpr[op.rs]));
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ADD()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 RA = ppu.gpr[op.ra];
|
|
const u64 RB = ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = RA + RB;
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_ov_set(ppu, (RA >> 63 == RB >> 63) && (RA >> 63 != ppu.gpr[op.rd] >> 63));
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DCBT()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Data Cache Block Touch: prefetch hint, a no-op under emulation.
	static const auto exec = [](auto&&, auto) {
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LHZX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u16>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto EQV()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ~(ppu.gpr[op.rs] ^ ppu.gpr[op.rb]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto ECIWX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// External Control In Word Indexed: not supported, always aborts.
	static const auto exec = [](auto&&, auto) {
		fmt::throw_exception("ECIWX");
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LHZUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<u16>(ppu, addr);
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto XOR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] ^ ppu.gpr[op.rb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MFSPR()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Move From Special Purpose Register: RD = SPR[n].
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		// The SPR number's two 5-bit halves are swapped in the instruction encoding.
		const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5);

		switch (n)
		{
		case 0x001: ppu.gpr[op.rd] = u32{ppu.xer.so} << 31 | ppu.xer.ov << 30 | ppu.xer.ca << 29 | ppu.xer.cnt; break; // XER (repacked from split fields)
		case 0x008: ppu.gpr[op.rd] = ppu.lr; break; // LR
		case 0x009: ppu.gpr[op.rd] = ppu.ctr; break; // CTR
		case 0x100: ppu.gpr[op.rd] = ppu.vrsave; break; // VRSAVE

		case 0x10C: ppu.gpr[op.rd] = get_timebased_time(); break; // TB
		case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break; // TBU
		default: fmt::throw_exception("MFSPR 0x%x", n); // unimplemented SPR
		}

	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LWAX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<s32>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DST()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Data Stream Touch: AltiVec prefetch hint, a no-op under emulation.
	static const auto exec = [](auto&&, auto) {
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LHAX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<s16>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LVXL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~0xfull;
|
|
ppu.vr[op.vd] = ppu_feed_data<v128>(ppu, addr);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MFTB()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Move From Time Base: RD = time base register (TB or TBU).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		// The SPR number's two 5-bit halves are swapped in the instruction encoding.
		const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5);

		switch (n)
		{
		case 0x10C: ppu.gpr[op.rd] = get_timebased_time(); break; // TB
		case 0x10D: ppu.gpr[op.rd] = get_timebased_time() >> 32; break; // TBU
		default: fmt::throw_exception("MFTB 0x%x", n); // any other SPR is invalid for MFTB
		}

	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LWAUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<s32>(ppu, addr);
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DSTST()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Data Stream Touch for Store: AltiVec prefetch hint, a no-op under emulation.
	static const auto exec = [](auto&&, auto) {
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto LHAUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
ppu.gpr[op.rd] = ppu_feed_data<s16>(ppu, addr);
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STHX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
|
|
vm::write16(vm::cast(addr), static_cast<u16>(ppu.gpr[op.rs]));
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto ORC()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] | ~ppu.gpr[op.rb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto ECOWX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// External Control Out Word Indexed: not supported, always aborts.
	static const auto exec = [](auto&&, auto) {
		fmt::throw_exception("ECOWX");
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STHUX()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
|
|
vm::write16(vm::cast(addr), static_cast<u16>(ppu.gpr[op.rs]));
|
|
ppu.gpr[op.ra] = addr;
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto OR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ppu.gpr[op.rs] | ppu.gpr[op.rb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DIVDU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Divide Doubleword Unsigned: RD = RA / RB; division by zero yields 0 and sets OV.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 RA = ppu.gpr[op.ra];
		const u64 RB = ppu.gpr[op.rb];
		ppu.gpr[op.rd] = RB == 0 ? 0 : RA / RB;
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, RB == 0);
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DIVWU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Divide Word Unsigned: RD = (u32)RA / (u32)RB; division by zero yields 0 and sets OV.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u32 RA = static_cast<u32>(ppu.gpr[op.ra]);
		const u32 RB = static_cast<u32>(ppu.gpr[op.rb]);
		ppu.gpr[op.rd] = RB == 0 ? 0 : RA / RB;
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, RB == 0);
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto MTSPR()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Move To Special Purpose Register: SPR[n] = RS.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		// The SPR number's two 5-bit halves are swapped in the instruction encoding.
		const u32 n = (op.spr >> 5) | ((op.spr & 0x1f) << 5);

		switch (n)
		{
		case 0x001:
		{
			// XER: unpack SO/OV/CA and the string count into the split fields.
			const u64 value = ppu.gpr[op.rs];
			ppu.xer.so = (value & 0x80000000) != 0;
			ppu.xer.ov = (value & 0x40000000) != 0;
			ppu.xer.ca = (value & 0x20000000) != 0;
			ppu.xer.cnt = value & 0x7f;
			break;
		}
		case 0x008: ppu.lr = ppu.gpr[op.rs]; break; // LR
		case 0x009: ppu.ctr = ppu.gpr[op.rs]; break; // CTR
		case 0x100: ppu.vrsave = static_cast<u32>(ppu.gpr[op.rs]); break; // VRSAVE
		default: fmt::throw_exception("MTSPR 0x%x", n); // unimplemented SPR
		}

	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DCBI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Data Cache Block Invalidate: cache-maintenance hint, a no-op under emulation.
	static const auto exec = [](auto&&, auto) {
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto NAND()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = ~(ppu.gpr[op.rs] & ppu.gpr[op.rb]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto STVXL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u64 addr = (op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb]) & ~0xfull;
|
|
vm::_ref<v128>(vm::cast(addr)) = ppu.vr[op.vs];
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto DIVD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	// Divide Doubleword (signed): RD = RA / RB; division by zero and
	// INT64_MIN / -1 both yield 0 and set OV.
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const s64 RA = ppu.gpr[op.ra];
		const s64 RB = ppu.gpr[op.rb];
		const bool o = RB == 0 || (RA == INT64_MIN && RB == -1);
		ppu.gpr[op.rd] = o ? 0 : RA / RB;
		if constexpr (((Flags == has_oe) || ...))
			ppu_ov_set(ppu, o);
		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
	};

	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto DIVW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const s32 RA = static_cast<s32>(ppu.gpr[op.ra]);
|
|
const s32 RB = static_cast<s32>(ppu.gpr[op.rb]);
|
|
const bool o = RB == 0 || (RA == INT32_MIN && RB == -1);
|
|
ppu.gpr[op.rd] = o ? 0 : static_cast<u32>(RA / RB);
|
|
if constexpr (((Flags == has_oe) || ...))
|
|
ppu_ov_set(ppu, o);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.rd], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
// LVLX: Load Vector Left Indexed. Loads the left part of an unaligned
// 16-byte quantity into VR[vd]; dispatches to the SSSE3 helper when the CPU
// supports it, otherwise to the generic fallback.
template <u32 Build, ppu_exec_bit... Flags>
auto LVLX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.vr[op.vd] = s_use_ssse3 ? sse_cellbe_lvlx(ppu, addr) : sse_cellbe_lvlx_v0(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LDBRX: Load Doubleword Byte-Reverse Indexed. Reading through a
// little-endian typed reference performs the byte swap relative to the
// big-endian guest memory.
template <u32 Build, ppu_exec_bit... Flags>
auto LDBRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.gpr[op.rd] = ppu_feed_data<le_t<u64>>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LSWX: Load String Word Indexed. Loads XER.cnt bytes starting at RA+RB (or
// RB if RA=0) into consecutive GPRs beginning at RD, wrapping from r31 to r0.
// Whole words are loaded first; a final partial word is assembled from bytes,
// left-justified (remaining low bytes are zero).
template <u32 Build, ppu_exec_bit... Flags>
auto LSWX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		u32 count = ppu.xer.cnt & 0x7f;
		// op is a by-value copy, so advancing op.rd here is safe.
		for (; count >= 4; count -= 4, addr += 4, op.rd = (op.rd + 1) & 31)
		{
			ppu.gpr[op.rd] = ppu_feed_data<u32>(ppu, addr);
		}
		if (count)
		{
			u32 value = 0;
			for (u32 byte = 0; byte < count; byte++)
			{
				u32 byte_value = ppu_feed_data<u8>(ppu, addr + byte);
				// (3 ^ byte) places byte 0 in the most significant position.
				value |= byte_value << ((3 ^ byte) * 8);
			}
			ppu.gpr[op.rd] = value;
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LWBRX: Load Word Byte-Reverse Indexed (byte swap via little-endian read).
template <u32 Build, ppu_exec_bit... Flags>
auto LWBRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.gpr[op.rd] = ppu_feed_data<le_t<u32>>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFSX: Load Floating-Point Single Indexed. The f32 read is widened to f64
// on assignment (FPRs are stored as doubles).
template <u32 Build, ppu_exec_bit... Flags>
auto LFSX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.fpr[op.frd] = ppu_feed_data<f32>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto SRW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = (ppu.gpr[op.rs] & 0xffffffff) >> (ppu.gpr[op.rb] & 0x3f);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto SRD()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u32 n = ppu.gpr[op.rb] & 0x7f;
|
|
ppu.gpr[op.ra] = n & 0x40 ? 0 : ppu.gpr[op.rs] >> n;
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
// LVRX: Load Vector Right Indexed. Companion of LVLX for unaligned vector
// loads; dispatches to the SSSE3 helper or the generic fallback.
template <u32 Build, ppu_exec_bit... Flags>
auto LVRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.vr[op.vd] = s_use_ssse3 ? sse_cellbe_lvrx(ppu, addr) : sse_cellbe_lvrx_v0(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LSWI: Load String Word Immediate. Loads NB bytes (op.rb field; 0 means 32)
// from the address in RA (or 0 if RA=0) into consecutive GPRs starting at RD,
// wrapping from r31 to r0. A trailing partial word is assembled
// left-justified with the remaining low bytes zero.
template <u32 Build, ppu_exec_bit... Flags>
auto LSWI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		u64 addr = op.ra ? ppu.gpr[op.ra] : 0;
		u64 N = op.rb ? op.rb : 32; // NB field: 0 encodes a count of 32
		u8 reg = op.rd;

		while (N > 0)
		{
			if (N > 3)
			{
				// Full word
				ppu.gpr[reg] = ppu_feed_data<u32>(ppu, addr);
				addr += 4;
				N -= 4;
			}
			else
			{
				// Partial word: fill from the most significant byte down.
				u32 buf = 0;
				u32 i = 3;
				while (N > 0)
				{
					N = N - 1;
					buf |= ppu_feed_data<u8>(ppu, addr) << (i * 8);
					addr++;
					i--;
				}
				ppu.gpr[reg] = buf;
			}
			reg = (reg + 1) % 32;
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFSUX: Load Floating-Point Single with Update Indexed. Loads an f32
// (widened to f64) from RA+RB, then writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LFSUX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
		ppu.fpr[op.frd] = ppu_feed_data<f32>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// SYNC: memory barrier; emulated with a host sequentially-consistent fence.
template <u32 Build, ppu_exec_bit... Flags>
auto SYNC()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&&, auto) {
		atomic_fence_seq_cst();
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFDX: Load Floating-Point Double Indexed.
template <u32 Build, ppu_exec_bit... Flags>
auto LFDX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.fpr[op.frd] = ppu_feed_data<f64>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFDUX: Load Floating-Point Double with Update Indexed. Loads from RA+RB,
// then writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LFDUX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
		ppu.fpr[op.frd] = ppu_feed_data<f64>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STVLX: Store Vector Left Indexed (unaligned vector store, left part).
// Dispatches to the SSSE3 helper or the generic fallback.
template <u32 Build, ppu_exec_bit... Flags>
auto STVLX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		s_use_ssse3 ? sse_cellbe_stvlx(addr, ppu.vr[op.vs]) : sse_cellbe_stvlx_v0(addr, ppu.vr[op.vs]);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STDBRX: Store Doubleword Byte-Reverse Indexed. Writing through a
// little-endian typed reference performs the byte swap.
template <u32 Build, ppu_exec_bit... Flags>
auto STDBRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		vm::_ref<le_t<u64>>(vm::cast(addr)) = ppu.gpr[op.rs];
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STSWX: Store String Word Indexed. Stores XER.cnt bytes from consecutive
// GPRs starting at RS (wrapping r31 -> r0) to RA+RB (or RB if RA=0). Whole
// words first; trailing bytes come from the most significant bytes of the
// next register.
template <u32 Build, ppu_exec_bit... Flags>
auto STSWX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		u32 count = ppu.xer.cnt & 0x7F;
		// op is a by-value copy, so advancing op.rs here is safe.
		for (; count >= 4; count -= 4, addr += 4, op.rs = (op.rs + 1) & 31)
		{
			vm::write32(vm::cast(addr), static_cast<u32>(ppu.gpr[op.rs]));
		}
		if (count)
		{
			u32 value = static_cast<u32>(ppu.gpr[op.rs]);
			for (u32 byte = 0; byte < count; byte++)
			{
				// (3 ^ byte) selects bytes from most significant down.
				u8 byte_value = static_cast<u8>(value >> ((3 ^ byte) * 8));
				vm::write8(vm::cast(addr + byte), byte_value);
			}
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STWBRX: Store Word Byte-Reverse Indexed (byte swap via little-endian write).
template <u32 Build, ppu_exec_bit... Flags>
auto STWBRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		vm::_ref<le_t<u32>>(vm::cast(addr)) = static_cast<u32>(ppu.gpr[op.rs]);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFSX: Store Floating-Point Single Indexed. The f64 register value is
// narrowed to f32 on store.
template <u32 Build, ppu_exec_bit... Flags>
auto STFSX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		vm::_ref<f32>(vm::cast(addr)) = static_cast<float>(ppu.fpr[op.frs]);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STVRX: Store Vector Right Indexed (unaligned vector store, right part).
// Dispatches to the SSSE3 helper or the generic fallback.
template <u32 Build, ppu_exec_bit... Flags>
auto STVRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		s_use_ssse3 ? sse_cellbe_stvrx(addr, ppu.vr[op.vs]) : sse_cellbe_stvrx_v0(addr, ppu.vr[op.vs]);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFSUX: Store Floating-Point Single with Update Indexed. Narrows to f32,
// stores at RA+RB, then writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STFSUX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
		vm::_ref<f32>(vm::cast(addr)) = static_cast<float>(ppu.fpr[op.frs]);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STSWI: Store String Word Immediate. Stores NB bytes (op.rb field; 0 means
// 32) from consecutive GPRs starting at the rd/rs field (same bits in the
// opcode union), wrapping r31 -> r0, to the address in RA (or 0 if RA=0).
// Partial words are emitted from the most significant byte down.
template <u32 Build, ppu_exec_bit... Flags>
auto STSWI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		u64 addr = op.ra ? ppu.gpr[op.ra] : 0;
		u64 N = op.rb ? op.rb : 32; // NB field: 0 encodes a count of 32
		u8 reg = op.rd;

		while (N > 0)
		{
			if (N > 3)
			{
				// Full word
				vm::write32(vm::cast(addr), static_cast<u32>(ppu.gpr[reg]));
				addr += 4;
				N -= 4;
			}
			else
			{
				// Partial word: shift out bytes from the top.
				u32 buf = static_cast<u32>(ppu.gpr[reg]);
				while (N > 0)
				{
					N = N - 1;
					vm::write8(vm::cast(addr), (0xFF000000 & buf) >> 24);
					buf <<= 8;
					addr++;
				}
			}
			reg = (reg + 1) % 32;
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFDX: Store Floating-Point Double Indexed.
template <u32 Build, ppu_exec_bit... Flags>
auto STFDX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		vm::_ref<f64>(vm::cast(addr)) = ppu.fpr[op.frs];
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFDUX: Store Floating-Point Double with Update Indexed. Stores at RA+RB,
// then writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STFDUX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + ppu.gpr[op.rb];
		vm::_ref<f64>(vm::cast(addr)) = ppu.fpr[op.frs];
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LVLXL: Load Vector Left Indexed LRU - identical to LVLX here (the LRU
// cache hint has no effect in this implementation).
template <u32 Build, ppu_exec_bit... Flags>
auto LVLXL()
{
	return LVLX<Build, Flags...>();
}
|
|
|
|
// LHBRX: Load Halfword Byte-Reverse Indexed (byte swap via little-endian read).
template <u32 Build, ppu_exec_bit... Flags>
auto LHBRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		ppu.gpr[op.rd] = ppu_feed_data<le_t<u16>>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// SRAW: Shift Right Algebraic Word. RA = (s32)RS >> (RB & 63), sign-extended
// to 64 bits. XER.CA is set when RS is negative and any 1 bits were shifted
// out (i.e. the shift was inexact). Shift amounts > 31 replicate the sign.
template <u32 Build, ppu_exec_bit... Flags>
auto SRAW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		s32 RS = static_cast<s32>(ppu.gpr[op.rs]);
		u8 shift = ppu.gpr[op.rb] & 63;
		if (shift > 31)
		{
			// All bits shifted out: result is 0 or -1 depending on sign.
			ppu.gpr[op.ra] = 0 - (RS < 0);
			ppu.xer.ca = (RS < 0);
		}
		else
		{
			ppu.gpr[op.ra] = RS >> shift;
			// CA: negative operand and the shift dropped a 1 bit
			// (shifting the result back does not reproduce RS).
			ppu.xer.ca = (RS < 0) && ((ppu.gpr[op.ra] << shift) != static_cast<u64>(RS));
		}

		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// SRAD: Shift Right Algebraic Doubleword. RA = (s64)RS >> (RB & 127).
// XER.CA is set when RS is negative and any 1 bits were shifted out.
// Shift amounts > 63 replicate the sign.
template <u32 Build, ppu_exec_bit... Flags>
auto SRAD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		s64 RS = ppu.gpr[op.rs];
		u8 shift = ppu.gpr[op.rb] & 127;
		if (shift > 63)
		{
			// All bits shifted out: result is 0 or -1 depending on sign.
			ppu.gpr[op.ra] = 0 - (RS < 0);
			ppu.xer.ca = (RS < 0);
		}
		else
		{
			ppu.gpr[op.ra] = RS >> shift;
			// CA: negative operand and the shift dropped a 1 bit.
			ppu.xer.ca = (RS < 0) && ((ppu.gpr[op.ra] << shift) != static_cast<u64>(RS));
		}

		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LVRXL: Load Vector Right Indexed LRU - identical to LVRX here (the LRU
// cache hint has no effect in this implementation).
template <u32 Build, ppu_exec_bit... Flags>
auto LVRXL()
{
	return LVRX<Build, Flags...>();
}
|
|
|
|
// DSS: Data Stream Stop - a no-op (data stream prefetch is not emulated).
template <u32 Build, ppu_exec_bit... Flags>
auto DSS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&&, auto) {
	};
	RETURN_(ppu, op);
}
|
|
|
|
// SRAWI: Shift Right Algebraic Word Immediate. RA = (s32)RS >> SH,
// sign-extended. XER.CA is set when RS is negative and the shift dropped
// a 1 bit (the shift was inexact).
template <u32 Build, ppu_exec_bit... Flags>
auto SRAWI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		s32 RS = static_cast<u32>(ppu.gpr[op.rs]);
		ppu.gpr[op.ra] = RS >> op.sh32;
		// CA: shifting the truncated result back does not reproduce RS.
		ppu.xer.ca = (RS < 0) && (static_cast<u32>(ppu.gpr[op.ra] << op.sh32) != static_cast<u32>(RS));

		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// SRADI: Shift Right Algebraic Doubleword Immediate. RA = (s64)RS >> SH.
// XER.CA is set when RS is negative and the shift dropped a 1 bit.
template <u32 Build, ppu_exec_bit... Flags>
auto SRADI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		auto sh = op.sh64;
		s64 RS = ppu.gpr[op.rs];
		ppu.gpr[op.ra] = RS >> sh;
		// CA: shifting the truncated result back does not reproduce RS.
		ppu.xer.ca = (RS < 0) && ((ppu.gpr[op.ra] << sh) != static_cast<u64>(RS));

		if constexpr (((Flags == has_rc) || ...))
			ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// EIEIO: Enforce In-order Execution of I/O - emulated as a full host fence.
template <u32 Build, ppu_exec_bit... Flags>
auto EIEIO()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&&, auto) {
		atomic_fence_seq_cst();
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STVLXL: Store Vector Left Indexed LRU - identical to STVLX here (the LRU
// cache hint has no effect in this implementation).
template <u32 Build, ppu_exec_bit... Flags>
auto STVLXL()
{
	return STVLX<Build, Flags...>();
}
|
|
|
|
// STHBRX: Store Halfword Byte-Reverse Indexed (byte swap via little-endian write).
template <u32 Build, ppu_exec_bit... Flags>
auto STHBRX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		vm::_ref<le_t<u16>>(vm::cast(addr)) = static_cast<u16>(ppu.gpr[op.rs]);
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto EXTSH()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = static_cast<s16>(ppu.gpr[op.rs]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
// STVRXL: Store Vector Right Indexed LRU - identical to STVRX here (the LRU
// cache hint has no effect in this implementation).
template <u32 Build, ppu_exec_bit... Flags>
auto STVRXL()
{
	return STVRX<Build, Flags...>();
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto EXTSB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = static_cast<s8>(ppu.gpr[op.rs]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
// STFIWX: Store Floating-Point as Integer Word Indexed. Stores the low 32
// bits of the raw f64 bit pattern of FRS (no numeric conversion).
template <u32 Build, ppu_exec_bit... Flags>
auto STFIWX()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		vm::write32(vm::cast(addr), static_cast<u32>(std::bit_cast<u64>(ppu.fpr[op.frs])));
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto EXTSW()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.gpr[op.ra] = static_cast<s32>(ppu.gpr[op.rs]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set<s64>(ppu, 0, ppu.gpr[op.ra], 0);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
// ICBI: Instruction Cache Block Invalidate - a no-op (the instruction cache
// is not emulated).
template <u32 Build, ppu_exec_bit... Flags>
auto ICBI()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](auto&&, auto) {
	};
	RETURN_(ppu, op);
}
|
|
|
|
// DCBZ: Data Cache Block Zero. Clears the 128-byte cache line containing the
// effective address. With accurate_cache_line_stores enabled, the clear is
// performed as an atomic 128-byte store so reservation semantics match real
// hardware; otherwise a plain memset is used.
template <u32 Build, ppu_exec_bit... Flags>
auto DCBZ()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra ? ppu.gpr[op.ra] + ppu.gpr[op.rb] : ppu.gpr[op.rb];
		// Align down to the 128-byte cache line.
		const u32 addr0 = vm::cast(addr) & ~127;

		if (g_cfg.core.accurate_cache_line_stores)
		{
			alignas(64) static constexpr u8 zero_buf[128]{};
			do_cell_atomic_128_store(addr0, zero_buf);
			return;
		}

		std::memset(vm::base(addr0), 0, 128);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LWZ: Load Word and Zero (D-form), RD = zero-extended u32 at RA+d.
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// address rather than d alone as the ISA specifies - looks intentional
// (branchless fast path); confirm against upstream intent.
template <u32 Build, ppu_exec_bit... Flags>
auto LWZ()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<u32>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LWZU: Load Word and Zero with Update. Loads from RA+d, then writes the
// effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LWZU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<u32>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LBZ: Load Byte and Zero (D-form).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto LBZ()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<u8>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LBZU: Load Byte and Zero with Update. Loads from RA+d, then writes the
// effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LBZU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<u8>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STW: Store Word (D-form).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto STW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		const u32 value = static_cast<u32>(ppu.gpr[op.rs]);
		vm::write32(vm::cast(addr), value);

		// Insomniac engine v3 & v4 (newer R&C, Fuse, Resistance 3) spin-waits
		// on plain stores of this marker value: bump the reservation so
		// waiting threads are notified.
		if (value == 0xAAAAAAAA) [[unlikely]]
		{
			vm::reservation_update(vm::cast(addr));
		}

	};
	RETURN_(ppu, op);
}
|
|
|
|
// STWU: Store Word with Update. Stores at RA+d, then writes the effective
// address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STWU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		vm::write32(vm::cast(addr), static_cast<u32>(ppu.gpr[op.rs]));
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STB: Store Byte (D-form).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto STB()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		vm::write8(vm::cast(addr), static_cast<u8>(ppu.gpr[op.rs]));
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STBU: Store Byte with Update. Stores at RA+d, then writes the effective
// address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STBU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		vm::write8(vm::cast(addr), static_cast<u8>(ppu.gpr[op.rs]));
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LHZ: Load Halfword and Zero (D-form).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto LHZ()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<u16>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LHZU: Load Halfword and Zero with Update. Loads from RA+d, then writes the
// effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LHZU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<u16>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LHA: Load Halfword Algebraic (sign-extends the s16 to 64 bits).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto LHA()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<s16>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LHAU: Load Halfword Algebraic with Update. Sign-extends the loaded s16,
// then writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LHAU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		ppu.gpr[op.rd] = ppu_feed_data<s16>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STH: Store Halfword (D-form).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto STH()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		vm::write16(vm::cast(addr), static_cast<u16>(ppu.gpr[op.rs]));
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STHU: Store Halfword with Update. Stores at RA+d, then writes the
// effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STHU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		vm::write16(vm::cast(addr), static_cast<u16>(ppu.gpr[op.rs]));
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LMW: Load Multiple Word. Loads consecutive words from RA+d (or d if RA=0)
// into GPRs RD through r31, zero-extended.
template <u32 Build, ppu_exec_bit... Flags>
auto LMW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		u64 addr = op.ra ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		for (u32 i = op.rd; i<32; ++i, addr += 4)
		{
			ppu.gpr[i] = ppu_feed_data<u32>(ppu, addr);
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STMW: Store Multiple Word. Stores the low words of GPRs RS through r31 to
// consecutive addresses starting at RA+d (or d if RA=0).
template <u32 Build, ppu_exec_bit... Flags>
auto STMW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		u64 addr = op.ra ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		for (u32 i = op.rs; i<32; ++i, addr += 4)
		{
			vm::write32(vm::cast(addr), static_cast<u32>(ppu.gpr[i]));
		}
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFS: Load Floating-Point Single (D-form); f32 widened to f64.
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto LFS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		ppu.fpr[op.frd] = ppu_feed_data<f32>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFSU: Load Floating-Point Single with Update. Loads from RA+d (widened to
// f64), then writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LFSU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		ppu.fpr[op.frd] = ppu_feed_data<f32>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFD: Load Floating-Point Double (D-form).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto LFD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		ppu.fpr[op.frd] = ppu_feed_data<f64>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LFDU: Load Floating-Point Double with Update. Loads from RA+d, then writes
// the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LFDU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		ppu.fpr[op.frd] = ppu_feed_data<f64>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFS: Store Floating-Point Single (D-form); f64 narrowed to f32 on store.
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto STFS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		vm::_ref<f32>(vm::cast(addr)) = static_cast<float>(ppu.fpr[op.frs]);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFSU: Store Floating-Point Single with Update. Narrows to f32, stores at
// RA+d, then writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STFSU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		vm::_ref<f32>(vm::cast(addr)) = static_cast<float>(ppu.fpr[op.frs]);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFD: Store Floating-Point Double (D-form).
// NOTE(review): "op.ra || 1" is always true, so RA=0 uses GPR[0]+d as the
// base instead of 0 - presumably intentional; confirm.
template <u32 Build, ppu_exec_bit... Flags>
auto STFD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = op.ra || 1 ? ppu.gpr[op.ra] + op.simm16 : op.simm16;
		vm::_ref<f64>(vm::cast(addr)) = ppu.fpr[op.frs];
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STFDU: Store Floating-Point Double with Update. Stores at RA+d, then
// writes the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STFDU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + op.simm16;
		vm::_ref<f64>(vm::cast(addr)) = ppu.fpr[op.frs];
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LD: Load Doubleword (DS-form). The low 2 bits of the displacement encode
// the opcode variant, so they are masked off to get the offset.
template <u32 Build, ppu_exec_bit... Flags>
auto LD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = (op.simm16 & ~3) + (op.ra ? ppu.gpr[op.ra] : 0);
		ppu.gpr[op.rd] = ppu_feed_data<u64>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LDU: Load Doubleword with Update (DS-form). Loads from RA+ds, then writes
// the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto LDU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + (op.simm16 & ~3);
		ppu.gpr[op.rd] = ppu_feed_data<u64>(ppu, addr);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// LWA: Load Word Algebraic (DS-form). Loads an s32 and sign-extends it.
template <u32 Build, ppu_exec_bit... Flags>
auto LWA()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = (op.simm16 & ~3) + (op.ra ? ppu.gpr[op.ra] : 0);
		ppu.gpr[op.rd] = ppu_feed_data<s32>(ppu, addr);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STD: Store Doubleword (DS-form).
template <u32 Build, ppu_exec_bit... Flags>
auto STD()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = (op.simm16 & ~3) + (op.ra ? ppu.gpr[op.ra] : 0);
		vm::write64(vm::cast(addr), ppu.gpr[op.rs]);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// STDU: Store Doubleword with Update (DS-form). Stores at RA+ds, then writes
// the effective address back to RA.
template <u32 Build, ppu_exec_bit... Flags>
auto STDU()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const u64 addr = ppu.gpr[op.ra] + (op.simm16 & ~3);
		vm::write64(vm::cast(addr), ppu.gpr[op.rs]);
		ppu.gpr[op.ra] = addr;
	};
	RETURN_(ppu, op);
}
|
|
|
|
// FDIVS: Floating Divide Single. Computed in double precision, then rounded
// to single via the f32 cast. Updates FPCC per the set_fpcc flag.
template <u32 Build, ppu_exec_bit... Flags>
auto FDIVS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		ppu.fpr[op.frd] = f32(ppu.fpr[op.fra] / ppu.fpr[op.frb]);
		ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// FSUBS: Floating Subtract Single (double-precision op rounded to single).
template <u32 Build, ppu_exec_bit... Flags>
auto FSUBS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		ppu.fpr[op.frd] = f32(ppu.fpr[op.fra] - ppu.fpr[op.frb]);
		ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
	};
	RETURN_(ppu, op);
}
|
|
|
|
// FADDS: Floating Add Single (double-precision op rounded to single).
template <u32 Build, ppu_exec_bit... Flags>
auto FADDS()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		ppu.fpr[op.frd] = f32(ppu.fpr[op.fra] + ppu.fpr[op.frb]);
		ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FSQRTS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = f32(std::sqrt(ppu.fpr[op.frb]));
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FRES()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = f32(1.0 / ppu.fpr[op.frb]);
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FMULS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = f32(ppu.fpr[op.fra] * ppu.fpr[op.frc]);
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FMADDS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = f32(std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]));
|
|
else
|
|
ppu.fpr[op.frd] = f32(ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb]);
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FMSUBS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc, use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = f32(std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]));
|
|
else
|
|
ppu.fpr[op.frd] = f32(ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb]);
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FNMSUBS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc, use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = f32(-std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]));
|
|
else
|
|
ppu.fpr[op.frd] = f32(-(ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb]));
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FNMADDS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc, use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = f32(-std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]));
|
|
else
|
|
ppu.fpr[op.frd] = f32(-(ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb]));
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MTFSB1()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u32 bit = op.crbd;
|
|
if (bit < 16 || bit > 19) ppu_log.warning("MTFSB1(%d)", bit);
|
|
ppu.fpscr.bits[bit] = 1;
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MCRFS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if (op.crfs != 4) ppu_log.warning("MCRFS(%d)", op.crfs);
|
|
ppu.cr.fields[op.crfd] = ppu.fpscr.fields[op.crfs];
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MTFSB0()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u32 bit = op.crbd;
|
|
if (bit < 16 || bit > 19) ppu_log.warning("MTFSB0(%d)", bit);
|
|
ppu.fpscr.bits[bit] = 0;
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MTFSFI()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const u32 bf = op.crfd;
|
|
|
|
if (bf != 4)
|
|
{
|
|
// Do nothing on non-FPCC field (TODO)
|
|
ppu_log.warning("MTFSFI(%d)", op.crfd);
|
|
}
|
|
else
|
|
{
|
|
static constexpr std::array<u32, 16> all_values = []() -> std::array<u32, 16>
|
|
{
|
|
std::array<u32, 16> values{};
|
|
|
|
for (u32 i = 0; i < values.size(); i++)
|
|
{
|
|
u32 value = 0, im = i;
|
|
value |= (im & 1) << (8 * 3); im >>= 1;
|
|
value |= (im & 1) << (8 * 2); im >>= 1;
|
|
value |= (im & 1) << (8 * 1); im >>= 1;
|
|
value |= (im & 1) << (8 * 0);
|
|
values[i] = value;
|
|
}
|
|
|
|
return values;
|
|
}();
|
|
|
|
ppu.fpscr.fields[bf] = all_values[op.i];
|
|
}
|
|
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MFFS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu_log.warning("MFFS");
|
|
ppu.fpr[op.frd] = std::bit_cast<f64>(u64{ppu.fpscr.fl} << 15 | u64{ppu.fpscr.fg} << 14 | u64{ppu.fpscr.fe} << 13 | u64{ppu.fpscr.fu} << 12);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto MTFSF()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](auto&& ppu, auto) {
|
|
ppu_log.warning("MTFSF");
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FCMPU()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
const f64 a = ppu.fpr[op.fra];
|
|
const f64 b = ppu.fpr[op.frb];
|
|
ppu_set_fpcc<set_fpcc, has_rc, Flags...>(ppu, a, b, op.crfd);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto FCTIW()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	// Floating Convert To Integer Word (rounds per the host's current rounding mode).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
		// CVTPD2DQ yields 0x80000000 on overflow/NaN; XOR-ing with the
		// (b >= 2^31) mask flips positive overflow to 0x7fffffff, i.e. saturation.
		const auto res = _mm_xor_si128(_mm_cvtpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
		// Result is placed sign-extended into the FPR bit pattern.
		ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsi128_si32(res));
		ppu_set_fpcc<Flags...>(ppu, 0., 0.); // undefined (TODO)
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto FCTIWZ()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	// Floating Convert To Integer Word with round toward Zero (truncating CVTTPD2DQ).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
		// CVTTPD2DQ yields 0x80000000 on overflow/NaN; XOR-ing with the
		// (b >= 2^31) mask flips positive overflow to 0x7fffffff, i.e. saturation.
		const auto res = _mm_xor_si128(_mm_cvttpd_epi32(b), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(0x80000000))));
		// Result is placed sign-extended into the FPR bit pattern.
		ppu.fpr[op.frd] = std::bit_cast<f64, s64>(_mm_cvtsi128_si32(res));
		ppu_set_fpcc<Flags...>(ppu, 0., 0.); // undefined (TODO)
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FRSP()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = f32(ppu.fpr[op.frb]);
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FDIV()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = ppu.fpr[op.fra] / ppu.fpr[op.frb];
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FSUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = ppu.fpr[op.fra] - ppu.fpr[op.frb];
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FADD()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = ppu.fpr[op.fra] + ppu.fpr[op.frb];
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FSQRT()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = std::sqrt(ppu.fpr[op.frb]);
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FSEL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = ppu.fpr[op.fra] >= 0.0 ? ppu.fpr[op.frc] : ppu.fpr[op.frb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FMUL()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = ppu.fpr[op.fra] * ppu.fpr[op.frc];
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FRSQRTE()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = 1.0 / std::sqrt(ppu.fpr[op.frb]);
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FMSUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc, use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]);
|
|
else
|
|
ppu.fpr[op.frd] = ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb];
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FMADD()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc, use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]);
|
|
else
|
|
ppu.fpr[op.frd] = ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb];
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FNMSUB()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc, use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = -std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], -ppu.fpr[op.frb]);
|
|
else
|
|
ppu.fpr[op.frd] = -(ppu.fpr[op.fra] * ppu.fpr[op.frc] - ppu.fpr[op.frb]);
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FNMADD()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<set_fpcc, use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
if constexpr (((Flags == use_dfma) || ...))
|
|
ppu.fpr[op.frd] = -std::fma(ppu.fpr[op.fra], ppu.fpr[op.frc], ppu.fpr[op.frb]);
|
|
else
|
|
ppu.fpr[op.frd] = -(ppu.fpr[op.fra] * ppu.fpr[op.frc] + ppu.fpr[op.frb]);
|
|
|
|
ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto FCMPO()
{
	// Ordered compare is implemented as the unordered compare: the extra
	// VXVC invalid-operation semantics of FCMPO are not emulated here.
	return FCMPU<Build, Flags...>();
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FNEG()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<use_dfma>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = -ppu.fpr[op.frb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FMR()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = ppu.fpr[op.frb];
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FNABS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = -std::fabs(ppu.fpr[op.frb]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
|
|
auto FABS()
|
|
{
|
|
if constexpr (Build == 0xf1a6)
|
|
return ppu_exec_select<Flags...>::template select<>();
|
|
|
|
static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
|
|
ppu.fpr[op.frd] = std::fabs(ppu.fpr[op.frb]);
|
|
if constexpr (((Flags == has_rc) || ...))
|
|
ppu_cr_set(ppu, 1, ppu.fpscr.fg, ppu.fpscr.fl, ppu.fpscr.fe, ppu.fpscr.fu);
|
|
};
|
|
RETURN_(ppu, op);
|
|
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto FCTID()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	// Floating Convert To Integer Doubleword (rounds per the host's current rounding mode).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
		// CVTSD2SI yields 0x8000000000000000 on overflow/NaN; XOR-ing with the
		// (b >= 2^63) mask flips positive overflow to 0x7fff...ffff, i.e. saturation.
		const auto res = _mm_xor_si128(_mm_set1_epi64x(_mm_cvtsd_si64(b)), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(f64(1ull << 63)))));
		ppu.fpr[op.frd] = std::bit_cast<f64>(_mm_cvtsi128_si64(res));
		ppu_set_fpcc<Flags...>(ppu, 0., 0.); // undefined (TODO)
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto FCTIDZ()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	// Floating Convert To Integer Doubleword with round toward Zero (truncating CVTTSD2SI).
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		const auto b = _mm_load_sd(&ppu.fpr[op.frb]);
		// CVTTSD2SI yields 0x8000000000000000 on overflow/NaN; XOR-ing with the
		// (b >= 2^63) mask flips positive overflow to 0x7fff...ffff, i.e. saturation.
		const auto res = _mm_xor_si128(_mm_set1_epi64x(_mm_cvttsd_si64(b)), _mm_castpd_si128(_mm_cmpge_pd(b, _mm_set1_pd(f64(1ull << 63)))));
		ppu.fpr[op.frd] = std::bit_cast<f64>(_mm_cvtsi128_si64(res));
		ppu_set_fpcc<Flags...>(ppu, 0., 0.); // undefined (TODO)
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto FCFID()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<set_fpcc>();

	// Floating Convert From Integer Doubleword: frd = f64(bitcast<s64>(frb))
	static const auto exec = [](ppu_thread& ppu, ppu_opcode_t op) {
		_mm_store_sd(&ppu.fpr[op.frd], _mm_cvtsi64_sd(_mm_setzero_pd(), std::bit_cast<s64>(ppu.fpr[op.frb])));
		ppu_set_fpcc<Flags...>(ppu, ppu.fpr[op.frd], 0.);
	};
	RETURN_(ppu, op);
}
|
|
|
|
template <u32 Build, ppu_exec_bit... Flags>
auto UNK()
{
	if constexpr (Build == 0xf1a6)
		return ppu_exec_select<Flags...>::template select<>();

	if constexpr (Build == 0)
	{
		// Fallback handler for undecodable opcodes. Also dispatches HLE
		// function stubs, which live in the ppu_function_manager region.
		return +[](ppu_thread& ppu, ppu_opcode_t op, be_t<u32>* this_op, ppu_intrp_func*)
		{
			// Update CIA from the actual instruction address being executed
			const u32 old_cia = ppu.cia;
			ppu.cia = vm::get_addr(this_op);
			ppu.exec_bytes += ppu.cia - old_cia;

			// HLE function index
			const u32 index = (ppu.cia - g_fxo->get<ppu_function_manager>().addr) / 8;

			const auto& hle_funcs = ppu_function_manager::get();

			// HLE stubs are 8-byte slots; the callable word sits at offset 4
			if (ppu.cia % 8 == 4 && index < hle_funcs.size())
			{
				return hle_funcs[index](ppu, op, this_op, nullptr);
			}

			// Not an HLE stub: genuinely unknown/illegal instruction
			fmt::throw_exception("Unknown/Illegal opcode: 0x%08x at 0x%x", op.opcode, ppu.cia);
		};
	}
}
|
|
|
|
template <typename IT>
struct ppu_interpreter_t
{
	// Interpreter dispatch table: one member (of pointer-like type IT) per
	// decoded PPU instruction. Populated by ppu_interpreter_rt_base().

	// AltiVec (VMX) instructions
	IT MFVSCR;
	IT MTVSCR;
	IT VADDCUW;
	IT VADDFP;
	IT VADDSBS;
	IT VADDSHS;
	IT VADDSWS;
	IT VADDUBM;
	IT VADDUBS;
	IT VADDUHM;
	IT VADDUHS;
	IT VADDUWM;
	IT VADDUWS;
	IT VAND;
	IT VANDC;
	IT VAVGSB;
	IT VAVGSH;
	IT VAVGSW;
	IT VAVGUB;
	IT VAVGUH;
	IT VAVGUW;
	IT VCFSX;
	IT VCFUX;
	// Trailing underscore = Rc=1 (dot) form of the vector compare
	IT VCMPBFP;
	IT VCMPBFP_;
	IT VCMPEQFP;
	IT VCMPEQFP_;
	IT VCMPEQUB;
	IT VCMPEQUB_;
	IT VCMPEQUH;
	IT VCMPEQUH_;
	IT VCMPEQUW;
	IT VCMPEQUW_;
	IT VCMPGEFP;
	IT VCMPGEFP_;
	IT VCMPGTFP;
	IT VCMPGTFP_;
	IT VCMPGTSB;
	IT VCMPGTSB_;
	IT VCMPGTSH;
	IT VCMPGTSH_;
	IT VCMPGTSW;
	IT VCMPGTSW_;
	IT VCMPGTUB;
	IT VCMPGTUB_;
	IT VCMPGTUH;
	IT VCMPGTUH_;
	IT VCMPGTUW;
	IT VCMPGTUW_;
	IT VCTSXS;
	IT VCTUXS;
	IT VEXPTEFP;
	IT VLOGEFP;
	IT VMADDFP;
	IT VMAXFP;
	IT VMAXSB;
	IT VMAXSH;
	IT VMAXSW;
	IT VMAXUB;
	IT VMAXUH;
	IT VMAXUW;
	IT VMHADDSHS;
	IT VMHRADDSHS;
	IT VMINFP;
	IT VMINSB;
	IT VMINSH;
	IT VMINSW;
	IT VMINUB;
	IT VMINUH;
	IT VMINUW;
	IT VMLADDUHM;
	IT VMRGHB;
	IT VMRGHH;
	IT VMRGHW;
	IT VMRGLB;
	IT VMRGLH;
	IT VMRGLW;
	IT VMSUMMBM;
	IT VMSUMSHM;
	IT VMSUMSHS;
	IT VMSUMUBM;
	IT VMSUMUHM;
	IT VMSUMUHS;
	IT VMULESB;
	IT VMULESH;
	IT VMULEUB;
	IT VMULEUH;
	IT VMULOSB;
	IT VMULOSH;
	IT VMULOUB;
	IT VMULOUH;
	IT VNMSUBFP;
	IT VNOR;
	IT VOR;
	IT VPERM;
	IT VPKPX;
	IT VPKSHSS;
	IT VPKSHUS;
	IT VPKSWSS;
	IT VPKSWUS;
	IT VPKUHUM;
	IT VPKUHUS;
	IT VPKUWUM;
	IT VPKUWUS;
	IT VREFP;
	IT VRFIM;
	IT VRFIN;
	IT VRFIP;
	IT VRFIZ;
	IT VRLB;
	IT VRLH;
	IT VRLW;
	IT VRSQRTEFP;
	IT VSEL;
	IT VSL;
	IT VSLB;
	IT VSLDOI;
	IT VSLH;
	IT VSLO;
	IT VSLW;
	IT VSPLTB;
	IT VSPLTH;
	IT VSPLTISB;
	IT VSPLTISH;
	IT VSPLTISW;
	IT VSPLTW;
	IT VSR;
	IT VSRAB;
	IT VSRAH;
	IT VSRAW;
	IT VSRB;
	IT VSRH;
	IT VSRO;
	IT VSRW;
	IT VSUBCUW;
	IT VSUBFP;
	IT VSUBSBS;
	IT VSUBSHS;
	IT VSUBSWS;
	IT VSUBUBM;
	IT VSUBUBS;
	IT VSUBUHM;
	IT VSUBUHS;
	IT VSUBUWM;
	IT VSUBUWS;
	IT VSUMSWS;
	IT VSUM2SWS;
	IT VSUM4SBS;
	IT VSUM4SHS;
	IT VSUM4UBS;
	IT VUPKHPX;
	IT VUPKHSB;
	IT VUPKHSH;
	IT VUPKLPX;
	IT VUPKLSB;
	IT VUPKLSH;
	IT VXOR;
	// Fixed-point, branch, CR and system instructions
	IT TDI;
	IT TWI;
	IT MULLI;
	IT SUBFIC;
	IT CMPLI;
	IT CMPI;
	IT ADDIC;
	IT ADDI;
	IT ADDIS;
	IT BC;
	IT SC;
	IT B;
	IT MCRF;
	IT BCLR;
	IT CRNOR;
	IT CRANDC;
	IT ISYNC;
	IT CRXOR;
	IT CRNAND;
	IT CRAND;
	IT CREQV;
	IT CRORC;
	IT CROR;
	IT BCCTR;
	IT RLWIMI;
	IT RLWINM;
	IT RLWNM;
	IT ORI;
	IT ORIS;
	IT XORI;
	IT XORIS;
	IT ANDI;
	IT ANDIS;
	IT RLDICL;
	IT RLDICR;
	IT RLDIC;
	IT RLDIMI;
	IT RLDCL;
	IT RLDCR;
	IT CMP;
	IT TW;
	IT LVSL;
	IT LVEBX;
	IT SUBFC;
	IT ADDC;
	IT MULHDU;
	IT MULHWU;
	IT MFOCRF;
	IT LWARX;
	IT LDX;
	IT LWZX;
	IT SLW;
	IT CNTLZW;
	IT SLD;
	IT AND;
	IT CMPL;
	IT LVSR;
	IT LVEHX;
	IT SUBF;
	IT LDUX;
	IT DCBST;
	IT LWZUX;
	IT CNTLZD;
	IT ANDC;
	IT TD;
	IT LVEWX;
	IT MULHD;
	IT MULHW;
	IT LDARX;
	IT DCBF;
	IT LBZX;
	IT LVX;
	IT NEG;
	IT LBZUX;
	IT NOR;
	IT STVEBX;
	IT SUBFE;
	IT ADDE;
	IT MTOCRF;
	IT STDX;
	IT STWCX;
	IT STWX;
	IT STVEHX;
	IT STDUX;
	IT STWUX;
	IT STVEWX;
	IT SUBFZE;
	IT ADDZE;
	IT STDCX;
	IT STBX;
	IT STVX;
	IT SUBFME;
	IT MULLD;
	IT ADDME;
	IT MULLW;
	IT DCBTST;
	IT STBUX;
	IT ADD;
	IT DCBT;
	IT LHZX;
	IT EQV;
	IT ECIWX;
	IT LHZUX;
	IT XOR;
	IT MFSPR;
	IT LWAX;
	IT DST;
	IT LHAX;
	IT LVXL;
	IT MFTB;
	IT LWAUX;
	IT DSTST;
	IT LHAUX;
	IT STHX;
	IT ORC;
	IT ECOWX;
	IT STHUX;
	IT OR;
	IT DIVDU;
	IT DIVWU;
	IT MTSPR;
	IT DCBI;
	IT NAND;
	IT STVXL;
	IT DIVD;
	IT DIVW;
	IT LVLX;
	IT LDBRX;
	IT LSWX;
	IT LWBRX;
	IT LFSX;
	IT SRW;
	IT SRD;
	IT LVRX;
	IT LSWI;
	IT LFSUX;
	IT SYNC;
	IT LFDX;
	IT LFDUX;
	IT STVLX;
	IT STDBRX;
	IT STSWX;
	IT STWBRX;
	IT STFSX;
	IT STVRX;
	IT STFSUX;
	IT STSWI;
	IT STFDX;
	IT STFDUX;
	IT LVLXL;
	IT LHBRX;
	IT SRAW;
	IT SRAD;
	IT LVRXL;
	IT DSS;
	IT SRAWI;
	IT SRADI;
	IT EIEIO;
	IT STVLXL;
	IT STHBRX;
	IT EXTSH;
	IT STVRXL;
	IT EXTSB;
	IT STFIWX;
	IT EXTSW;
	IT ICBI;
	IT DCBZ;
	IT LWZ;
	IT LWZU;
	IT LBZ;
	IT LBZU;
	IT STW;
	IT STWU;
	IT STB;
	IT STBU;
	IT LHZ;
	IT LHZU;
	IT LHA;
	IT LHAU;
	IT STH;
	IT STHU;
	IT LMW;
	IT STMW;
	IT LFS;
	IT LFSU;
	IT LFD;
	IT LFDU;
	IT STFS;
	IT STFSU;
	IT STFD;
	IT STFDU;
	IT LD;
	IT LDU;
	IT LWA;
	IT STD;
	IT STDU;
	// Floating-point instructions
	IT FDIVS;
	IT FSUBS;
	IT FADDS;
	IT FSQRTS;
	IT FRES;
	IT FMULS;
	IT FMADDS;
	IT FMSUBS;
	IT FNMSUBS;
	IT FNMADDS;
	IT MTFSB1;
	IT MCRFS;
	IT MTFSB0;
	IT MTFSFI;
	IT MFFS;
	IT MTFSF;
	IT FCMPU;
	IT FRSP;
	IT FCTIW;
	IT FCTIWZ;
	IT FDIV;
	IT FSUB;
	IT FADD;
	IT FSQRT;
	IT FSEL;
	IT FMUL;
	IT FRSQRTE;
	IT FMSUB;
	IT FMADD;
	IT FNMSUB;
	IT FNMADD;
	IT FCMPO;
	IT FNEG;
	IT FMR;
	IT FNABS;
	IT FABS;
	IT FCTID;
	IT FCTIDZ;
	IT FCFID;
	IT UNK;

	// OE=1 (overflow-enabled) variants, suffix 'O'
	IT SUBFCO;
	IT ADDCO;
	IT SUBFO;
	IT NEGO;
	IT SUBFEO;
	IT ADDEO;
	IT SUBFZEO;
	IT ADDZEO;
	IT SUBFMEO;
	IT MULLDO;
	IT ADDMEO;
	IT MULLWO;
	IT ADDO;
	IT DIVDUO;
	IT DIVWUO;
	IT DIVDO;
	IT DIVWO;

	// OE=1 + Rc=1 variants, suffix 'O_'
	IT SUBFCO_;
	IT ADDCO_;
	IT SUBFO_;
	IT NEGO_;
	IT SUBFEO_;
	IT ADDEO_;
	IT SUBFZEO_;
	IT ADDZEO_;
	IT SUBFMEO_;
	IT MULLDO_;
	IT ADDMEO_;
	IT MULLWO_;
	IT ADDO_;
	IT DIVDUO_;
	IT DIVWUO_;
	IT DIVDO_;
	IT DIVWO_;

	// Rc=1 (dot) variants, suffix '_'
	IT RLWIMI_;
	IT RLWINM_;
	IT RLWNM_;
	IT RLDICL_;
	IT RLDICR_;
	IT RLDIC_;
	IT RLDIMI_;
	IT RLDCL_;
	IT RLDCR_;
	IT SUBFC_;
	IT MULHDU_;
	IT ADDC_;
	IT MULHWU_;
	IT SLW_;
	IT CNTLZW_;
	IT SLD_;
	IT AND_;
	IT SUBF_;
	IT CNTLZD_;
	IT ANDC_;
	IT MULHD_;
	IT MULHW_;
	IT NEG_;
	IT NOR_;
	IT SUBFE_;
	IT ADDE_;
	IT SUBFZE_;
	IT ADDZE_;
	IT MULLD_;
	IT SUBFME_;
	IT ADDME_;
	IT MULLW_;
	IT ADD_;
	IT EQV_;
	IT XOR_;
	IT ORC_;
	IT OR_;
	IT DIVDU_;
	IT DIVWU_;
	IT NAND_;
	IT DIVD_;
	IT DIVW_;
	IT SRW_;
	IT SRD_;
	IT SRAW_;
	IT SRAD_;
	IT SRAWI_;
	IT SRADI_;
	IT EXTSH_;
	IT EXTSB_;
	IT EXTSW_;
	IT FDIVS_;
	IT FSUBS_;
	IT FADDS_;
	IT FSQRTS_;
	IT FRES_;
	IT FMULS_;
	IT FMADDS_;
	IT FMSUBS_;
	IT FNMSUBS_;
	IT FNMADDS_;
	IT MTFSB1_;
	IT MTFSB0_;
	IT MTFSFI_;
	IT MFFS_;
	IT MTFSF_;
	IT FRSP_;
	IT FCTIW_;
	IT FCTIWZ_;
	IT FDIV_;
	IT FSUB_;
	IT FADD_;
	IT FSQRT_;
	IT FSEL_;
	IT FMUL_;
	IT FRSQRTE_;
	IT FMSUB_;
	IT FMADD_;
	IT FNMSUB_;
	IT FNMADD_;
	IT FNEG_;
	IT FMR_;
	IT FNABS_;
	IT FABS_;
	IT FCTID_;
	IT FCTIDZ_;
	IT FCFID_;

	/* Optimized variants */
};
|
|
|
|
ppu_interpreter_rt_base::ppu_interpreter_rt_base() noexcept
|
|
{
|
|
// Obtain required set of flags from settings
|
|
bs_t<ppu_exec_bit> selected{};
|
|
if (g_cfg.core.ppu_set_sat_bit)
|
|
selected += set_sat;
|
|
if (g_cfg.core.ppu_use_nj_bit)
|
|
selected += use_nj + fix_nj;
|
|
if (g_cfg.core.ppu_llvm_nj_fixup)
|
|
selected += fix_nj;
|
|
if (g_cfg.core.ppu_set_vnan)
|
|
selected += set_vnan + fix_vnan;
|
|
if (g_cfg.core.ppu_fix_vnan)
|
|
selected += fix_vnan;
|
|
if (g_cfg.core.ppu_set_fpcc)
|
|
selected += set_fpcc;
|
|
if (g_cfg.core.use_accurate_dfma)
|
|
selected += use_dfma;
|
|
if (g_cfg.core.ppu_debug)
|
|
selected += set_cr_stats; // TODO
|
|
|
|
if (selected & use_nj)
|
|
ppu_log.success("Enabled: Accurate Non-Java Mode");
|
|
else if (selected & fix_nj)
|
|
ppu_log.success("Enabled: Non-Java Mode Fixup");
|
|
if (selected & set_vnan)
|
|
ppu_log.success("Enabled: Accurate VNAN");
|
|
else if (selected & fix_vnan)
|
|
ppu_log.success("Enabled: VNAN Fixup");
|
|
if (selected & set_sat)
|
|
ppu_log.success("Enabled: Accurate SAT");
|
|
if (selected & set_fpcc)
|
|
ppu_log.success("Enabled: Accurate FPCC");
|
|
|
|
ptrs = std::make_unique<decltype(ptrs)::element_type>();
|
|
|
|
#ifndef __INTELLISENSE__
|
|
|
|
#define INIT_VCMP(name) \
|
|
ptrs->name = ::name<0>(); \
|
|
ptrs->name##_ = ::name<0, has_oe>(); \
|
|
|
|
#define INIT_OV(name) \
|
|
ptrs->name = ::name<0>(); \
|
|
ptrs->name##O = ::name<0, has_oe>(); \
|
|
|
|
#define INIT_RC(name) \
|
|
ptrs->name = ::name<0xf1a6>()(selected, []<ppu_exec_bit... Flags>() { \
|
|
return ::name<0, Flags...>(); \
|
|
}); \
|
|
ptrs->name##_ = ::name<0xf1a6, set_fpcc>()(selected, []<ppu_exec_bit... Flags>() { \
|
|
/* Minor optimization: has_rc implies set_fpcc so don't compile has_rc alone */ \
|
|
return ::name<0, has_rc, Flags...>(); \
|
|
}); \
|
|
|
|
#define INIT_RC_OV(name) \
|
|
ptrs->name = ::name<0>(); \
|
|
ptrs->name##O = ::name<0, has_oe>(); \
|
|
ptrs->name##_ = ::name<0, has_rc>(); \
|
|
ptrs->name##O_ = ::name<0, has_oe, has_rc>(); \
|
|
|
|
// Initialize instructions with their own sets of supported flags (except INIT_VCMP, INIT_OV, INIT_RC_OV)
|
|
#define INIT(name) \
|
|
ptrs->name = ::name<0xf1a6>()(selected, []<ppu_exec_bit... Flags>() { \
|
|
return ::name<0, Flags...>(); \
|
|
}); \
|
|
|
|
INIT(MFVSCR);
|
|
INIT(MTVSCR);
|
|
INIT(VADDCUW);
|
|
INIT(VADDFP);
|
|
INIT(VADDSBS);
|
|
INIT(VADDSHS);
|
|
INIT(VADDSWS);
|
|
INIT(VADDUBM);
|
|
INIT(VADDUBS);
|
|
INIT(VADDUHM);
|
|
INIT(VADDUHS);
|
|
INIT(VADDUWM);
|
|
INIT(VADDUWS);
|
|
INIT(VAND);
|
|
INIT(VANDC);
|
|
INIT(VAVGSB);
|
|
INIT(VAVGSH);
|
|
INIT(VAVGSW);
|
|
INIT(VAVGUB);
|
|
INIT(VAVGUH);
|
|
INIT(VAVGUW);
|
|
INIT(VCFSX);
|
|
INIT(VCFUX);
|
|
INIT_VCMP(VCMPBFP);
|
|
INIT_VCMP(VCMPEQFP);
|
|
INIT_VCMP(VCMPEQUB);
|
|
INIT_VCMP(VCMPEQUH);
|
|
INIT_VCMP(VCMPEQUW);
|
|
INIT_VCMP(VCMPGEFP);
|
|
INIT_VCMP(VCMPGTFP);
|
|
INIT_VCMP(VCMPGTSB);
|
|
INIT_VCMP(VCMPGTSH);
|
|
INIT_VCMP(VCMPGTSW);
|
|
INIT_VCMP(VCMPGTUB);
|
|
INIT_VCMP(VCMPGTUH);
|
|
INIT_VCMP(VCMPGTUW);
|
|
INIT(VCTSXS);
|
|
INIT(VCTUXS);
|
|
INIT(VEXPTEFP);
|
|
INIT(VLOGEFP);
|
|
INIT(VMADDFP);
|
|
INIT(VMAXFP);
|
|
INIT(VMAXSB);
|
|
INIT(VMAXSH);
|
|
INIT(VMAXSW);
|
|
INIT(VMAXUB);
|
|
INIT(VMAXUH);
|
|
INIT(VMAXUW);
|
|
INIT(VMHADDSHS);
|
|
INIT(VMHRADDSHS);
|
|
INIT(VMINFP);
|
|
INIT(VMINSB);
|
|
INIT(VMINSH);
|
|
INIT(VMINSW);
|
|
INIT(VMINUB);
|
|
INIT(VMINUH);
|
|
INIT(VMINUW);
|
|
INIT(VMLADDUHM);
|
|
INIT(VMRGHB);
|
|
INIT(VMRGHH);
|
|
INIT(VMRGHW);
|
|
INIT(VMRGLB);
|
|
INIT(VMRGLH);
|
|
INIT(VMRGLW);
|
|
INIT(VMSUMMBM);
|
|
INIT(VMSUMSHM);
|
|
INIT(VMSUMSHS);
|
|
INIT(VMSUMUBM);
|
|
INIT(VMSUMUHM);
|
|
INIT(VMSUMUHS);
|
|
INIT(VMULESB);
|
|
INIT(VMULESH);
|
|
INIT(VMULEUB);
|
|
INIT(VMULEUH);
|
|
INIT(VMULOSB);
|
|
INIT(VMULOSH);
|
|
INIT(VMULOUB);
|
|
INIT(VMULOUH);
|
|
INIT(VNMSUBFP);
|
|
INIT(VNOR);
|
|
INIT(VOR);
|
|
INIT(VPERM);
|
|
INIT(VPKPX);
|
|
INIT(VPKSHSS);
|
|
INIT(VPKSHUS);
|
|
INIT(VPKSWSS);
|
|
INIT(VPKSWUS);
|
|
INIT(VPKUHUM);
|
|
INIT(VPKUHUS);
|
|
INIT(VPKUWUM);
|
|
INIT(VPKUWUS);
|
|
INIT(VREFP);
|
|
INIT(VRFIM);
|
|
INIT(VRFIN);
|
|
INIT(VRFIP);
|
|
INIT(VRFIZ);
|
|
INIT(VRLB);
|
|
INIT(VRLH);
|
|
INIT(VRLW);
|
|
INIT(VRSQRTEFP);
|
|
INIT(VSEL);
|
|
INIT(VSL);
|
|
INIT(VSLB);
|
|
INIT(VSLDOI);
|
|
INIT(VSLH);
|
|
INIT(VSLO);
|
|
INIT(VSLW);
|
|
INIT(VSPLTB);
|
|
INIT(VSPLTH);
|
|
INIT(VSPLTISB);
|
|
INIT(VSPLTISH);
|
|
INIT(VSPLTISW);
|
|
INIT(VSPLTW);
|
|
INIT(VSR);
|
|
INIT(VSRAB);
|
|
INIT(VSRAH);
|
|
INIT(VSRAW);
|
|
INIT(VSRB);
|
|
INIT(VSRH);
|
|
INIT(VSRO);
|
|
INIT(VSRW);
|
|
INIT(VSUBCUW);
|
|
INIT(VSUBFP);
|
|
INIT(VSUBSBS);
|
|
INIT(VSUBSHS);
|
|
INIT(VSUBSWS);
|
|
INIT(VSUBUBM);
|
|
INIT(VSUBUBS);
|
|
INIT(VSUBUHM);
|
|
INIT(VSUBUHS);
|
|
INIT(VSUBUWM);
|
|
INIT(VSUBUWS);
|
|
INIT(VSUMSWS);
|
|
INIT(VSUM2SWS);
|
|
INIT(VSUM4SBS);
|
|
INIT(VSUM4SHS);
|
|
INIT(VSUM4UBS);
|
|
INIT(VUPKHPX);
|
|
INIT(VUPKHSB);
|
|
INIT(VUPKHSH);
|
|
INIT(VUPKLPX);
|
|
INIT(VUPKLSB);
|
|
INIT(VUPKLSH);
|
|
INIT(VXOR);
|
|
INIT(TDI);
|
|
INIT(TWI);
|
|
INIT(MULLI);
|
|
INIT(SUBFIC);
|
|
INIT(CMPLI);
|
|
INIT(CMPI);
|
|
INIT(ADDIC);
|
|
INIT(ADDI);
|
|
INIT(ADDIS);
|
|
INIT(BC);
|
|
INIT(SC);
|
|
INIT(B);
|
|
INIT(MCRF);
|
|
INIT(BCLR);
|
|
INIT(CRNOR);
|
|
INIT(CRANDC);
|
|
INIT(ISYNC);
|
|
INIT(CRXOR);
|
|
INIT(CRNAND);
|
|
INIT(CRAND);
|
|
INIT(CREQV);
|
|
INIT(CRORC);
|
|
INIT(CROR);
|
|
INIT(BCCTR);
|
|
INIT_RC(RLWIMI);
|
|
INIT_RC(RLWINM);
|
|
INIT_RC(RLWNM);
|
|
INIT(ORI);
|
|
INIT(ORIS);
|
|
INIT(XORI);
|
|
INIT(XORIS);
|
|
INIT(ANDI);
|
|
INIT(ANDIS);
|
|
INIT_RC(RLDICL);
|
|
INIT_RC(RLDICR);
|
|
INIT_RC(RLDIC);
|
|
INIT_RC(RLDIMI);
|
|
INIT_RC(RLDCL);
|
|
INIT_RC(RLDCR);
|
|
INIT(CMP);
|
|
INIT(TW);
|
|
INIT(LVSL);
|
|
INIT(LVEBX);
|
|
INIT_RC_OV(SUBFC);
|
|
INIT_RC_OV(ADDC);
|
|
INIT_RC(MULHDU);
|
|
INIT_RC(MULHWU);
|
|
INIT(MFOCRF);
|
|
INIT(LWARX);
|
|
INIT(LDX);
|
|
INIT(LWZX);
|
|
INIT_RC(SLW);
|
|
INIT_RC(CNTLZW);
|
|
INIT_RC(SLD);
|
|
INIT_RC(AND);
|
|
INIT(CMPL);
|
|
INIT(LVSR);
|
|
INIT(LVEHX);
|
|
INIT_RC_OV(SUBF);
|
|
INIT(LDUX);
|
|
INIT(DCBST);
|
|
INIT(LWZUX);
|
|
INIT_RC(CNTLZD);
|
|
INIT_RC(ANDC);
|
|
INIT(TD);
|
|
INIT(LVEWX);
|
|
INIT_RC(MULHD);
|
|
INIT_RC(MULHW);
|
|
INIT(LDARX);
|
|
INIT(DCBF);
|
|
INIT(LBZX);
|
|
INIT(LVX);
|
|
INIT_RC_OV(NEG);
|
|
INIT(LBZUX);
|
|
INIT_RC(NOR);
|
|
INIT(STVEBX);
|
|
INIT_OV(SUBFE);
|
|
INIT_OV(ADDE);
|
|
INIT(MTOCRF);
|
|
INIT(STDX);
|
|
INIT(STWCX);
|
|
INIT(STWX);
|
|
INIT(STVEHX);
|
|
INIT(STDUX);
|
|
INIT(STWUX);
|
|
INIT(STVEWX);
|
|
INIT_RC_OV(SUBFZE);
|
|
INIT_RC_OV(ADDZE);
|
|
INIT(STDCX);
|
|
INIT(STBX);
|
|
INIT(STVX);
|
|
INIT_RC_OV(SUBFME);
|
|
INIT_RC_OV(MULLD);
|
|
INIT_RC_OV(ADDME);
|
|
INIT_RC_OV(MULLW);
|
|
INIT(DCBTST);
|
|
INIT(STBUX);
|
|
INIT_RC_OV(ADD);
|
|
INIT(DCBT);
|
|
INIT(LHZX);
|
|
INIT_RC(EQV);
|
|
INIT(ECIWX);
|
|
INIT(LHZUX);
|
|
INIT_RC(XOR);
|
|
INIT(MFSPR);
|
|
INIT(LWAX);
|
|
INIT(DST);
|
|
INIT(LHAX);
|
|
INIT(LVXL);
|
|
INIT(MFTB);
|
|
INIT(LWAUX);
|
|
INIT(DSTST);
|
|
INIT(LHAUX);
|
|
INIT(STHX);
|
|
INIT_RC(ORC);
|
|
INIT(ECOWX);
|
|
INIT(STHUX);
|
|
INIT_RC(OR);
|
|
INIT_RC_OV(DIVDU);
|
|
INIT_RC_OV(DIVWU);
|
|
INIT(MTSPR);
|
|
INIT(DCBI);
|
|
INIT_RC(NAND);
|
|
INIT(STVXL);
|
|
INIT_RC_OV(DIVD);
|
|
INIT_RC_OV(DIVW);
|
|
INIT(LVLX);
|
|
INIT(LDBRX);
|
|
INIT(LSWX);
|
|
INIT(LWBRX);
|
|
INIT(LFSX);
|
|
INIT_RC(SRW);
|
|
INIT_RC(SRD);
|
|
INIT(LVRX);
|
|
INIT(LSWI);
|
|
INIT(LFSUX);
|
|
INIT(SYNC);
|
|
INIT(LFDX);
|
|
INIT(LFDUX);
|
|
INIT(STVLX);
|
|
INIT(STDBRX);
|
|
INIT(STSWX);
|
|
INIT(STWBRX);
|
|
INIT(STFSX);
|
|
INIT(STVRX);
|
|
INIT(STFSUX);
|
|
INIT(STSWI);
|
|
INIT(STFDX);
|
|
INIT(STFDUX);
|
|
INIT(LVLXL);
|
|
INIT(LHBRX);
|
|
INIT_RC(SRAW);
|
|
INIT_RC(SRAD);
|
|
INIT(LVRXL);
|
|
INIT(DSS);
|
|
INIT_RC(SRAWI);
|
|
INIT_RC(SRADI);
|
|
INIT(EIEIO);
|
|
INIT(STVLXL);
|
|
INIT(STHBRX);
|
|
INIT_RC(EXTSH);
|
|
INIT(STVRXL);
|
|
INIT_RC(EXTSB);
|
|
INIT(STFIWX);
|
|
INIT_RC(EXTSW);
|
|
INIT(ICBI);
|
|
INIT(DCBZ);
|
|
INIT(LWZ);
|
|
INIT(LWZU);
|
|
INIT(LBZ);
|
|
INIT(LBZU);
|
|
INIT(STW);
|
|
INIT(STWU);
|
|
INIT(STB);
|
|
INIT(STBU);
|
|
INIT(LHZ);
|
|
INIT(LHZU);
|
|
INIT(LHA);
|
|
INIT(LHAU);
|
|
INIT(STH);
|
|
INIT(STHU);
|
|
INIT(LMW);
|
|
INIT(STMW);
|
|
INIT(LFS);
|
|
INIT(LFSU);
|
|
INIT(LFD);
|
|
INIT(LFDU);
|
|
INIT(STFS);
|
|
INIT(STFSU);
|
|
INIT(STFD);
|
|
INIT(STFDU);
|
|
INIT(LD);
|
|
INIT(LDU);
|
|
INIT(LWA);
|
|
INIT(STD);
|
|
INIT(STDU);
|
|
INIT_RC(FDIVS);
|
|
INIT_RC(FSUBS);
|
|
INIT_RC(FADDS);
|
|
INIT_RC(FSQRTS);
|
|
INIT_RC(FRES);
|
|
INIT_RC(FMULS);
|
|
INIT_RC(FMADDS);
|
|
INIT_RC(FMSUBS);
|
|
INIT_RC(FNMSUBS);
|
|
INIT_RC(FNMADDS);
|
|
INIT_RC(MTFSB1);
|
|
INIT(MCRFS);
|
|
INIT_RC(MTFSB0);
|
|
INIT_RC(MTFSFI);
|
|
INIT_RC(MFFS);
|
|
INIT_RC(MTFSF);
|
|
INIT(FCMPU);
|
|
INIT_RC(FRSP);
|
|
INIT_RC(FCTIW);
|
|
INIT_RC(FCTIWZ);
|
|
INIT_RC(FDIV);
|
|
INIT_RC(FSUB);
|
|
INIT_RC(FADD);
|
|
INIT_RC(FSQRT);
|
|
INIT_RC(FSEL);
|
|
INIT_RC(FMUL);
|
|
INIT_RC(FRSQRTE);
|
|
INIT_RC(FMSUB);
|
|
INIT_RC(FMADD);
|
|
INIT_RC(FNMSUB);
|
|
INIT_RC(FNMADD);
|
|
INIT(FCMPO);
|
|
INIT_RC(FNEG);
|
|
INIT_RC(FMR);
|
|
INIT_RC(FNABS);
|
|
INIT_RC(FABS);
|
|
INIT_RC(FCTID);
|
|
INIT_RC(FCTIDZ);
|
|
INIT_RC(FCFID);
|
|
INIT(UNK);
|
|
#endif
|
|
}
|
|
|
|
// Out-of-line destructor: keeps destruction of the interpreter state owned by
// this class inside this translation unit.
// NOTE(review): presumably required because the 'ptrs' table member is a
// unique_ptr whose element type is only complete here — confirm against the
// class declaration in the header.
ppu_interpreter_rt_base::~ppu_interpreter_rt_base() = default;
|
|
|
|
// Construct the runtime interpreter: the base-class constructor populates the
// 'ptrs' function-pointer storage first, then 'table' is bound as a reference
// to that storage so decode() can dispatch through it.
// (The base is default-constructed implicitly; spelling it out adds nothing.)
ppu_interpreter_rt::ppu_interpreter_rt() noexcept
	: table(*ptrs)
{
}
|
|
|
|
// Resolve an opcode to its interpreter handler.
// Displacement-form loads/stores encoded with RA=0 use 16-bit absolute
// addressing, which never points at valid guest memory here; those are routed
// to a stub that reports the instruction instead of being dispatched normally.
ppu_intrp_func_t ppu_interpreter_rt::decode(u32 opv) const noexcept
{
	// Set when this is a D-form memory access with the RA field equal to zero
	bool invalid_ra0 = false;

	switch (g_ppu_itype.decode(opv))
	{
	case ppu_itype::LWZ:
	case ppu_itype::LBZ:
	case ppu_itype::STW:
	case ppu_itype::STB:
	case ppu_itype::LHZ:
	case ppu_itype::LHA:
	case ppu_itype::STH:
	case ppu_itype::LFS:
	case ppu_itype::LFD:
	case ppu_itype::STFS:
	case ppu_itype::STFD:
	{
		invalid_ra0 = !ppu_opcode_t{opv}.ra;
		break;
	}
	default:
	{
		break;
	}
	}

	if (invalid_ra0)
	{
		// Minor optimization: 16-bit absolute addressing never points to a valid memory
		return [](ppu_thread&, ppu_opcode_t op, be_t<u32>*, ppu_intrp_func*)
		{
			fmt::throw_exception("Invalid instruction: %s r%d,0x%016x(r0)", g_ppu_iname.decode(op.opcode), op.rd, op.simm16);
		};
	}

	return table.decode(opv);
}