mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 14:31:24 +12:00
Initial Linux Aarch64 support
* Update asmjit dependency (aarch64 branch)
* Disable USE_DISCORD_RPC by default
* Dump some JIT objects in rpcs3 cache dir
* Add SIGILL handler for all platforms
* Fix resetting zeroing denormals in thread pool
* Refactor most v128:: utils into global gv_** functions
* Refactor PPU interpreter (incomplete), remove "precise"
  - Instruction specializations with multiple accuracy flags
  - Adjust calling convention for speed
  - Removed precise/fast setting, replaced with static
  - Started refactoring interpreters for building at runtime JIT
    (I got tired of poor compiler optimizations)
  - Expose some accuracy settings (SAT, NJ, VNAN, FPCC)
  - Add exec_bytes PPU thread variable (akin to cycle count)
* PPU LLVM: fix VCTUXS+VCTSXS instruction NaN results
* SPU interpreter: remove "precise" for now (extremely non-portable)
  - As with PPU, settings changed to static/dynamic for interpreters.
  - Precise options will be implemented later
* Fix termination after fatal error dialog
This commit is contained in:
parent
d6aa834b5f
commit
580bd2b25e
89 changed files with 20360 additions and 5612 deletions
|
@ -2,9 +2,12 @@
|
|||
|
||||
#include "ProgramStateCache.h"
|
||||
|
||||
#include "emmintrin.h"
|
||||
#include "util/asm.hpp"
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
#include "emmintrin.h"
|
||||
#endif
|
||||
|
||||
template <typename Traits>
|
||||
void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32> dst_buffer, const RSXFragmentProgram &fragment_program, bool sanitize) const
|
||||
{
|
||||
|
@ -19,12 +22,23 @@ void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32>
|
|||
for (usz offset_in_fragment_program : I->second.FragmentConstantOffsetCache)
|
||||
{
|
||||
char* data = static_cast<char*>(fragment_program.get_data()) + offset_in_fragment_program;
|
||||
|
||||
#if defined(ARCH_X64)
|
||||
const __m128i vector = _mm_loadu_si128(reinterpret_cast<__m128i*>(data));
|
||||
const __m128i shuffled_vector = _mm_or_si128(_mm_slli_epi16(vector, 8), _mm_srli_epi16(vector, 8));
|
||||
#else
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
const u32 value = reinterpret_cast<u32*>(data)[i];
|
||||
tmp[i] = std::bit_cast<f32, u32>(((value >> 8) & 0xff00ff) | ((value << 8) & 0xff00ff00));
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!patch_table.is_empty())
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
_mm_store_ps(tmp, _mm_castsi128_ps(shuffled_vector));
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
|
@ -47,15 +61,29 @@ void program_state_cache<Traits>::fill_fragment_constants_buffer(std::span<f32>
|
|||
}
|
||||
else if (sanitize)
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
//Convert NaNs and Infs to 0
|
||||
const auto masked = _mm_and_si128(shuffled_vector, _mm_set1_epi32(0x7fffffff));
|
||||
const auto valid = _mm_cmplt_epi32(masked, _mm_set1_epi32(0x7f800000));
|
||||
const auto result = _mm_and_si128(shuffled_vector, valid);
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), result);
|
||||
#else
|
||||
for (u32 i = 0; i < 4; i++)
|
||||
{
|
||||
const u32 value = std::bit_cast<u32>(tmp[i]);
|
||||
tmp[i] = (value & 0x7fffffff) < 0x7f800000 ? value : 0;
|
||||
}
|
||||
|
||||
std::memcpy(dst, tmp, 16);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined(ARCH_X64)
|
||||
_mm_stream_si128(utils::bless<__m128i>(dst), shuffled_vector);
|
||||
#else
|
||||
std::memcpy(dst, tmp, 16);
|
||||
#endif
|
||||
}
|
||||
|
||||
dst += 4;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue