PPU: fix LDARX/LWARX in accurate mode (closes #9058)

Fixup after #9048
Use SSE intrinsics in mov_rdata.
This commit is contained in:
Nekotekina 2020-10-11 17:32:00 +03:00
parent 1885e4345c
commit 5bd5a382c0
2 changed files with 29 additions and 3 deletions

View file

@ -197,8 +197,25 @@ extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src)
return;
}
// TODO: use std::assume_aligned
std::memcpy(reinterpret_cast<v128*>(_dst), reinterpret_cast<const v128*>(_src), 128);
{
const __m128i v0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 0));
const __m128i v1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 16));
const __m128i v2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 32));
const __m128i v3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 48));
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 0), v0);
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 16), v1);
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 32), v2);
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 48), v3);
}
const __m128i v0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 64));
const __m128i v1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 80));
const __m128i v2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 96));
const __m128i v3 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(_src + 112));
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 64), v0);
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 80), v1);
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 96), v2);
_mm_storeu_si128(reinterpret_cast<__m128i*>(_dst + 112), v3);
}
extern u64 get_timebased_time();