mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 06:21:26 +12:00
SPU: Copy with memcpy() instead of hand-rolled SSE2
In a very unscientific benchmark, spu_thread::do_dma_transfer() went from taking 2.27% of my CPU time to 0.07%, while __memmove_avx_unaligned_erms() went from 1.47% to 2.88%; taken together, that is a net saving of about 0.8%.
This commit is contained in:
parent
5261886449
commit
425e032a62
1 changed file with 8 additions and 0 deletions
|
@ -1433,6 +1433,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||||
|
|
||||||
auto lock = vm::passive_lock(eal & -128, ::align(eal + size, 128));
|
auto lock = vm::passive_lock(eal & -128, ::align(eal + size, 128));
|
||||||
|
|
||||||
|
#ifdef __GNUG__
|
||||||
|
std::memcpy(dst, src, size);
|
||||||
|
#else
|
||||||
while (size >= 128)
|
while (size >= 128)
|
||||||
{
|
{
|
||||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||||
|
@ -1450,6 +1453,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||||
src += 16;
|
src += 16;
|
||||||
size -= 16;
|
size -= 16;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
lock->release(0);
|
lock->release(0);
|
||||||
break;
|
break;
|
||||||
|
@ -1483,6 +1487,9 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
|
#ifdef __GNUG__
|
||||||
|
std::memcpy(dst, src, size);
|
||||||
|
#else
|
||||||
while (size >= 128)
|
while (size >= 128)
|
||||||
{
|
{
|
||||||
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
|
||||||
|
@ -1500,6 +1507,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
|
||||||
src += 16;
|
src += 16;
|
||||||
size -= 16;
|
size -= 16;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue