Add new accuracy control for PUTLLUC accuracy setting (non-TSX)

With the option enabled, GET commands are blocked until the current PUTLLC/PUTLLUC executor on that address finishes

Additional improvements:
- Minor race fix in sys_ppu_thread_exit (wait until the current writer finishes)
- Maximum number of PPU threads bumped to 8
This commit is contained in:
Eladash 2019-06-11 22:45:11 +03:00 committed by Ivan
parent d8402008dd
commit 0d88f037ff
4 changed files with 24 additions and 24 deletions

View file

@ -1315,7 +1315,12 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
u8* dst = vm::_ptr<u8>(eal); u8* dst = vm::_ptr<u8>(eal);
u8* src = vm::_ptr<u8>(offset + lsa); u8* src = vm::_ptr<u8>(offset + lsa);
if (UNLIKELY(!is_get && !g_use_rtm)) if (is_get)
{
std::swap(dst, src);
}
if (UNLIKELY(!g_use_rtm && (!is_get || g_cfg.core.spu_accurate_putlluc)))
{ {
switch (u32 size = args.size) switch (u32 size = args.size)
{ {
@ -1323,28 +1328,28 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
{ {
auto& res = vm::reservation_lock(eal, 1); auto& res = vm::reservation_lock(eal, 1);
*reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src); *reinterpret_cast<u8*>(dst) = *reinterpret_cast<const u8*>(src);
res.release(res.load() + 127); res.release(res.load() - 1);
break; break;
} }
case 2: case 2:
{ {
auto& res = vm::reservation_lock(eal, 2); auto& res = vm::reservation_lock(eal, 2);
*reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src); *reinterpret_cast<u16*>(dst) = *reinterpret_cast<const u16*>(src);
res.release(res.load() + 127); res.release(res.load() - 1);
break; break;
} }
case 4: case 4:
{ {
auto& res = vm::reservation_lock(eal, 4); auto& res = vm::reservation_lock(eal, 4);
*reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src); *reinterpret_cast<u32*>(dst) = *reinterpret_cast<const u32*>(src);
res.release(res.load() + 127); res.release(res.load() - 1);
break; break;
} }
case 8: case 8:
{ {
auto& res = vm::reservation_lock(eal, 8); auto& res = vm::reservation_lock(eal, 8);
*reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src); *reinterpret_cast<u64*>(dst) = *reinterpret_cast<const u64*>(src);
res.release(res.load() + 127); res.release(res.load() - 1);
break; break;
} }
default: default:
@ -1363,7 +1368,7 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
size -= 16; size -= 16;
} }
res.release(res.load() + 127); res.release(res.load() - 1);
break; break;
} }
@ -1395,11 +1400,6 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
return; return;
} }
if (is_get)
{
std::swap(dst, src);
}
switch (u32 size = args.size) switch (u32 size = args.size)
{ {
case 1: case 1:

View file

@ -15,6 +15,9 @@ void _sys_ppu_thread_exit(ppu_thread& ppu, u64 errorcode)
{ {
vm::temporary_unlock(ppu); vm::temporary_unlock(ppu);
// Need to wait until the current writer finish
if (ppu.state & cpu_flag::memory) vm::g_mutex.lock_unlock();
sys_ppu_thread.trace("_sys_ppu_thread_exit(errorcode=0x%llx)", errorcode); sys_ppu_thread.trace("_sys_ppu_thread_exit(errorcode=0x%llx)", errorcode);
ppu.state += cpu_flag::exit; ppu.state += cpu_flag::exit;

View file

@ -62,7 +62,7 @@ namespace vm
// Memory mutex acknowledgement // Memory mutex acknowledgement
thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr; thread_local atomic_t<cpu_thread*>* g_tls_locked = nullptr;
// Currently locked address // Currently locked cache line
atomic_t<u32> g_addr_lock = 0; atomic_t<u32> g_addr_lock = 0;
// Memory mutex: passive locks // Memory mutex: passive locks
@ -71,13 +71,15 @@ namespace vm
static void _register_lock(cpu_thread* _cpu) static void _register_lock(cpu_thread* _cpu)
{ {
for (u32 i = 0;; i = (i + 1) % g_locks.size()) for (u32 i = 0, max = g_cfg.core.ppu_threads;;)
{ {
if (!g_locks[i] && g_locks[i].compare_and_swap_test(nullptr, _cpu)) if (!g_locks[i] && g_locks[i].compare_and_swap_test(nullptr, _cpu))
{ {
g_tls_locked = g_locks.data() + i; g_tls_locked = g_locks.data() + i;
return; return;
} }
if (++i == max) i = 0;
} }
} }
@ -165,7 +167,7 @@ namespace vm
void cleanup_unlock(cpu_thread& cpu) noexcept void cleanup_unlock(cpu_thread& cpu) noexcept
{ {
for (u32 i = 0; i < g_locks.size(); i++) for (u32 i = 0, max = g_cfg.core.ppu_threads; i < max; i++)
{ {
if (g_locks[i] == &cpu) if (g_locks[i] == &cpu)
{ {
@ -247,9 +249,9 @@ namespace vm
if (addr) if (addr)
{ {
for (auto& lock : g_locks) for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++)
{ {
if (cpu_thread* ptr = lock) if (cpu_thread* ptr = *lock)
{ {
ptr->state.test_and_set(cpu_flag::memory); ptr->state.test_and_set(cpu_flag::memory);
} }
@ -279,15 +281,10 @@ namespace vm
} }
} }
for (auto& lock : g_locks) for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++)
{ {
while (cpu_thread* ptr = lock) while (*lock)
{ {
if (ptr->is_stopped())
{
break;
}
_mm_pause(); _mm_pause();
} }
} }

View file

@ -388,7 +388,7 @@ struct cfg_root : cfg::node
node_core(cfg::node* _this) : cfg::node(_this, "Core") {} node_core(cfg::node* _this) : cfg::node(_this, "Core") {}
cfg::_enum<ppu_decoder_type> ppu_decoder{this, "PPU Decoder", ppu_decoder_type::llvm}; cfg::_enum<ppu_decoder_type> ppu_decoder{this, "PPU Decoder", ppu_decoder_type::llvm};
cfg::_int<1, 4> ppu_threads{this, "PPU Threads", 2}; // Amount of PPU threads running simultaneously (must be 2) cfg::_int<1, 8> ppu_threads{this, "PPU Threads", 2}; // Amount of PPU threads running simultaneously (must be 2)
cfg::_bool ppu_debug{this, "PPU Debug"}; cfg::_bool ppu_debug{this, "PPU Debug"};
cfg::_bool llvm_logs{this, "Save LLVM logs"}; cfg::_bool llvm_logs{this, "Save LLVM logs"};
cfg::string llvm_cpu{this, "Use LLVM CPU"}; cfg::string llvm_cpu{this, "Use LLVM CPU"};