Mirror of https://github.com/RPCS3/rpcs3.git
Optimize vm::range_lock
Only test address on `range_locked`.
Don't check current transaction.
Remove vm::clear_range_locks completely.
parent 8d12816001
commit 46d3066c62

3 changed files with 39 additions and 47 deletions
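For orientation, here is the heart of the change condensed into a self-contained sketch. The bit layout follows the packing line `g_range_lock = addr | u64{size} << 35 | flags` and the flag enum shown in the hunks below; `may_pass` is a hypothetical name used for illustration, not code from the repo.

#include <atomic>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

constexpr u64 range_locked = 2ull << 32; // matches the enum in the header hunk below
std::atomic<u64> g_range_lock{0};

// True if a reader of [addr, addr + size) does not have to wait for the
// current writer-side range lock (hypothetical helper for illustration).
bool may_pass(u64 addr, u64 size)
{
	const u64 lock_val  = g_range_lock.load();
	const u64 lock_addr = static_cast<u32>(lock_val); // low 32 bits: start
	const u64 lock_size = lock_val >> 35;             // bits 35+: length
	const u64 lock_bits = lock_val & (7ull << 32);    // bits 32..34: flags

	// The point of this commit: only range_locked blocks by address, so the
	// overlap test is skipped entirely for every other flag combination.
	return lock_bits != range_locked
	    || addr + size <= lock_addr
	    || addr >= lock_addr + lock_size;
}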
rpcs3/Emu/Cell/SPUThread.cpp
@@ -2104,8 +2104,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
 			perf_meter<"DMA_PUT"_u64> perf2;
 
-			// TODO: split range-locked stores in cache lines for consistency
-			auto res = &vm::reservation_acquire(eal, args.size);
+			// Temporarily disabled, may be removed at all
+			atomic_t<u64>* res = nullptr;
 
 			switch (u32 size = args.size)
 			{
rpcs3/Emu/Memory/vm.cpp
@@ -156,7 +156,14 @@ namespace vm
 	{
 		perf_meter<"RHW_LOCK"_u64> perf0;
 
-		while (true)
+		auto _cpu = get_current_cpu_thread();
+
+		if (_cpu)
+		{
+			_cpu->state += cpu_flag::wait + cpu_flag::temp;
+		}
+
+		for (u64 i = 0;; i++)
 		{
 			const u64 lock_val = g_range_lock.load();
 			const u64 lock_addr = static_cast<u32>(lock_val); // -> u64
@@ -166,12 +173,13 @@ namespace vm
 
 			u64 addr = begin;
 
-			if (g_shareable[begin >> 16] || lock_bits == range_sharing)
+			// Only useful for range_locked, and is reliable in this case
+			if (g_shareable[begin >> 16])
 			{
 				addr = addr & 0xffff;
 			}
 
-			if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+			if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
 			{
 				range_lock->store(begin | (u64{size} << 32));
@@ -180,18 +188,28 @@ namespace vm
 
 				if (!new_lock_val && !new_res_val) [[likely]]
 				{
-					return;
+					break;
 				}
 
 				if (new_lock_val == lock_val && !new_res_val) [[likely]]
 				{
-					return;
+					break;
 				}
 
 				range_lock->release(0);
 			}
 
-			std::shared_lock lock(g_mutex);
+			std::shared_lock lock(g_mutex, std::try_to_lock);
+
+			if (!lock && i < 15)
+			{
+				busy_wait(200);
+				continue;
+			}
+			else if (!lock)
+			{
+				lock.lock();
+			}
 
 			u32 test = 0;
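The fallback path above now tries the mutex without blocking and spins briefly before committing to a blocking lock. A generic sketch of that spin-then-block pattern in standard C++, assuming nothing from the repo beyond the retry cap of 15 (`busy_wait(200)` is approximated with a yield, since the real helper is RPCS3-specific):

#include <mutex>
#include <shared_mutex>
#include <thread>

std::shared_mutex g_mutex; // stand-in for vm::g_mutex

void with_shared_lock()
{
	for (unsigned i = 0;; i++)
	{
		std::shared_lock lock(g_mutex, std::try_to_lock);

		if (!lock && i < 15)
		{
			std::this_thread::yield(); // cheap retry; writers are usually brief
			continue;
		}
		else if (!lock)
		{
			lock.lock(); // give up spinning and block until the mutex is free
		}

		// ... validate, then either `continue` to retry or finish ...
		break;
	}
}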
@@ -213,6 +231,14 @@ namespace vm
 				vm::_ref<atomic_t<u8>>(test) += 0;
 				continue;
 			}
 
+			range_lock->release(begin | (u64{size} << 32));
+			break;
+		}
+
+		if (_cpu)
+		{
+			_cpu->check_state();
 		}
 	}
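Note: the retry loop is now bracketed by `_cpu->state += cpu_flag::wait + cpu_flag::temp` on entry and `_cpu->check_state()` on exit, so the emulated CPU thread appears as waiting for the entire retry sequence and re-evaluates its pending state once the range lock has finally been taken.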
@@ -252,36 +278,6 @@ namespace vm
 		return result;
 	}
 
-	void clear_range_locks(u32 addr, u32 size)
-	{
-		ASSUME(size);
-
-		const auto range = utils::address_range::start_length(addr, size);
-
-		// Wait for range locks to clear
-		while (true)
-		{
-			const u64 bads = for_all_range_locks([&](u32 addr2, u32 size2)
-			{
-				ASSUME(size2);
-
-				if (range.overlaps(utils::address_range::start_length(addr2, size2))) [[unlikely]]
-				{
-					return 1;
-				}
-
-				return 0;
-			});
-
-			if (!bads)
-			{
-				return;
-			}
-
-			_mm_pause();
-		}
-	}
-
 	static void _lock_shareable_cache(u64 flags, u32 addr, u32 size)
 	{
 		// Can't do 512 MiB or more at once
@@ -290,10 +286,8 @@ namespace vm
 			fmt::throw_exception("Failed to lock range (flags=0x%x, addr=0x%x, size=0x%x)" HERE, flags >> 32, addr, size);
 		}
 
-		// Block new range locks
+		// Block or signal new range locks
 		g_range_lock = addr | u64{size} << 35 | flags;
-
-		clear_range_locks(addr, size);
 	}
 
 	void passive_lock(cpu_thread& cpu)
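As a worked example of the packing line above (the constants are arbitrary, chosen only to make the arithmetic visible):

#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

constexpr u64 addr  = 0x300000;   // example base address
constexpr u64 size  = 0x10000;    // example length, 64 KiB
constexpr u64 flags = 2ull << 32; // range_locked

constexpr u64 packed = addr | size << 35 | flags;

static_assert(packed == 0x0008'0002'0030'0000ull);
static_assert(static_cast<u32>(packed) == addr); // low 32 bits: address
static_assert(packed >> 35 == size);             // bits 35 and up: size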
rpcs3/Emu/Memory/vm_locking.h
@@ -23,7 +23,7 @@ namespace vm
 		/* flag combinations with special meaning */
 
 		range_normal = 3ull << 32, // R+W
-		range_locked = 2ull << 32, // R+W as well but do not
+		range_locked = 2ull << 32, // R+W as well, the only range flag that should block by address
 		range_sharing = 4ull << 32, // Range being registered as shared, flags are unchanged
 		range_allocation = 0, // Allocation, no safe access
 		range_deallocation = 6ull << 32, // Deallocation, no safe access
@@ -52,12 +52,13 @@ namespace vm
 
 		u64 addr = begin;
 
-		if (g_shareable[begin >> 16] || lock_bits == range_sharing)
+		// Only used for range_locked and is reliable in this case
+		if (g_shareable[begin >> 16])
 		{
 			addr = addr & 0xffff;
 		}
 
-		if ((addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
+		if ((lock_bits != range_locked || addr + size <= lock_addr || addr >= lock_addr + lock_size) && !res_val) [[likely]]
 		{
 			// Optimistic locking
 			range_lock->store(begin | (u64{size} << 32));
@@ -82,9 +83,6 @@ namespace vm
 		range_lock_internal(res, range_lock, begin, size);
 	}
 
-	// Wait for all range locks to release in specified range
-	void clear_range_locks(u32 addr, u32 size);
-
 	// Release it
 	void free_range_lock(atomic_t<u64, 64>*) noexcept;