Experimental squashing of the reservation memory area.
Enables trivial reservation synchronization between shared memory mappings. Reduces memory usage, but potentially degrades performance. Renames an overload of vm::passive_lock to vm::range_lock.
Commit c7fe8567b8 (parent 8cb1f4fe26)
10 changed files with 182 additions and 69 deletions
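
The core of the change is visible in the vm hunks below: the reservation area is no longer a 4 GiB reserved mirror indexed by addr / 128, but a single 32 KiB static table that every 64K page aliases, with each 128-byte reservation line mapped to a 64-byte-spaced slot via (addr & 0xff80) / 2. Because all mappings of a shareable 64K page fold onto the same slots, reservations stay coherent across shared-memory mappings. A minimal standalone sketch of the new index computation (the reservation_slot helper and the sample addresses are illustrative, not code from this commit):

#include <cstdint>
#include <cstdio>

// Squashed reservation table: 65536 / 128 lines per 64K page, one u64 slot
// per line, spaced 64 bytes apart (hence the "* 64").
alignas(4096) std::uint8_t g_reservations[65536 / 128 * 64]{};

// Hypothetical helper mirroring reservation_acquire() from the diff:
// keep the 128-byte line offset within the 64K page, halve it to get the
// 64-byte stride into the table. Only the pointer is taken here, never
// dereferenced.
std::uint64_t* reservation_slot(std::uint32_t addr)
{
    return reinterpret_cast<std::uint64_t*>(g_reservations + (addr & 0xff80) / 2);
}

int main()
{
    // Two different mappings of the same shareable 64K page (sample
    // addresses) resolve to the same slot, which is what makes
    // synchronization between shared memory mappings trivial.
    std::printf("%p\n%p\n",
        static_cast<void*>(reservation_slot(0x30010080)),
        static_cast<void*>(reservation_slot(0xd0020080)));
}

The trade-off noted in the commit message follows from the same folding: unrelated 64K pages now also share slots, so unrelated reservations can collide, which is presumably where the potential performance degradation comes from.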
@@ -141,8 +141,9 @@ namespace utils
 #endif
     }
 
-    shm::shm(u32 size)
+    shm::shm(u32 size, u32 flags)
         : m_size(::align(size, 0x10000))
+        , m_flags(flags)
     {
 #ifdef _WIN32
         m_handle = ::CreateFileMappingW(INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, 0, m_size, NULL);
@@ -48,9 +48,10 @@ namespace utils
         int m_file;
 #endif
         u32 m_size;
+        u32 m_flags;
 
     public:
-        explicit shm(u32 size);
+        explicit shm(u32 size, u32 flags = 0);
 
         shm(const shm&) = delete;
 
@@ -74,5 +75,11 @@ namespace utils
         {
             return m_size;
         }
+
+        // Flags are unspecified, consider it userdata
+        u32 flags() const
+        {
+            return m_flags;
+        }
     };
 }
@@ -604,7 +604,7 @@ void ppu_thread::cpu_task()
         }
         case ppu_cmd::opd_call:
         {
             const ppu_func_opd_t opd = cmd_get(1).as<ppu_func_opd_t>();
             cmd_pop(1), fast_call(opd.addr, opd.rtoc);
             break;
         }
@@ -1089,13 +1089,13 @@ const auto ppu_stwcx_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, u64 rd
     Label fail = c.newLabel();
 
     // Prepare registers
-    c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-    c.mov(x86::r10, x86::qword_ptr(x86::rax));
+    c.mov(x86::r10, imm_ptr(+vm::g_reservations));
     c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
     c.mov(x86::r11, x86::qword_ptr(x86::rax));
     c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
-    c.shr(args[0], 7);
-    c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
+    c.and_(args[0].r32(), 0xff80);
+    c.shr(args[0].r32(), 1);
+    c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0]));
     c.xor_(args[0].r32(), args[0].r32());
     c.bswap(args[2].r32());
     c.bswap(args[3].r32());
@@ -1135,13 +1135,13 @@ const auto ppu_stdcx_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, u64 rd
     Label fail = c.newLabel();
 
     // Prepare registers
-    c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-    c.mov(x86::r10, x86::qword_ptr(x86::rax));
+    c.mov(x86::r10, imm_ptr(+vm::g_reservations));
     c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
     c.mov(x86::r11, x86::qword_ptr(x86::rax));
     c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
-    c.shr(args[0], 7);
-    c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0], 3));
+    c.and_(args[0].r32(), 0xff80);
+    c.shr(args[0].r32(), 1);
+    c.lea(x86::r10, x86::qword_ptr(x86::r10, args[0]));
     c.xor_(args[0].r32(), args[0].r32());
     c.bswap(args[2]);
     c.bswap(args[3]);
@@ -1266,10 +1266,10 @@ void spu_recompiler::get_events()
     Label fail = c->newLabel();
     c->bind(rcheck);
     c->mov(qw1->r32(), *addr);
-    c->mov(*qw0, imm_ptr(vm::g_reservations));
-    c->shr(qw1->r32(), 4);
+    c->mov(*qw0, imm_ptr(+vm::g_reservations));
+    c->and_(qw1->r32(), 0xff80);
+    c->shr(qw1->r32(), 1);
     c->mov(*qw0, x86::qword_ptr(*qw0, *qw1));
-    c->and_(qw0->r64(), -128);
     c->cmp(*qw0, SPU_OFF_64(rtime));
     c->jne(fail);
     c->mov(*qw0, imm_ptr(vm::g_base_addr));
@@ -219,12 +219,12 @@ const auto spu_putllc_tx = build_function_asm<u32(*)(u32 raddr, u64 rtime, const
 #endif
 
     // Prepare registers
-    c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-    c.mov(x86::rbx, x86::qword_ptr(x86::rax));
+    c.mov(x86::rbx, imm_ptr(+vm::g_reservations));
     c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
     c.mov(x86::rbp, x86::qword_ptr(x86::rax));
     c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
-    c.shr(args[0], 4);
+    c.and_(args[0].r32(), 0xff80);
+    c.shr(args[0].r32(), 1);
     c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
     c.xor_(x86::r12d, x86::r12d);
     c.mov(x86::r13, args[1]);
@@ -496,12 +496,12 @@ const auto spu_getll_tx = build_function_asm<u64(*)(u32 raddr, void* rdata)>([](
 #endif
 
     // Prepare registers
-    c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-    c.mov(x86::rbx, x86::qword_ptr(x86::rax));
+    c.mov(x86::rbx, imm_ptr(+vm::g_reservations));
     c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
     c.mov(x86::rbp, x86::qword_ptr(x86::rax));
     c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
-    c.shr(args[0], 4);
+    c.and_(args[0].r32(), 0xff80);
+    c.shr(args[0].r32(), 1);
     c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
     c.xor_(x86::r12d, x86::r12d);
     c.mov(x86::r13, args[1]);
@@ -608,12 +608,12 @@ const auto spu_getll_inexact = build_function_asm<u64(*)(u32 raddr, void* rdata)
 #endif
 
     // Prepare registers
-    c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-    c.mov(x86::rbx, x86::qword_ptr(x86::rax));
+    c.mov(x86::rbx, imm_ptr(+vm::g_reservations));
     c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
     c.mov(x86::rbp, x86::qword_ptr(x86::rax));
     c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
-    c.shr(args[0], 4);
+    c.and_(args[0].r32(), 0xff80);
+    c.shr(args[0].r32(), 1);
     c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
     c.xor_(x86::r12d, x86::r12d);
     c.mov(x86::r13, args[1]);
@@ -775,12 +775,12 @@ const auto spu_putlluc_tx = build_function_asm<u32(*)(u32 raddr, const void* rda
 #endif
 
     // Prepare registers
-    c.mov(x86::rax, imm_ptr(&vm::g_reservations));
-    c.mov(x86::rbx, x86::qword_ptr(x86::rax));
+    c.mov(x86::rbx, imm_ptr(+vm::g_reservations));
     c.mov(x86::rax, imm_ptr(&vm::g_base_addr));
     c.mov(x86::rbp, x86::qword_ptr(x86::rax));
     c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
-    c.shr(args[0], 4);
+    c.and_(args[0].r32(), 0xff80);
+    c.shr(args[0].r32(), 1);
     c.lea(x86::rbx, x86::qword_ptr(x86::rbx, args[0]));
     c.xor_(x86::r12d, x86::r12d);
     c.mov(x86::r13, args[1]);
@@ -1464,7 +1464,46 @@ void spu_thread::do_dma_transfer(const spu_mfc_cmd& args)
         break;
     }
 
-    auto lock = vm::passive_lock(eal & -128, ::align(eal + size, 128));
+    u32 range_addr = eal & -128;
+    u32 range_end = ::align(eal + size, 128);
+
+    // Handle the case of crossing 64K page borders
+    if (range_addr >> 16 != (range_end - 1) >> 16)
+    {
+        u32 nexta = range_end & -65536;
+        u32 size0 = nexta - eal;
+        size -= size0;
+
+        // Split locking + transfer in two parts (before 64K border, and after it)
+        const auto lock = vm::range_lock(range_addr, nexta);
+#ifdef __GNUG__
+        std::memcpy(dst, src, size0);
+        dst += size0;
+        src += size0;
+#else
+        while (size0 >= 128)
+        {
+            mov_rdata(*reinterpret_cast<decltype(spu_thread::rdata)*>(dst), *reinterpret_cast<const decltype(spu_thread::rdata)*>(src));
+
+            dst += 128;
+            src += 128;
+            size0 -= 128;
+        }
+
+        while (size0)
+        {
+            *reinterpret_cast<v128*>(dst) = *reinterpret_cast<const v128*>(src);
+
+            dst += 16;
+            src += 16;
+            size0 -= 16;
+        }
+#endif
+        lock->release(0);
+        range_addr = nexta;
+    }
+
+    const auto lock = vm::range_lock(range_addr, range_end);
 
 #ifdef __GNUG__
     std::memcpy(dst, src, size);
@@ -17,7 +17,7 @@ lv2_memory::lv2_memory(u32 size, u32 align, u64 flags, lv2_memory_container* ct)
     , align(align)
     , flags(flags)
     , ct(ct)
-    , shm(std::make_shared<utils::shm>(size))
+    , shm(std::make_shared<utils::shm>(size, 1 /* shareable flag */))
 {
 }
 
@@ -47,8 +47,11 @@ namespace vm
     // Stats for debugging
     u8* const g_stat_addr = memory_reserve_4GiB(g_exec_addr);
 
-    // Reservation stats (compressed x16)
-    u8* const g_reservations = memory_reserve_4GiB(g_stat_addr);
+    // Reservation stats
+    alignas(4096) u8 g_reservations[65536 / 128 * 64]{0};
+
+    // Shareable memory bits
+    alignas(4096) atomic_t<u8> g_shareable[65536]{0};
 
     // Memory locations
     std::vector<std::shared_ptr<block_t>> g_locations;
@@ -94,6 +97,34 @@ namespace vm
         }
     }
 
+    static void _lock_shareable_cache(u8 /*value*/, u32 addr /*mutable*/, u32 end /*mutable*/)
+    {
+        // Special value to block new range locks
+        g_addr_lock = 1;
+
+        // Convert to 64K-page numbers
+        addr >>= 16;
+        end >>= 16;
+
+        // Wait for range locks to clear
+        for (auto& lock : g_range_locks)
+        {
+            while (const u64 _lock = lock.load())
+            {
+                if (const u32 lock_page = static_cast<u32>(_lock) >> 16)
+                {
+                    if (lock_page < addr || lock_page >= end)
+                    {
+                        // Ignoreable range lock
+                        break;
+                    }
+                }
+
+                _mm_pause();
+            }
+        }
+    }
+
     void passive_lock(cpu_thread& cpu)
     {
         if (g_tls_locked && *g_tls_locked == &cpu) [[unlikely]]
@@ -118,21 +149,45 @@ namespace vm
         _register_lock(&cpu);
     }
 
-    atomic_t<u64>* passive_lock(const u32 addr, const u32 end)
+    atomic_t<u64>* range_lock(u32 addr, u32 end)
     {
-        static const auto test_addr = [](const u32 target, const u32 addr, const u32 end)
+        static const auto test_addr = [](u32 target, u32 addr, u32 end) -> u64
         {
-            return addr > target || end <= target;
+            if (target == 1)
+            {
+                // Shareable info is being modified
+                return 0;
+            }
+
+            if (g_shareable[target >> 16])
+            {
+                // Target within shareable memory range
+                target &= 0xffff;
+            }
+
+            if (g_shareable[addr >> 16])
+            {
+                // Track shareable memory locks in 0x0..0xffff address range
+                addr &= 0xffff;
+                end = ((end - 1) & 0xffff) + 1;
+            }
+
+            if (addr > target || end <= target)
+            {
+                return u64{end} << 32 | addr;
+            }
+
+            return 0;
         };
 
         atomic_t<u64>* _ret;
 
-        if (test_addr(g_addr_lock.load(), addr, end)) [[likely]]
+        if (u64 _a1 = test_addr(g_addr_lock.load(), addr, end)) [[likely]]
         {
             // Optimistic path (hope that address range is not locked)
-            _ret = _register_range_lock(u64{end} << 32 | addr);
+            _ret = _register_range_lock(_a1);
 
-            if (test_addr(g_addr_lock.load(), addr, end)) [[likely]]
+            if (_a1 == test_addr(g_addr_lock.load(), addr, end)) [[likely]]
             {
                 return _ret;
             }
@@ -142,7 +197,7 @@ namespace vm
 
         {
             ::reader_lock lock(g_mutex);
-            _ret = _register_range_lock(u64{end} << 32 | addr);
+            _ret = _register_range_lock(test_addr(-1, addr, end));
         }
 
         return _ret;
@@ -233,7 +288,7 @@ namespace vm
         m_upgraded = true;
     }
 
-    writer_lock::writer_lock(u32 addr)
+    writer_lock::writer_lock(u32 addr /*mutable*/)
    {
         auto cpu = get_current_cpu_thread();
 
@@ -244,7 +299,7 @@ namespace vm
 
         g_mutex.lock();
 
-        if (addr)
+        if (addr >= 0x10000)
         {
             for (auto lock = g_locks.cbegin(), end = lock + g_cfg.core.ppu_threads; lock != end; lock++)
             {
@@ -256,6 +311,12 @@ namespace vm
 
         g_addr_lock = addr;
 
+        if (g_shareable[addr >> 16])
+        {
+            // Reservation address in shareable memory range
+            addr = addr & 0xffff;
+        }
+
         for (auto& lock : g_range_locks)
         {
             while (true)
@@ -345,6 +406,19 @@ namespace vm
             }
         }
 
+        if (shm && shm->flags() != 0)
+        {
+            _lock_shareable_cache(1, addr, addr + size);
+
+            for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
+            {
+                g_shareable[i] = 1;
+            }
+
+            // Unlock
+            g_addr_lock.compare_and_swap(1, 0);
+        }
+
         // Notify rsx that range has become valid
         // Note: This must be done *before* memory gets mapped while holding the vm lock, otherwise
         // the RSX might try to invalidate memory that got unmapped and remapped
@@ -482,6 +556,19 @@ namespace vm
             }
         }
 
+        if (g_shareable[addr >> 16])
+        {
+            _lock_shareable_cache(0, addr, addr + size);
+
+            for (u32 i = addr / 65536; i < addr / 65536 + size / 65536; i++)
+            {
+                g_shareable[i] = 0;
+            }
+
+            // Unlock
+            g_addr_lock.compare_and_swap(1, 0);
+        }
+
         // Notify rsx to invalidate range
         // Note: This must be done *before* memory gets unmapped while holding the vm lock, otherwise
         // the RSX might try to call VirtualProtect on memory that is already unmapped
@@ -624,35 +711,12 @@ namespace vm
         , size(size)
         , flags(flags)
     {
-        // Allocate compressed reservation info area (avoid SPU MMIO area)
-        if (addr != 0xe0000000)
-        {
-            // Beginning of the address space
-            if (addr == 0x10000)
-            {
-                utils::memory_commit(g_reservations, 0x1000);
-            }
-
-            utils::memory_commit(g_reservations + addr / 16, size / 16);
-        }
-        else
-        {
-            // RawSPU LS
-            for (u32 i = 0; i < 6; i++)
-            {
-                utils::memory_commit(g_reservations + addr / 16 + i * 0x10000, 0x4000);
-            }
-
-            // End of the address space
-            utils::memory_commit(g_reservations + 0xfff0000, 0x10000);
-        }
-
         if (flags & 0x100)
         {
             // Special path for 4k-aligned pages
             m_common = std::make_shared<utils::shm>(size);
             verify(HERE), m_common->map_critical(vm::base(addr), utils::protection::no) == vm::base(addr);
-            verify(HERE), m_common->map_critical(vm::get_super_ptr(addr), utils::protection::rw) == vm::get_super_ptr(addr);
+            verify(HERE), m_common->map_critical(vm::get_super_ptr(addr)) == vm::get_super_ptr(addr);
         }
     }
 
@@ -1142,7 +1206,7 @@ namespace vm
         g_sudo_addr, g_sudo_addr + UINT32_MAX,
         g_exec_addr, g_exec_addr + 0x200000000 - 1,
         g_stat_addr, g_stat_addr + UINT32_MAX,
-        g_reservations, g_reservations + UINT32_MAX);
+        g_reservations, g_reservations + sizeof(g_reservations) - 1);
 
     g_locations =
     {
@@ -1154,6 +1218,9 @@ namespace vm
             std::make_shared<block_t>(0xD0000000, 0x10000000, 0x111), // stack
             std::make_shared<block_t>(0xE0000000, 0x20000000), // SPU reserved
         };
+
+        std::memset(g_reservations, 0, sizeof(g_reservations));
+        std::memset(g_shareable, 0, sizeof(g_shareable));
     }
 }
 
@@ -1164,7 +1231,6 @@ namespace vm
         utils::memory_decommit(g_base_addr, 0x100000000);
         utils::memory_decommit(g_exec_addr, 0x100000000);
         utils::memory_decommit(g_stat_addr, 0x100000000);
-        utils::memory_decommit(g_reservations, 0x100000000);
     }
 }
 
@@ -13,7 +13,7 @@ namespace vm
     extern u8* const g_sudo_addr;
     extern u8* const g_exec_addr;
     extern u8* const g_stat_addr;
-    extern u8* const g_reservations;
+    extern u8 g_reservations[];
 
     struct writer_lock;
 
@@ -13,7 +13,7 @@ namespace vm
 
     // Register reader
     void passive_lock(cpu_thread& cpu);
-    atomic_t<u64>* passive_lock(const u32 begin, const u32 end);
+    atomic_t<u64>* range_lock(u32 begin, u32 end);
 
     // Unregister reader
     void passive_unlock(cpu_thread& cpu);
@@ -10,7 +10,7 @@ namespace vm
     inline atomic_t<u64>& reservation_acquire(u32 addr, u32 size)
     {
         // Access reservation info: stamp and the lock bit
-        return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+        return *reinterpret_cast<atomic_t<u64>*>(g_reservations + (addr & 0xff80) / 2);
     }
 
     // Update reservation status
@@ -23,7 +23,7 @@ namespace vm
     // Get reservation sync variable
     inline atomic_t<u64>& reservation_notifier(u32 addr, u32 size)
     {
-        return reinterpret_cast<atomic_t<u64>*>(g_reservations)[addr / 128];
+        return *reinterpret_cast<atomic_t<u64>*>(g_reservations + (addr & 0xff80) / 2);
     }
 
     void reservation_lock_internal(atomic_t<u64>&);