mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-03 21:41:26 +12:00
PPU LLVM: Unify memory limit for PRX and Overlay files
This commit is contained in:
parent
f0abb4473e
commit
e38b005a8b
1 changed files with 71 additions and 6 deletions
|
@ -3968,7 +3968,13 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
|
|
||||||
lf_queue<file_info> possible_exec_file_paths;
|
lf_queue<file_info> possible_exec_file_paths;
|
||||||
|
|
||||||
::semaphore<2> ovl_sema;
|
// Allow to allocate 2000 times the size of each file for the use of LLVM
|
||||||
|
// This works very nicely with Metal Gear Solid 4 for example:
|
||||||
|
// 2 7MB overlay files -> 14GB
|
||||||
|
// The growth in memory requirements of LLVM is not linear with file size of course
|
||||||
|
// But these estimates should hopefully protect RPCS3 in the coming years
|
||||||
|
// Especially when thread count is on the rise with each CPU generation
|
||||||
|
atomic_t<u32> file_size_limit = static_cast<u32>(std::clamp<u64>(utils::aligned_div<u64>(utils::get_total_memory(), 2000), 65536, u32{umax}));
|
||||||
|
|
||||||
const u32 software_thread_limit = std::min<u32>(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue));
|
const u32 software_thread_limit = std::min<u32>(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue));
|
||||||
const u32 cpu_thread_limit = utils::get_thread_count() > 8u ? std::max<u32>(utils::get_thread_count(), 2) - 1 : utils::get_thread_count(); // One LLVM thread less
|
const u32 cpu_thread_limit = utils::get_thread_count() > 8u ? std::max<u32>(utils::get_thread_count(), 2) - 1 : utils::get_thread_count(); // One LLVM thread less
|
||||||
|
@ -3981,6 +3987,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
// Set low priority
|
// Set low priority
|
||||||
thread_ctrl::scoped_priority low_prio(-1);
|
thread_ctrl::scoped_priority low_prio(-1);
|
||||||
u32 inc_fdone = 1;
|
u32 inc_fdone = 1;
|
||||||
|
u32 restore_mem = 0;
|
||||||
|
|
||||||
for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone += std::exchange(inc_fdone, 1))
|
for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone += std::exchange(inc_fdone, 1))
|
||||||
{
|
{
|
||||||
|
@ -3989,6 +3996,16 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (restore_mem)
|
||||||
|
{
|
||||||
|
if (!file_size_limit.fetch_add(restore_mem))
|
||||||
|
{
|
||||||
|
file_size_limit.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
restore_mem = 0;
|
||||||
|
}
|
||||||
|
|
||||||
auto& [path, offset, file_size] = file_queue[func_i];
|
auto& [path, offset, file_size] = file_queue[func_i];
|
||||||
|
|
||||||
ppu_log.notice("Trying to load: %s", path);
|
ppu_log.notice("Trying to load: %s", path);
|
||||||
|
@ -4020,10 +4037,48 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto wait_for_memory = [&]() -> bool
|
||||||
|
{
|
||||||
|
// Try not to process too many files at once because it seems to reduce performance and cause RAM shortages
|
||||||
|
// Concurrently compiling more OVL or huge PRX files does not have much theoretical benefit
|
||||||
|
while (!file_size_limit.fetch_op([&](u32& value)
|
||||||
|
{
|
||||||
|
if (value)
|
||||||
|
{
|
||||||
|
// Allow at least one file, make 0 the "memory unavailable" sign value for atomic waiting efficiency
|
||||||
|
const u32 new_val = static_cast<u32>(utils::sub_saturate<u64>(value, file_size));
|
||||||
|
restore_mem = value - new_val;
|
||||||
|
value = new_val;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resort to waiting
|
||||||
|
restore_mem = 0;
|
||||||
|
return false;
|
||||||
|
}).second)
|
||||||
|
{
|
||||||
|
// Wait until not 0
|
||||||
|
file_size_limit.wait(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Emu.IsStopped())
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
elf_error prx_err{}, ovl_err{};
|
elf_error prx_err{}, ovl_err{};
|
||||||
|
|
||||||
if (ppu_prx_object obj = src; (prx_err = obj, obj == elf_error::ok))
|
if (ppu_prx_object obj = src; (prx_err = obj, obj == elf_error::ok))
|
||||||
{
|
{
|
||||||
|
if (!wait_for_memory())
|
||||||
|
{
|
||||||
|
// Emulation stopped
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (auto prx = ppu_load_prx(obj, true, path, offset))
|
if (auto prx = ppu_load_prx(obj, true, path, offset))
|
||||||
{
|
{
|
||||||
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
obj.clear(), src.close(); // Clear decrypted file and elf object memory
|
||||||
|
@ -4040,10 +4095,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
{
|
{
|
||||||
while (ovl_err == elf_error::ok)
|
while (ovl_err == elf_error::ok)
|
||||||
{
|
{
|
||||||
// Try not to process too many files at once because it seems to reduce performance
|
|
||||||
// Concurrently compiling more OVL files does not have much theoretical benefit
|
|
||||||
std::lock_guard lock(ovl_sema);
|
|
||||||
|
|
||||||
if (Emu.IsStopped())
|
if (Emu.IsStopped())
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
|
@ -4064,6 +4115,12 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!wait_for_memory())
|
||||||
|
{
|
||||||
|
// Emulation stopped
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Participate in thread execution limitation (takes a long time)
|
// Participate in thread execution limitation (takes a long time)
|
||||||
if (std::lock_guard lock(g_fxo->get<jit_core_allocator>().sem); !ovlm->analyse(0, ovlm->entry, ovlm->seg0_code_end, ovlm->applied_patches, []()
|
if (std::lock_guard lock(g_fxo->get<jit_core_allocator>().sem); !ovlm->analyse(0, ovlm->entry, ovlm->seg0_code_end, ovlm->applied_patches, []()
|
||||||
{
|
{
|
||||||
|
@ -4086,10 +4143,18 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ppu_log.notice("Failed to precompile '%s' (prx: %s, ovl: %s): Attempting tratment as executable file", path, prx_err, ovl_err);
|
ppu_log.notice("Failed to precompile '%s' (prx: %s, ovl: %s): Attempting compilation as executable file", path, prx_err, ovl_err);
|
||||||
possible_exec_file_paths.push(path, offset, file_size);
|
possible_exec_file_paths.push(path, offset, file_size);
|
||||||
inc_fdone = 0;
|
inc_fdone = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (restore_mem)
|
||||||
|
{
|
||||||
|
if (!file_size_limit.fetch_add(restore_mem))
|
||||||
|
{
|
||||||
|
file_size_limit.notify_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Join every thread
|
// Join every thread
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue