PPU LLVM: Precompile all executable (PRX, MSELF, overlay) code at startup (#9680)

* Precompile LLVM cache at startup of games, like the GUI "Create PPU Cache" option.
* Allow OVL (overlay) precompilation as well (used by certain games).

Co-authored-by: Megamouse <studienricky89@googlemail.com>
Co-authored-by: Nekotekina <nekotekina@gmail.com>
This commit is contained in:
Eladash 2021-01-30 15:08:22 +02:00 committed by GitHub
parent bb2cc196a6
commit e3b3b0cda7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 288 additions and 46 deletions

View file

@ -3,6 +3,7 @@
#include "Utilities/bin_patch.h" #include "Utilities/bin_patch.h"
#include "Utilities/StrUtil.h" #include "Utilities/StrUtil.h"
#include "Utilities/address_range.h"
#include "Crypto/sha1.h" #include "Crypto/sha1.h"
#include "Crypto/unself.h" #include "Crypto/unself.h"
#include "Loader/ELF.h" #include "Loader/ELF.h"
@ -30,7 +31,7 @@ extern std::string ppu_get_function_name(const std::string& _module, u32 fnid);
extern std::string ppu_get_variable_name(const std::string& _module, u32 vnid); extern std::string ppu_get_variable_name(const std::string& _module, u32 vnid);
extern void ppu_register_range(u32 addr, u32 size); extern void ppu_register_range(u32 addr, u32 size);
extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr); extern void ppu_register_function_at(u32 addr, u32 size, ppu_function_t ptr);
extern void ppu_initialize(const ppu_module& info); extern bool ppu_initialize(const ppu_module& info, bool = false);
extern void ppu_initialize(); extern void ppu_initialize();
extern void sys_initialize_tls(ppu_thread&, u64, u32, u32, u32); extern void sys_initialize_tls(ppu_thread&, u64, u32, u32, u32);
@ -1648,10 +1649,8 @@ void ppu_load_exec(const ppu_exec_object& elf)
} }
} }
std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const std::string& path) std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object& elf, const std::string& path)
{ {
const auto ovlm = idm::make_ptr<lv2_obj, lv2_overlay>();
// Access linkage information object // Access linkage information object
const auto link = g_fxo->get<ppu_linkage_info>(); const auto link = g_fxo->get<ppu_linkage_info>();
@ -1659,6 +1658,25 @@ std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const
sha1_context sha; sha1_context sha;
sha1_starts(&sha); sha1_starts(&sha);
// Check if it is an overlay executable first
for (const auto& prog : elf.progs)
{
if (prog.p_type == 0x1u /* LOAD */ && prog.p_memsz)
{
using addr_range = utils::address_range;
const addr_range r = addr_range::start_length(::narrow<u32>(prog.p_vaddr), ::narrow<u32>(prog.p_memsz));
if (!r.valid() || !r.inside(addr_range::start_length(0x30000000, 0x10000000)))
{
// TODO: Check error and if there's a better way to error check
return {nullptr, CELL_ENOEXEC};
}
}
}
const auto ovlm = std::make_shared<lv2_overlay>();
// Allocate memory at fixed positions // Allocate memory at fixed positions
for (const auto& prog : elf.progs) for (const auto& prog : elf.progs)
{ {
@ -1682,7 +1700,18 @@ std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const
fmt::throw_exception("Invalid binary size (0x%llx, memsz=0x%x)", prog.bin.size(), size); fmt::throw_exception("Invalid binary size (0x%llx, memsz=0x%x)", prog.bin.size(), size);
if (!vm::get(vm::any, 0x30000000)->falloc(addr, size)) if (!vm::get(vm::any, 0x30000000)->falloc(addr, size))
fmt::throw_exception("vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size); {
ppu_loader.error("ppu_load_overlay(): vm::falloc() failed (addr=0x%x, memsz=0x%x)", addr, size);
// Revert previous allocations
for (const auto& seg : ovlm->segs)
{
ensure(vm::dealloc(seg.addr));
}
// TODO: Check error code, maybe disallow more than one overlay instance completely
return {nullptr, CELL_EBUSY};
}
// Copy segment data, hash it // Copy segment data, hash it
std::memcpy(vm::base(addr), prog.bin.data(), prog.bin.size()); std::memcpy(vm::base(addr), prog.bin.data(), prog.bin.size());
@ -1845,5 +1874,6 @@ std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object& elf, const
ovlm->name = path.substr(path.find_last_of('/') + 1); ovlm->name = path.substr(path.find_last_of('/') + 1);
ovlm->path = path; ovlm->path = path;
return ovlm; idm::import_existing<lv2_obj, lv2_overlay>(ovlm);
return {std::move(ovlm), {}};
} }

View file

@ -18,6 +18,8 @@
#include "SPURecompiler.h" #include "SPURecompiler.h"
#include "lv2/sys_sync.h" #include "lv2/sys_sync.h"
#include "lv2/sys_prx.h" #include "lv2/sys_prx.h"
#include "lv2/sys_overlay.h"
#include "lv2/sys_process.h"
#include "lv2/sys_memory.h" #include "lv2/sys_memory.h"
#include "Emu/GDB.h" #include "Emu/GDB.h"
@ -117,8 +119,9 @@ const ppu_decoder<ppu_itype> g_ppu_itype;
extern void ppu_initialize(); extern void ppu_initialize();
extern void ppu_finalize(const ppu_module& info); extern void ppu_finalize(const ppu_module& info);
extern void ppu_initialize(const ppu_module& info); extern bool ppu_initialize(const ppu_module& info, bool = false);
static void ppu_initialize2(class jit_compiler& jit, const ppu_module& module_part, const std::string& cache_path, const std::string& obj_name); static void ppu_initialize2(class jit_compiler& jit, const ppu_module& module_part, const std::string& cache_path, const std::string& obj_name);
extern std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, const std::string& path);
extern void ppu_unload_prx(const lv2_prx&); extern void ppu_unload_prx(const lv2_prx&);
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&); extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&);
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code); extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
@ -788,7 +791,7 @@ void ppu_thread::cpu_task()
} }
case ppu_cmd::initialize: case ppu_cmd::initialize:
{ {
cmd_pop(), ppu_initialize(); cmd_pop(), ppu_initialize(), spu_cache::initialize();
break; break;
} }
case ppu_cmd::sleep: case ppu_cmd::sleep:
@ -2064,19 +2067,27 @@ extern void ppu_finalize(const ppu_module& info)
#endif #endif
} }
extern void ppu_precompile(std::vector<std::string>& dir_queue) extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<lv2_prx*>* loaded_prx)
{ {
// Remove duplicates
std::sort(dir_queue.begin(), dir_queue.end());
dir_queue.erase(std::unique(dir_queue.begin(), dir_queue.end()), dir_queue.end());
const std::string firmware_sprx_path = vfs::get("/dev_flash/sys/external/");
// Map fixed address executables area, fake overlay support
const bool had_ovl = !vm::map(0x3000'0000, 0x1000'0000, 0x200).operator bool();
const u32 ppc_seg = std::exchange(g_ps3_process_info.ppc_seg, 0x3);
std::vector<std::pair<std::string, u64>> file_queue; std::vector<std::pair<std::string, u64>> file_queue;
file_queue.reserve(2000); file_queue.reserve(2000);
// Initialize progress dialog
g_progr = "Scanning directories for SPRX libraries...";
// Find all .sprx files recursively (TODO: process .mself files) // Find all .sprx files recursively (TODO: process .mself files)
for (usz i = 0; i < dir_queue.size(); i++) for (usz i = 0; i < dir_queue.size(); i++)
{ {
if (Emu.IsStopped()) if (Emu.IsStopped())
{ {
file_queue.clear();
break; break;
} }
@ -2086,6 +2097,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
{ {
if (Emu.IsStopped()) if (Emu.IsStopped())
{ {
file_queue.clear();
break; break;
} }
@ -2099,17 +2111,72 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
continue; continue;
} }
std::string upper = fmt::to_upper(entry.name);
// Check .sprx filename // Check .sprx filename
if (fmt::to_upper(entry.name).ends_with(".SPRX")) if (upper.ends_with(".SPRX"))
{ {
// Skip already loaded modules or HLEd ones
if (dir_queue[i] == firmware_sprx_path)
{
bool ignore = false;
if (loaded_prx)
{
for (auto* obj : *loaded_prx)
{
if (obj->name == entry.name)
{
ignore = true;
break;
}
}
if (ignore)
{
continue;
}
}
if (g_cfg.core.libraries_control.get_set().count(entry.name + ":lle"))
{
// Force LLE
ignore = false;
}
else if (g_cfg.core.libraries_control.get_set().count(entry.name + ":hle"))
{
// Force HLE
ignore = true;
}
else
{
extern const std::map<std::string_view, int> g_prx_list;
// Use list
ignore = g_prx_list.count(entry.name) && g_prx_list.at(entry.name) != 0;
}
if (ignore)
{
continue;
}
}
// Get full path // Get full path
file_queue.emplace_back(dir_queue[i] + entry.name, 0); file_queue.emplace_back(dir_queue[i] + entry.name, 0);
g_progr_ftotal++; continue;
}
// Check .self filename
if (upper.ends_with(".SELF"))
{
// Get full path
file_queue.emplace_back(dir_queue[i] + entry.name, 0);
continue; continue;
} }
// Check .mself filename // Check .mself filename
if (fmt::to_upper(entry.name).ends_with(".MSELF")) if (upper.ends_with(".MSELF"))
{ {
if (fs::file mself{dir_queue[i] + entry.name}) if (fs::file mself{dir_queue[i] + entry.name})
{ {
@ -2125,11 +2192,20 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
{ {
std::string name = rec.name; std::string name = rec.name;
if (fmt::to_upper(name).ends_with(".SPRX")) upper = fmt::to_upper(name);
if (upper.ends_with(".SPRX"))
{ {
// .sprx inside .mself found // .sprx inside .mself found
file_queue.emplace_back(dir_queue[i] + entry.name, rec.off); file_queue.emplace_back(dir_queue[i] + entry.name, rec.off);
g_progr_ftotal++; continue;
}
if (upper.ends_with(".SELF"))
{
// .self inside .mself found
file_queue.emplace_back(dir_queue[i] + entry.name, rec.off);
continue;
} }
} }
else else
@ -2146,22 +2222,35 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
g_progr = "Compiling PPU modules"; g_progr = "Compiling PPU modules";
g_progr_ftotal += file_queue.size();
atomic_t<usz> fnext = 0; atomic_t<usz> fnext = 0;
shared_mutex sprx_mtx; shared_mutex sprx_mtx, ovl_mtx;
named_thread_group workers("SPRX Worker ", utils::get_thread_count(), [&] named_thread_group workers("SPRX Worker ", std::min<u32>(utils::get_thread_count(), ::size32(file_queue)), [&]
{ {
for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++) for (usz func_i = fnext++; func_i < file_queue.size(); func_i = fnext++, g_progr_fdone++)
{ {
std::string path = std::as_const(file_queue)[func_i].first; if (Emu.IsStopped())
{
continue;
}
ppu_log.notice("Trying to load SPRX: %s", path); auto [path, offset] = std::as_const(file_queue)[func_i];
// Load MSELF or SPRX ppu_log.notice("Trying to load: %s", path);
// Load MSELF, SPRX or SELF
fs::file src{path}; fs::file src{path};
if (u64 off = file_queue[func_i].second) if (!src)
{
ppu_log.error("Failed to open '%s' (%s)", path, fs::g_tls_error);
continue;
}
if (u64 off = offset)
{ {
// Adjust offset for MSELF // Adjust offset for MSELF
src.reset(std::make_unique<file_view>(std::move(src), off)); src.reset(std::make_unique<file_view>(std::move(src), off));
@ -2173,9 +2262,15 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
// Some files may fail to decrypt due to the lack of klic // Some files may fail to decrypt due to the lack of klic
src = decrypt_self(std::move(src)); src = decrypt_self(std::move(src));
const ppu_prx_object obj = src; if (!src)
{
ppu_log.error("Failed to decrypt '%s'", path);
continue;
}
if (obj == elf_error::ok) elf_error prx_err{}, ovl_err{};
if (const ppu_prx_object obj = src; (prx_err = obj, obj == elf_error::ok))
{ {
std::unique_lock lock(sprx_mtx); std::unique_lock lock(sprx_mtx);
@ -2188,19 +2283,64 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue)
ppu_unload_prx(*prx); ppu_unload_prx(*prx);
lock.unlock(); lock.unlock();
ppu_finalize(*prx); ppu_finalize(*prx);
g_progr_fdone++; continue;
}
// Log error
prx_err = elf_error::header_type;
}
if (const ppu_exec_object obj = src; (ovl_err = obj, obj == elf_error::ok))
{
while (ovl_err == elf_error::ok)
{
// Only one thread compiles OVL atm, other can compile PRX cuncurrently
std::unique_lock lock(ovl_mtx);
auto [ovlm, error] = ppu_load_overlay(obj, path);
if (error)
{
// Abort
ovl_err = elf_error::header_type;
break;
}
ppu_initialize(*ovlm);
for (auto& seg : ovlm->segs)
{
vm::dealloc(seg.addr);
}
lock.unlock();
idm::remove<lv2_obj, lv2_overlay>(idm::last_id());
ppu_finalize(*ovlm);
break;
}
if (ovl_err == elf_error::ok)
{
continue; continue;
} }
} }
ppu_log.error("Failed to load SPRX '%s' (%s)", path, obj.get_error()); ppu_log.error("Failed to precompile '%s' (prx: %s, ovl: %s)", path, prx_err, ovl_err);
g_progr_fdone++;
continue; continue;
} }
}); });
// Join every thread // Join every thread
workers.join(); workers.join();
// Revert changes
if (!had_ovl)
{
vm::unmap(0x3000'0000);
}
g_ps3_process_info.ppc_seg = ppc_seg;
} }
extern void ppu_initialize() extern void ppu_initialize()
@ -2217,10 +2357,12 @@ extern void ppu_initialize()
return; return;
} }
// Initialize main module bool compile_main = false;
// Check main module cache
if (!_main->segs.empty()) if (!_main->segs.empty())
{ {
ppu_initialize(*_main); compile_main = ppu_initialize(*_main, true);
} }
std::vector<lv2_prx*> prx_list; std::vector<lv2_prx*> prx_list;
@ -2230,20 +2372,68 @@ extern void ppu_initialize()
prx_list.emplace_back(&prx); prx_list.emplace_back(&prx);
}); });
// If empty we have no indication for cache state, check everything
bool compile_fw = prx_list.empty();
// Check preloaded libraries cache
for (auto ptr : prx_list)
{
compile_fw |= ppu_initialize(*ptr, true);
}
std::vector<std::string> dir_queue;
if (compile_fw)
{
const std::string firmware_sprx_path = vfs::get("/dev_flash/sys/external/");
dir_queue.emplace_back(firmware_sprx_path);
}
// Avoid compilation if main's cache exists or it is a standalone SELF with no PARAM.SFO
if (compile_main && !Emu.GetTitleID().empty())
{
dir_queue.emplace_back(vfs::get(Emu.GetDir()) + '/');
if (const std::string dev_bdvd = vfs::get("/dev_bdvd/PS3_GAME"); !dev_bdvd.empty())
{
dir_queue.emplace_back(dev_bdvd + '/');
}
}
ppu_precompile(dir_queue, &prx_list);
if (Emu.IsStopped())
{
return;
}
// Initialize main module cache
if (!_main->segs.empty())
{
ppu_initialize(*_main);
}
// Initialize preloaded libraries // Initialize preloaded libraries
for (auto ptr : prx_list) for (auto ptr : prx_list)
{ {
if (Emu.IsStopped())
{
return;
}
ppu_initialize(*ptr); ppu_initialize(*ptr);
} }
// Initialize SPU cache
spu_cache::initialize();
} }
extern void ppu_initialize(const ppu_module& info) bool ppu_initialize(const ppu_module& info, bool check_only)
{ {
if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm) if (g_cfg.core.ppu_decoder != ppu_decoder_type::llvm)
{ {
if (check_only)
{
return false;
}
// Temporarily // Temporarily
s_ppu_toc = g_fxo->get<std::unordered_map<u32, u32>>(); s_ppu_toc = g_fxo->get<std::unordered_map<u32, u32>>();
@ -2261,7 +2451,7 @@ extern void ppu_initialize(const ppu_module& info)
} }
} }
return; return false;
} }
// Link table // Link table
@ -2375,6 +2565,8 @@ extern void ppu_initialize(const ppu_module& info)
// Sync variable to acquire workloads // Sync variable to acquire workloads
atomic_t<u32> work_cv = 0; atomic_t<u32> work_cv = 0;
bool compiled_new = false;
while (jit_mod.vars.empty() && fpos < info.funcs.size()) while (jit_mod.vars.empty() && fpos < info.funcs.size())
{ {
// Initialize compiler instance // Initialize compiler instance
@ -2581,7 +2773,7 @@ extern void ppu_initialize(const ppu_module& info)
// Check object file // Check object file
if (jit_compiler::check(cache_path + obj_name)) if (jit_compiler::check(cache_path + obj_name))
{ {
if (!jit) if (!jit && !check_only)
{ {
ppu_log.success("LLVM: Module exists: %s", obj_name); ppu_log.success("LLVM: Module exists: %s", obj_name);
continue; continue;
@ -2590,6 +2782,14 @@ extern void ppu_initialize(const ppu_module& info)
continue; continue;
} }
// Remember, used in ppu_initialize(void)
compiled_new = true;
if (check_only)
{
return true;
}
// Adjust information (is_compiled) // Adjust information (is_compiled)
link_workload.back().second = true; link_workload.back().second = true;
@ -2651,7 +2851,7 @@ extern void ppu_initialize(const ppu_module& info)
if (Emu.IsStopped() || !get_current_cpu_thread()) if (Emu.IsStopped() || !get_current_cpu_thread())
{ {
return; return compiled_new;
} }
for (auto [obj_name, is_compiled] : link_workload) for (auto [obj_name, is_compiled] : link_workload)
@ -2672,7 +2872,7 @@ extern void ppu_initialize(const ppu_module& info)
if (Emu.IsStopped() || !get_current_cpu_thread()) if (Emu.IsStopped() || !get_current_cpu_thread())
{ {
return; return compiled_new;
} }
// Jit can be null if the loop doesn't ever enter. // Jit can be null if the loop doesn't ever enter.
@ -2742,6 +2942,8 @@ extern void ppu_initialize(const ppu_module& info)
} }
} }
} }
return compiled_new;
#else #else
fmt::throw_exception("LLVM is not available in this build."); fmt::throw_exception("LLVM is not available in this build.");
#endif #endif

View file

@ -12,9 +12,9 @@
#include "sys_overlay.h" #include "sys_overlay.h"
#include "sys_fs.h" #include "sys_fs.h"
extern std::shared_ptr<lv2_overlay> ppu_load_overlay(const ppu_exec_object&, const std::string& path); extern std::pair<std::shared_ptr<lv2_overlay>, CellError> ppu_load_overlay(const ppu_exec_object&, const std::string& path);
extern void ppu_initialize(const ppu_module&); extern bool ppu_initialize(const ppu_module&, bool = false);
extern void ppu_finalize(const ppu_module&); extern void ppu_finalize(const ppu_module&);
LOG_CHANNEL(sys_overlay); LOG_CHANNEL(sys_overlay);
@ -42,7 +42,12 @@ static error_code overlay_load_module(vm::ptr<u32> ovlmid, const std::string& vp
return {CELL_ENOEXEC, obj.operator elf_error()}; return {CELL_ENOEXEC, obj.operator elf_error()};
} }
const auto ovlm = ppu_load_overlay(obj, vfs::get(vpath)); const auto [ovlm, error] = ppu_load_overlay(obj, vfs::get(vpath));
if (error)
{
return error;
}
ppu_initialize(*ovlm); ppu_initialize(*ovlm);

View file

@ -17,7 +17,7 @@
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&); extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&);
extern void ppu_unload_prx(const lv2_prx& prx); extern void ppu_unload_prx(const lv2_prx& prx);
extern void ppu_initialize(const ppu_module&); extern bool ppu_initialize(const ppu_module&, bool = false);
extern void ppu_finalize(const ppu_module&); extern void ppu_finalize(const ppu_module&);
LOG_CHANNEL(sys_prx); LOG_CHANNEL(sys_prx);

View file

@ -66,8 +66,8 @@ atomic_t<u64> g_watchdog_hold_ctr{0};
extern void ppu_load_exec(const ppu_exec_object&); extern void ppu_load_exec(const ppu_exec_object&);
extern void spu_load_exec(const spu_exec_object&); extern void spu_load_exec(const spu_exec_object&);
extern void ppu_precompile(std::vector<std::string>& dir_queue); extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<lv2_prx*>* loaded_prx);
extern void ppu_initialize(const ppu_module&); extern bool ppu_initialize(const ppu_module&, bool = false);
extern void ppu_finalize(const ppu_module&); extern void ppu_finalize(const ppu_module&);
extern void ppu_unload_prx(const lv2_prx&); extern void ppu_unload_prx(const lv2_prx&);
extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&); extern std::shared_ptr<lv2_prx> ppu_load_prx(const ppu_prx_object&, const std::string&);
@ -1150,7 +1150,12 @@ game_boot_result Emulator::Load(const std::string& title_id, bool add_only, bool
ensure(vm::falloc(0x10000, 0xf0000, vm::main)); ensure(vm::falloc(0x10000, 0xf0000, vm::main));
} }
ppu_precompile(dir_queue); if (Emu.IsStopped())
{
return;
}
ppu_precompile(dir_queue, nullptr);
// Exit "process" // Exit "process"
Emu.CallAfter([] Emu.CallAfter([]