mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-07 23:41:26 +12:00
PPU LLVM: paradigm shift
For now, compile only one block at time Use tail calls to move between blocks Fully write PPU context (except CIA) This fixes many compatibility problems
This commit is contained in:
parent
a29d7d3962
commit
aea094730b
13 changed files with 1076 additions and 856 deletions
|
@ -102,6 +102,7 @@ const ppu_decoder<ppu_interpreter_fast> s_ppu_interpreter_fast;
|
|||
|
||||
extern void ppu_initialize();
|
||||
extern void ppu_initialize(const ppu_module& info);
|
||||
static void ppu_initialize2(const ppu_module& info);
|
||||
extern void ppu_execute_syscall(ppu_thread& ppu, u64 code);
|
||||
|
||||
// Get pointer to executable cache
|
||||
|
@ -371,7 +372,7 @@ std::string ppu_thread::dump() const
|
|||
fmt::append(ret, "XER = [CA=%u | OV=%u | SO=%u | CNT=%u]\n", xer.ca, xer.ov, xer.so, xer.cnt);
|
||||
fmt::append(ret, "VSCR = [SAT=%u | NJ=%u]\n", sat, nj);
|
||||
fmt::append(ret, "FPSCR = [FL=%u | FG=%u | FE=%u | FU=%u]\n", fpscr.fl, fpscr.fg, fpscr.fe, fpscr.fu);
|
||||
fmt::append(ret, "\nCall stack:\n=========\n0x%08x (0x0) called\n", g_cfg.core.ppu_decoder == ppu_decoder_type::llvm ? 0 : cia);
|
||||
fmt::append(ret, "\nCall stack:\n=========\n0x%08x (0x0) called\n", cia);
|
||||
|
||||
// Determine stack range
|
||||
u32 stack_ptr = static_cast<u32>(gpr[1]);
|
||||
|
@ -474,7 +475,11 @@ void ppu_thread::exec_task()
|
|||
{
|
||||
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
||||
{
|
||||
reinterpret_cast<ppu_function_t>(static_cast<std::uintptr_t>(ppu_ref(cia)))(*this);
|
||||
while (!test(state, cpu_flag::ret + cpu_flag::exit + cpu_flag::stop))
|
||||
{
|
||||
reinterpret_cast<ppu_function_t>(static_cast<std::uintptr_t>(ppu_ref(cia)))(*this);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -769,19 +774,21 @@ extern __m128i sse_cellbe_lvrx(u64 addr);
|
|||
extern void sse_cellbe_stvlx(u64 addr, __m128i a);
|
||||
extern void sse_cellbe_stvrx(u64 addr, __m128i a);
|
||||
|
||||
[[noreturn]] static void ppu_trap(u64 addr)
|
||||
[[noreturn]] static void ppu_trap(ppu_thread& ppu, u64 addr)
|
||||
{
|
||||
ppu.cia = ::narrow<u32>(addr);
|
||||
fmt::throw_exception("Trap! (0x%llx)", addr);
|
||||
}
|
||||
|
||||
[[noreturn]] static void ppu_unreachable(u64 addr)
|
||||
[[noreturn]] static void ppu_error(ppu_thread& ppu, u64 addr, u32 op)
|
||||
{
|
||||
fmt::throw_exception("Unreachable! (0x%llx)", addr);
|
||||
ppu.cia = ::narrow<u32>(addr);
|
||||
fmt::throw_exception("Unknown/Illegal opcode 0x08x (0x%llx)", op, addr);
|
||||
}
|
||||
|
||||
static void ppu_check(ppu_thread& ppu, u64 addr)
|
||||
{
|
||||
ppu.cia = addr;
|
||||
ppu.cia = ::narrow<u32>(addr);
|
||||
ppu.test_state();
|
||||
}
|
||||
|
||||
|
@ -867,6 +874,141 @@ static bool adde_carry(u64 a, u64 b, bool c)
|
|||
#endif
|
||||
}
|
||||
|
||||
static std::string ppu_context_prologue()
|
||||
{
|
||||
std::string c;
|
||||
//c += "\xCC";
|
||||
#ifndef _WIN32
|
||||
c += "\x48\x89\xF9"; // mov rcx, rdi
|
||||
#endif
|
||||
c += "\x48\xB8"; // mov rax, imm64
|
||||
uptr ptr = (uptr)&vm::g_base_addr;
|
||||
c.append((const char*)&ptr, 8);
|
||||
c += "\x48\x8B"; // mov rax, [rax]
|
||||
c += '\0';
|
||||
c += "\x48\x03\x41"; // add rax, [ppu+r3]
|
||||
c += char(offset32(&ppu_thread::gpr, 3));
|
||||
c += "\x48\x83\xC0\x0F"; // add rax, 15
|
||||
c += "\x48\x83\xE0\xF0"; // and rax, -16
|
||||
return c;
|
||||
}
|
||||
|
||||
const auto ppu_get_context = []() -> std::string
|
||||
{
|
||||
std::string c = ppu_context_prologue();
|
||||
c += "\x48\x8B\x51"; // mov rdx, [rcx+r1]
|
||||
c += char(offset32(&ppu_thread::gpr, 1));
|
||||
c += "\x48\x89\x10"; // mov [rax], rdx
|
||||
c += "\x48\x8B\x51"; // mov rdx, [rcx+r2]
|
||||
c += char(offset32(&ppu_thread::gpr, 2));
|
||||
c += "\x48\x89\x50\x08"; // mov [rax+8], rdx
|
||||
c += "\x48\x8B\x54\x24\xF8"; // mov rdx, [rsp-8]
|
||||
c += "\x48\x89\x50\x10"; // mov [rax+0x10], rdx
|
||||
|
||||
c += "\x48\x89\x60\x18"; // mov [rax+0x18], rsp
|
||||
c += "\x48\x89\x58\x20"; // mov [rax+0x20], rbx
|
||||
c += "\x48\x89\x68\x28"; // mov [rax+0x28], rbp
|
||||
#ifdef _WIN32
|
||||
c += "\x48\x89\x70\x30"; // mov [rax+0x30], rsi
|
||||
c += "\x48\x89\x78\x38"; // mov [rax+0x38], rdi
|
||||
#endif
|
||||
c += "\x4C\x89\x60\x40"; // mov [rax+0x40], r12
|
||||
c += "\x4C\x89\x68\x48"; // mov [rax+0x48], r13
|
||||
c += "\x4C\x89\x70\x50"; // mov [rax+0x50], r14
|
||||
c += "\x4C\x89\x78\x58"; // mov [rax+0x58], r15
|
||||
|
||||
#ifdef _WIN32
|
||||
c += "\x66\x0F\x7F\x70\x60"; // movdqa [rax+0x60], xmm6
|
||||
c += "\x66\x0F\x7F\x78\x70"; // movdqa [rax+0x70], xmm7
|
||||
c += "\x66\x44\x0F\x7F\x80\x80\x00\x00\x00"s; // ...
|
||||
c += "\x66\x44\x0F\x7F\x88\x90\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x7F\x90\xA0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x7F\x98\xB0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x7F\xA0\xC0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x7F\xA8\xD0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x7F\xB0\xE0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x7F\xB8\xF0\x00\x00\x00"s;
|
||||
#endif
|
||||
|
||||
c += "\x48\xC7\x41"; // mov [rcx+r3], 0
|
||||
c += char(offset32(&ppu_thread::gpr, 3));
|
||||
c.append(4, '\0');
|
||||
//c += "\xCC";
|
||||
c += "\xC3"; // ret
|
||||
return c;
|
||||
}();
|
||||
|
||||
const auto ppu_set_context = []() -> std::string
|
||||
{
|
||||
std::string c = ppu_context_prologue();
|
||||
c += "\xCC";
|
||||
c += "\x48\x8B\x10"; // mov rdx, [rax]
|
||||
c += "\x48\x89\x51"; // mov [rcx+r1], rdx
|
||||
c += char(offset32(&ppu_thread::gpr, 1));
|
||||
|
||||
c += "\x48\x8B\x50\x08"; // mov rdx, [rax+8]
|
||||
c += "\x48\x89\x51"; // mov [rcx+r2], rdx
|
||||
c += char(offset32(&ppu_thread::gpr, 2));
|
||||
|
||||
c += "\x48\x8B\x60\x18"; // mov rsp, [rax+0x18]
|
||||
c += "\x48\x8B\x58\x20"; // mov rbx, [rax+0x20]
|
||||
c += "\x48\x8B\x68\x28"; // mov rbp, [rax+0x28]
|
||||
#ifdef _WIN32
|
||||
c += "\x48\x8B\x70\x30"; // mov rsi, [rax+0x30]
|
||||
c += "\x48\x8B\x78\x38"; // mov rdi, [rax+0x38]
|
||||
#endif
|
||||
c += "\x4C\x8B\x60\x40"; // mov r12, [rax+0x40]
|
||||
c += "\x4C\x8B\x68\x48"; // mov r13, [rax+0x48]
|
||||
c += "\x4C\x8B\x70\x50"; // mov r14, [rax+0x50]
|
||||
c += "\x4C\x8B\x78\x58"; // mov r15, [rax+0x58]
|
||||
|
||||
#ifdef _WIN32
|
||||
c += "\x66\x0F\x6F\x70\x60"; // movdqa xmm6, [rax+0x60]
|
||||
c += "\x66\x0F\x6F\x78\x70"; // movdqa xmm7, [rax+0x70]
|
||||
c += "\x66\x44\x0F\x6F\x80\x80\x00\x00\x00"s; // ...
|
||||
c += "\x66\x44\x0F\x6F\x88\x90\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x6F\x90\xA0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x6F\x98\xB0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x6F\xA0\xC0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x6F\xA8\xD0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x6F\xB0\xE0\x00\x00\x00"s;
|
||||
c += "\x66\x44\x0F\x6F\xB8\xF0\x00\x00\x00"s;
|
||||
#endif
|
||||
|
||||
c += "\x48\x8B\x50\x10"; // mov rdx, [rax+0x10]
|
||||
c += "\x48\x89\x54\x24\xF8"; // mov [rsp-8], rdx
|
||||
c += "\x48\x8B\x51"; // mov rdx, [rcx+r4]
|
||||
c += char(offset32(&ppu_thread::gpr, 4));
|
||||
c += "\x48\x85\xD2"; // test rdx, rdx
|
||||
c += "\x0F\x94\xC2"; // setz dl
|
||||
c += "\x48\x0F\xB6\xD2"; // movzx rdx, dl
|
||||
c += "\x48\x89\x51"; // mov [rcx+r3], rdx
|
||||
c += char(offset32(&ppu_thread::gpr, 3));
|
||||
c += "\xC3"; // ret
|
||||
return c;
|
||||
}();
|
||||
|
||||
const auto ppu_use_context = []() -> std::string
|
||||
{
|
||||
std::string c;
|
||||
c += "\x48\xB8"; // mov rax, imm64
|
||||
uptr ptr = (uptr)&vm::g_exec_addr;
|
||||
c.append((const char*)&ptr, 8);
|
||||
c += "\x48\x8B\x20"; // mov rsp, [rax]
|
||||
#ifdef _WIN32
|
||||
c += "\x48\x01\xD4"; // add rsp,rdx
|
||||
#else
|
||||
c += "\x48\x01\xFC"; // add rsp,rsi
|
||||
#endif
|
||||
//c += "\x48\x83\xE4\xE0"; // and rsp, -0x20
|
||||
#ifdef _WIN32
|
||||
c += "\x41\xFF\xD0"; // call r8
|
||||
#else
|
||||
c += "\xFF\xD2"; // call rdx
|
||||
#endif
|
||||
return c;
|
||||
}();
|
||||
|
||||
extern void ppu_initialize()
|
||||
{
|
||||
const auto _funcs = fxm::withdraw<std::vector<ppu_function>>();
|
||||
|
@ -876,25 +1018,8 @@ extern void ppu_initialize()
|
|||
return;
|
||||
}
|
||||
|
||||
std::size_t fpos = 0;
|
||||
|
||||
while (fpos < _funcs->size())
|
||||
{
|
||||
// Split module (TODO)
|
||||
ppu_module info;
|
||||
info.name = fmt::format("%05X", _funcs->at(fpos).addr);
|
||||
info.funcs.reserve(2000);
|
||||
|
||||
while (fpos < _funcs->size() && info.funcs.size() < 2000)
|
||||
{
|
||||
info.funcs.emplace_back(std::move(_funcs->at(fpos++)));
|
||||
}
|
||||
|
||||
if (!Emu.IsStopped())
|
||||
{
|
||||
ppu_initialize(info);
|
||||
}
|
||||
}
|
||||
// Initialize main module
|
||||
ppu_initialize({"", std::move(*_funcs)});
|
||||
|
||||
std::vector<lv2_prx*> prx_list;
|
||||
|
||||
|
@ -903,12 +1028,10 @@ extern void ppu_initialize()
|
|||
prx_list.emplace_back(&prx);
|
||||
});
|
||||
|
||||
// Initialize preloaded libraries
|
||||
for (auto ptr : prx_list)
|
||||
{
|
||||
if (!Emu.IsStopped())
|
||||
{
|
||||
ppu_initialize(*ptr);
|
||||
}
|
||||
ppu_initialize(*ptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -936,6 +1059,136 @@ extern void ppu_initialize(const ppu_module& info)
|
|||
return;
|
||||
}
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
using namespace llvm;
|
||||
|
||||
// Initialize JIT compiler
|
||||
if (!fxm::check<jit_compiler>())
|
||||
{
|
||||
std::unordered_map<std::string, u64> link_table
|
||||
{
|
||||
{ "__mptr", (u64)&vm::g_base_addr },
|
||||
{ "__cptr", (u64)&vm::g_exec_addr },
|
||||
{ "__trap", (u64)&ppu_trap },
|
||||
{ "__error", (u64)&ppu_error },
|
||||
{ "__check", (u64)&ppu_check },
|
||||
{ "__trace", (u64)&ppu_trace },
|
||||
{ "__syscall", (u64)&ppu_execute_syscall },
|
||||
{ "__get_tb", (u64)&get_timebased_time },
|
||||
{ "__lwarx", (u64)&ppu_lwarx },
|
||||
{ "__ldarx", (u64)&ppu_ldarx },
|
||||
{ "__stwcx", (u64)&ppu_stwcx },
|
||||
{ "__stdcx", (u64)&ppu_stdcx },
|
||||
{ "__vexptefp", (u64)&sse_exp2_ps },
|
||||
{ "__vlogefp", (u64)&sse_log2_ps },
|
||||
{ "__vperm", (u64)&sse_altivec_vperm },
|
||||
{ "__lvsl", (u64)&sse_altivec_lvsl },
|
||||
{ "__lvsr", (u64)&sse_altivec_lvsr },
|
||||
{ "__lvlx", (u64)&sse_cellbe_lvlx },
|
||||
{ "__lvrx", (u64)&sse_cellbe_lvrx },
|
||||
{ "__stvlx", (u64)&sse_cellbe_stvlx },
|
||||
{ "__stvrx", (u64)&sse_cellbe_stvrx },
|
||||
};
|
||||
|
||||
for (u64 index = 0; index < 1024; index++)
|
||||
{
|
||||
if (auto sc = ppu_get_syscall(index))
|
||||
{
|
||||
link_table.emplace(ppu_get_syscall_name(index), (u64)sc);
|
||||
}
|
||||
}
|
||||
|
||||
fxm::make<jit_compiler>(std::move(link_table), g_cfg.core.llvm_cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Split module into fragments <= 1 MiB
|
||||
std::size_t fpos = 0;
|
||||
|
||||
ppu_module part;
|
||||
part.funcs.reserve(65536);
|
||||
|
||||
while (fpos < info.funcs.size())
|
||||
{
|
||||
const auto fstart = fpos;
|
||||
|
||||
std::size_t bsize = 0;
|
||||
|
||||
part.funcs.clear();
|
||||
|
||||
while (fpos < info.funcs.size())
|
||||
{
|
||||
auto& func = info.funcs[fpos];
|
||||
|
||||
if (bsize + func.size > 1024 * 1024 && bsize)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
for (auto&& block : func.blocks)
|
||||
{
|
||||
bsize += block.second;
|
||||
|
||||
// Also split functions blocks into functions (TODO)
|
||||
ppu_function entry;
|
||||
entry.addr = block.first;
|
||||
entry.size = block.second;
|
||||
entry.toc = func.toc;
|
||||
fmt::append(entry.name, "__0x%x", block.first);
|
||||
part.funcs.emplace_back(std::move(entry));
|
||||
}
|
||||
|
||||
fpos++;
|
||||
}
|
||||
|
||||
part.name.clear();
|
||||
|
||||
if (info.name.size())
|
||||
{
|
||||
part.name += '-';
|
||||
part.name += info.name;
|
||||
}
|
||||
|
||||
if (fstart)
|
||||
{
|
||||
fmt::append(part.name, "+%06X", info.funcs.at(fstart).addr);
|
||||
}
|
||||
else if (fpos < info.funcs.size())
|
||||
{
|
||||
part.name.append("+0");
|
||||
}
|
||||
|
||||
ppu_initialize2(part);
|
||||
}
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
const auto jit = fxm::check_unlocked<jit_compiler>();
|
||||
|
||||
jit->fin(Emu.GetCachePath());
|
||||
|
||||
// Get and install function addresses
|
||||
for (const auto& func : info.funcs)
|
||||
{
|
||||
if (!func.size) continue;
|
||||
|
||||
for (const auto& block : func.blocks)
|
||||
{
|
||||
if (block.second)
|
||||
{
|
||||
ppu_ref(block.first) = ::narrow<u32>(jit->get(fmt::format("__0x%x", block.first)));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void ppu_initialize2(const ppu_module& module_part)
|
||||
{
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute module hash
|
||||
std::string obj_name;
|
||||
{
|
||||
|
@ -943,7 +1196,7 @@ extern void ppu_initialize(const ppu_module& info)
|
|||
u8 output[20];
|
||||
sha1_starts(&ctx);
|
||||
|
||||
for (const auto& func : info.funcs)
|
||||
for (const auto& func : module_part.funcs)
|
||||
{
|
||||
if (func.size == 0)
|
||||
{
|
||||
|
@ -964,59 +1217,19 @@ extern void ppu_initialize(const ppu_module& info)
|
|||
|
||||
sha1_update(&ctx, vm::ps3::_ptr<const u8>(block.first), block.second);
|
||||
}
|
||||
|
||||
sha1_update(&ctx, vm::ps3::_ptr<const u8>(func.addr), func.size);
|
||||
}
|
||||
|
||||
sha1_finish(&ctx, output);
|
||||
|
||||
// Version, module name and hash: vX-liblv2.sprx-0123456789ABCDEF.obj
|
||||
fmt::append(obj_name, "v1-%s-%016X.obj", info.name, reinterpret_cast<be_t<u64>&>(output));
|
||||
fmt::append(obj_name, "b1%s-%016X.obj", module_part.name, reinterpret_cast<be_t<u64>&>(output));
|
||||
}
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
using namespace llvm;
|
||||
|
||||
if (!fxm::check<jit_compiler>())
|
||||
{
|
||||
std::unordered_map<std::string, std::uintptr_t> link_table
|
||||
{
|
||||
{ "__mptr", (u64)&vm::g_base_addr },
|
||||
{ "__cptr", (u64)&vm::g_exec_addr },
|
||||
{ "__trap", (u64)&ppu_trap },
|
||||
{ "__end", (u64)&ppu_unreachable },
|
||||
{ "__check", (u64)&ppu_check },
|
||||
{ "__trace", (u64)&ppu_trace },
|
||||
{ "__syscall", (u64)&ppu_execute_syscall },
|
||||
{ "__get_tb", (u64)&get_timebased_time },
|
||||
{ "__lwarx", (u64)&ppu_lwarx },
|
||||
{ "__ldarx", (u64)&ppu_ldarx },
|
||||
{ "__stwcx", (u64)&ppu_stwcx },
|
||||
{ "__stdcx", (u64)&ppu_stdcx },
|
||||
{ "__adde_get_ca", (u64)&adde_carry },
|
||||
{ "__vexptefp", (u64)&sse_exp2_ps },
|
||||
{ "__vlogefp", (u64)&sse_log2_ps },
|
||||
{ "__vperm", (u64)&sse_altivec_vperm },
|
||||
{ "__lvsl", (u64)&sse_altivec_lvsl },
|
||||
{ "__lvsr", (u64)&sse_altivec_lvsr },
|
||||
{ "__lvlx", (u64)&sse_cellbe_lvlx },
|
||||
{ "__lvrx", (u64)&sse_cellbe_lvrx },
|
||||
{ "__stvlx", (u64)&sse_cellbe_stvlx },
|
||||
{ "__stvrx", (u64)&sse_cellbe_stvrx },
|
||||
};
|
||||
|
||||
for (u64 index = 0; index < 1024; index++)
|
||||
{
|
||||
if (auto sc = ppu_get_syscall(index))
|
||||
{
|
||||
link_table.emplace(ppu_get_syscall_name(index), (u64)sc);
|
||||
}
|
||||
}
|
||||
|
||||
const auto jit = fxm::make<jit_compiler>(std::move(link_table), g_cfg.core.llvm_cpu);
|
||||
|
||||
LOG_SUCCESS(PPU, "LLVM: JIT initialized (%s)", jit->cpu());
|
||||
}
|
||||
|
||||
// Initialize compiler
|
||||
const auto jit = fxm::get<jit_compiler>();
|
||||
|
||||
// Create LLVM module
|
||||
|
@ -1030,216 +1243,186 @@ extern void ppu_initialize(const ppu_module& info)
|
|||
|
||||
// Define some types
|
||||
const auto _void = Type::getVoidTy(g_llvm_ctx);
|
||||
const auto _func = FunctionType::get(_void, { translator->GetContextType()->getPointerTo() }, false);
|
||||
const auto _func = FunctionType::get(_void, {translator->GetContextType()->getPointerTo()}, false);
|
||||
|
||||
// Initialize function list
|
||||
for (const auto& func : info.funcs)
|
||||
for (const auto& func : module_part.funcs)
|
||||
{
|
||||
if (func.size)
|
||||
{
|
||||
const auto f = cast<Function>(module->getOrInsertFunction(fmt::format("__0x%x", func.addr), _func));
|
||||
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func));
|
||||
f->addAttribute(1, Attribute::NoAlias);
|
||||
translator->AddFunction(func.addr, f);
|
||||
}
|
||||
}
|
||||
|
||||
if (fs::file cached{Emu.GetCachePath() + obj_name})
|
||||
{
|
||||
std::string buf;
|
||||
buf.reserve(cached.size());
|
||||
cached.read(buf, cached.size());
|
||||
auto buffer = llvm::MemoryBuffer::getMemBuffer(buf, obj_name);
|
||||
auto result = llvm::object::ObjectFile::createObjectFile(*buffer);
|
||||
|
||||
if (result)
|
||||
{
|
||||
jit->load(std::move(module), std::move(result.get()));
|
||||
std::shared_ptr<MsgDialogBase> dlg;
|
||||
|
||||
for (const auto& func : info.funcs)
|
||||
// Check cached file
|
||||
if (!fs::is_file(Emu.GetCachePath() + obj_name))
|
||||
{
|
||||
legacy::FunctionPassManager pm(module.get());
|
||||
|
||||
// Basic optimizations
|
||||
pm.add(createCFGSimplificationPass());
|
||||
pm.add(createPromoteMemoryToRegisterPass());
|
||||
pm.add(createEarlyCSEPass());
|
||||
pm.add(createTailCallEliminationPass());
|
||||
pm.add(createReassociatePass());
|
||||
pm.add(createInstructionCombiningPass());
|
||||
//pm.add(createBasicAAWrapperPass());
|
||||
//pm.add(new MemoryDependenceAnalysis());
|
||||
pm.add(createLICMPass());
|
||||
pm.add(createLoopInstSimplifyPass());
|
||||
pm.add(createNewGVNPass());
|
||||
pm.add(createDeadStoreEliminationPass());
|
||||
pm.add(createSCCPPass());
|
||||
pm.add(createInstructionCombiningPass());
|
||||
pm.add(createInstructionSimplifierPass());
|
||||
pm.add(createAggressiveDCEPass());
|
||||
pm.add(createCFGSimplificationPass());
|
||||
//pm.add(createLintPass()); // Check
|
||||
|
||||
// Initialize message dialog
|
||||
dlg = Emu.GetCallbacks().get_msg_dialog();
|
||||
dlg->type.se_normal = true;
|
||||
dlg->type.bg_invisible = true;
|
||||
dlg->type.progress_bar_count = 1;
|
||||
dlg->on_close = [](s32 status)
|
||||
{
|
||||
Emu.CallAfter([]()
|
||||
{
|
||||
if (func.size)
|
||||
{
|
||||
const std::uintptr_t uptr = jit->get(fmt::format("__0x%x", func.addr));
|
||||
ppu_ref(func.addr) = ::narrow<u32>(uptr);
|
||||
}
|
||||
}
|
||||
|
||||
LOG_SUCCESS(PPU, "LLVM: Loaded executable: %s", obj_name);
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_ERROR(PPU, "LLVM: Failed to load executable: %s", obj_name);
|
||||
}
|
||||
|
||||
legacy::FunctionPassManager pm(module.get());
|
||||
|
||||
// Basic optimizations
|
||||
pm.add(createCFGSimplificationPass());
|
||||
pm.add(createPromoteMemoryToRegisterPass());
|
||||
pm.add(createEarlyCSEPass());
|
||||
pm.add(createTailCallEliminationPass());
|
||||
pm.add(createReassociatePass());
|
||||
pm.add(createInstructionCombiningPass());
|
||||
//pm.add(createBasicAAWrapperPass());
|
||||
//pm.add(new MemoryDependenceAnalysis());
|
||||
pm.add(createLICMPass());
|
||||
pm.add(createLoopInstSimplifyPass());
|
||||
pm.add(createNewGVNPass());
|
||||
pm.add(createDeadStoreEliminationPass());
|
||||
pm.add(createSCCPPass());
|
||||
pm.add(createInstructionCombiningPass());
|
||||
pm.add(createInstructionSimplifierPass());
|
||||
pm.add(createAggressiveDCEPass());
|
||||
pm.add(createCFGSimplificationPass());
|
||||
//pm.add(createLintPass()); // Check
|
||||
|
||||
// Initialize message dialog
|
||||
const auto dlg = Emu.GetCallbacks().get_msg_dialog();
|
||||
dlg->type.se_normal = true;
|
||||
dlg->type.bg_invisible = true;
|
||||
dlg->type.progress_bar_count = 1;
|
||||
dlg->on_close = [](s32 status)
|
||||
{
|
||||
Emu.CallAfter([]()
|
||||
{
|
||||
// Abort everything
|
||||
Emu.Stop();
|
||||
});
|
||||
};
|
||||
|
||||
Emu.CallAfter([=]()
|
||||
{
|
||||
dlg->Create("Compiling PPU executable: " + info.name + "\nPlease wait...");
|
||||
});
|
||||
|
||||
// Translate functions
|
||||
for (size_t fi = 0, fmax = info.funcs.size(); fi < fmax; fi++)
|
||||
{
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
LOG_SUCCESS(PPU, "LLVM: Translation cancelled");
|
||||
return;
|
||||
}
|
||||
|
||||
if (info.funcs[fi].size)
|
||||
{
|
||||
// Update dialog
|
||||
Emu.CallAfter([=, max = info.funcs.size()]()
|
||||
{
|
||||
dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax));
|
||||
|
||||
if (fi * 100 / fmax != (fi + 1) * 100 / fmax)
|
||||
dlg->ProgressBarInc(0, 1);
|
||||
// Abort everything
|
||||
Emu.Stop();
|
||||
});
|
||||
};
|
||||
|
||||
// Translate
|
||||
const auto func = translator->TranslateToIR(info.funcs[fi], vm::_ptr<u32>(info.funcs[fi].addr));
|
||||
Emu.CallAfter([=]()
|
||||
{
|
||||
dlg->Create("Compiling PPU module " + obj_name + "\nPlease wait...");
|
||||
});
|
||||
|
||||
// Run optimization passes
|
||||
pm.run(*func);
|
||||
|
||||
const auto _syscall = module->getFunction("__syscall");
|
||||
|
||||
for (auto i = inst_begin(*func), end = inst_end(*func); i != end;)
|
||||
// Translate functions
|
||||
for (size_t fi = 0, fmax = module_part.funcs.size(); fi < fmax; fi++)
|
||||
{
|
||||
if (Emu.IsStopped())
|
||||
{
|
||||
const auto inst = &*i++;
|
||||
LOG_SUCCESS(PPU, "LLVM: Translation cancelled");
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto ci = dyn_cast<CallInst>(inst))
|
||||
if (module_part.funcs[fi].size && !test(module_part.funcs[fi].attr & ppu_attr::special))
|
||||
{
|
||||
// Update dialog
|
||||
Emu.CallAfter([=, max = module_part.funcs.size()]()
|
||||
{
|
||||
const auto cif = ci->getCalledFunction();
|
||||
const auto op1 = ci->getNumArgOperands() > 1 ? ci->getArgOperand(1) : nullptr;
|
||||
dlg->ProgressBarSetMsg(0, fmt::format("Compiling %u of %u", fi + 1, fmax));
|
||||
|
||||
if (cif == _syscall && op1 && isa<ConstantInt>(op1))
|
||||
if (fi * 100 / fmax != (fi + 1) * 100 / fmax)
|
||||
dlg->ProgressBarInc(0, 1);
|
||||
});
|
||||
|
||||
// Translate
|
||||
const auto func = translator->Translate(module_part.funcs[fi]);
|
||||
|
||||
// Run optimization passes
|
||||
pm.run(*func);
|
||||
|
||||
const auto _syscall = module->getFunction("__syscall");
|
||||
|
||||
for (auto i = inst_begin(*func), end = inst_end(*func); i != end;)
|
||||
{
|
||||
const auto inst = &*i++;
|
||||
|
||||
if (const auto ci = dyn_cast<CallInst>(inst))
|
||||
{
|
||||
// Try to determine syscall using the value from r11 (requires constant propagation)
|
||||
const u64 index = cast<ConstantInt>(op1)->getZExtValue();
|
||||
const auto cif = ci->getCalledFunction();
|
||||
const auto op1 = ci->getNumArgOperands() > 1 ? ci->getArgOperand(1) : nullptr;
|
||||
|
||||
if (const auto ptr = ppu_get_syscall(index))
|
||||
if (cif == _syscall && op1 && isa<ConstantInt>(op1))
|
||||
{
|
||||
const auto n = ppu_get_syscall_name(index);
|
||||
const auto f = cast<Function>(module->getOrInsertFunction(n, _func));
|
||||
// Try to determine syscall using the value from r11 (requires constant propagation)
|
||||
const u64 index = cast<ConstantInt>(op1)->getZExtValue();
|
||||
|
||||
// Call the syscall directly
|
||||
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
|
||||
if (const auto ptr = ppu_get_syscall(index))
|
||||
{
|
||||
const auto n = ppu_get_syscall_name(index);
|
||||
const auto f = cast<Function>(module->getOrInsertFunction(n, _func));
|
||||
|
||||
// Call the syscall directly
|
||||
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (const auto li = dyn_cast<LoadInst>(inst))
|
||||
{
|
||||
// TODO: more careful check
|
||||
if (li->getNumUses() == 0)
|
||||
if (const auto li = dyn_cast<LoadInst>(inst))
|
||||
{
|
||||
// Remove unreferenced volatile loads
|
||||
li->eraseFromParent();
|
||||
// TODO: more careful check
|
||||
if (li->getNumUses() == 0)
|
||||
{
|
||||
// Remove unreferenced volatile loads
|
||||
li->eraseFromParent();
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (const auto si = dyn_cast<StoreInst>(inst))
|
||||
{
|
||||
// TODO: more careful check
|
||||
if (isa<UndefValue>(si->getOperand(0)) && si->getParent() == &func->getEntryBlock())
|
||||
if (const auto si = dyn_cast<StoreInst>(inst))
|
||||
{
|
||||
// Remove undef volatile stores
|
||||
si->eraseFromParent();
|
||||
}
|
||||
// TODO: more careful check
|
||||
if (isa<UndefValue>(si->getOperand(0)) && si->getParent() == &func->getEntryBlock())
|
||||
{
|
||||
// Remove undef volatile stores
|
||||
si->eraseFromParent();
|
||||
}
|
||||
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
legacy::PassManager mpm;
|
||||
legacy::PassManager mpm;
|
||||
|
||||
// Remove unused functions, structs, global variables, etc
|
||||
mpm.add(createStripDeadPrototypesPass());
|
||||
//mpm.add(createFunctionInliningPass());
|
||||
mpm.add(createDeadInstEliminationPass());
|
||||
mpm.run(*module);
|
||||
// Remove unused functions, structs, global variables, etc
|
||||
mpm.add(createStripDeadPrototypesPass());
|
||||
//mpm.add(createFunctionInliningPass());
|
||||
mpm.add(createDeadInstEliminationPass());
|
||||
mpm.run(*module);
|
||||
|
||||
// Update dialog
|
||||
Emu.CallAfter([=]()
|
||||
{
|
||||
dlg->ProgressBarSetMsg(0, "Generating code...");
|
||||
dlg->ProgressBarInc(0, 100);
|
||||
});
|
||||
|
||||
std::string result;
|
||||
raw_string_ostream out(result);
|
||||
|
||||
if (g_cfg.core.llvm_logs)
|
||||
{
|
||||
out << *module; // print IR
|
||||
fs::file(Emu.GetCachePath() + obj_name + ".log", fs::rewrite).write(out.str());
|
||||
result.clear();
|
||||
}
|
||||
|
||||
if (verifyModule(*module, &out))
|
||||
{
|
||||
out.flush();
|
||||
LOG_ERROR(PPU, "LLVM: Verification failed for %s:\n%s", obj_name, result);
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_NOTICE(PPU, "LLVM: %zu functions generated", module->getFunctionList().size());
|
||||
|
||||
jit->make(std::move(module), Emu.GetCachePath() + obj_name);
|
||||
|
||||
// Get and install function addresses
|
||||
for (const auto& func : info.funcs)
|
||||
{
|
||||
if (func.size)
|
||||
// Update dialog
|
||||
Emu.CallAfter([=]()
|
||||
{
|
||||
const std::uintptr_t uptr = jit->get(fmt::format("__0x%x", func.addr));
|
||||
ppu_ref(func.addr) = ::narrow<u32>(uptr);
|
||||
dlg->ProgressBarSetMsg(0, "Generating code, this may take a long time...");
|
||||
dlg->ProgressBarInc(0, 100);
|
||||
});
|
||||
|
||||
std::string result;
|
||||
raw_string_ostream out(result);
|
||||
|
||||
if (g_cfg.core.llvm_logs)
|
||||
{
|
||||
out << *module; // print IR
|
||||
fs::file(Emu.GetCachePath() + obj_name + ".log", fs::rewrite).write(out.str());
|
||||
result.clear();
|
||||
}
|
||||
|
||||
if (verifyModule(*module, &out))
|
||||
{
|
||||
out.flush();
|
||||
LOG_ERROR(PPU, "LLVM: Verification failed for %s:\n%s", obj_name, result);
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_NOTICE(PPU, "LLVM: %zu functions generated", module->getFunctionList().size());
|
||||
}
|
||||
|
||||
LOG_SUCCESS(PPU, "LLVM: Created executable: %s", obj_name);
|
||||
// Access JIT compiler
|
||||
if (const auto jit = fxm::check_unlocked<jit_compiler>())
|
||||
{
|
||||
// Load or compile module
|
||||
jit->add(std::move(module), Emu.GetCachePath());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue